mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-22 17:50:21 +02:00
Remove DocumentSource Enum from postgres (#1217)
This commit is contained in:
parent
2a8e53c94f
commit
4036e7c6c6
38
backend/alembic/versions/e50154680a5c_no_source_enum.py
Normal file
38
backend/alembic/versions/e50154680a5c_no_source_enum.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""No Source Enum
|
||||
|
||||
Revision ID: e50154680a5c
|
||||
Revises: fcd135795f21
|
||||
Create Date: 2024-03-14 18:06:08.523106
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
from danswer.configs.constants import DocumentSource
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "e50154680a5c"
|
||||
down_revision = "fcd135795f21"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Store ``search_doc.source_type`` as a plain 50-character string.

    The column was previously declared as a non-native
    ``sa.Enum(DocumentSource)``; after the type change, any database type
    named ``documentsource`` is dropped if it exists.
    """
    previous_type = sa.Enum(DocumentSource, native_enum=False)
    plain_string_type = sa.String(length=50)

    op.alter_column(
        "search_doc",
        "source_type",
        existing_type=previous_type,
        existing_nullable=False,
        type_=plain_string_type,
    )

    # NOTE(review): presumably cleans up a leftover native Postgres enum type;
    # IF EXISTS keeps this a no-op when no such type was ever created.
    op.execute("DROP TYPE IF EXISTS documentsource")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert ``search_doc.source_type`` to a non-native ``DocumentSource`` enum.

    This is the inverse column change of ``upgrade``: the 50-character string
    column is declared as ``sa.Enum(DocumentSource, native_enum=False)`` again.
    No database-level type is recreated here.
    """
    plain_string_type = sa.String(length=50)
    restored_type = sa.Enum(DocumentSource, native_enum=False)

    op.alter_column(
        "search_doc",
        "source_type",
        existing_type=plain_string_type,
        existing_nullable=False,
        type_=restored_type,
    )
|
@ -27,6 +27,7 @@ from danswer.connectors.models import Document
|
||||
from danswer.connectors.models import Section
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@ -100,13 +101,18 @@ def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.content, "html.parser")
|
||||
return [_ensure_absolute_url(sitemap_url, loc_tag.text) for loc_tag in soup.find_all("loc")]
|
||||
return [
|
||||
_ensure_absolute_url(sitemap_url, loc_tag.text)
|
||||
for loc_tag in soup.find_all("loc")
|
||||
]
|
||||
|
||||
def _ensure_absolute_url(source_url:str, maybe_relative_url: str) -> str:
|
||||
|
||||
def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
|
||||
if not urlparse(maybe_relative_url).netloc:
|
||||
return urljoin(source_url, maybe_relative_url)
|
||||
return maybe_relative_url
|
||||
|
||||
|
||||
def _ensure_valid_url(url: str) -> str:
|
||||
if "://" not in url:
|
||||
return "https://" + url
|
||||
|
Loading…
x
Reference in New Issue
Block a user