Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-05-24 18:50:06 +02:00.
Commit: "Remove DocumentSource Enum from postgres" (#1217)
This commit is contained in:
parent
2a8e53c94f
commit
4036e7c6c6
38
backend/alembic/versions/e50154680a5c_no_source_enum.py
Normal file
38
backend/alembic/versions/e50154680a5c_no_source_enum.py
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
"""No Source Enum
|
||||||
|
|
||||||
|
Revision ID: e50154680a5c
|
||||||
|
Revises: fcd135795f21
|
||||||
|
Create Date: 2024-03-14 18:06:08.523106
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
from danswer.configs.constants import DocumentSource
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "e50154680a5c"
|
||||||
|
down_revision = "fcd135795f21"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Widen search_doc.source_type from the DocumentSource enum to a plain string.

    Storing the source as VARCHAR(50) means new DocumentSource values can be
    added in application code without requiring a schema migration.
    """
    op.alter_column(
        "search_doc",
        "source_type",
        type_=sa.String(length=50),
        # native_enum=False matches how the column was originally declared,
        # so the ALTER is a simple type widening rather than an enum cast.
        existing_type=sa.Enum(DocumentSource, native_enum=False),
        existing_nullable=False,
    )
    # Best-effort cleanup: drop the Postgres enum type if some earlier state
    # created it natively. IF EXISTS keeps this safe when it never existed.
    op.execute("DROP TYPE IF EXISTS documentsource")
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert search_doc.source_type back to the DocumentSource enum type.

    NOTE(review): rows whose source_type value is not a current DocumentSource
    member would presumably fail this conversion — acceptable for a downgrade,
    but worth confirming before running it against production data.
    """
    op.alter_column(
        "search_doc",
        "source_type",
        type_=sa.Enum(DocumentSource, native_enum=False),
        existing_type=sa.String(length=50),
        existing_nullable=False,
    )
|
@ -27,6 +27,7 @@ from danswer.connectors.models import Document
|
|||||||
from danswer.connectors.models import Section
|
from danswer.connectors.models import Section
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
@ -100,13 +101,18 @@ def extract_urls_from_sitemap(sitemap_url: str) -> list[str]:
|
|||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
soup = BeautifulSoup(response.content, "html.parser")
|
soup = BeautifulSoup(response.content, "html.parser")
|
||||||
return [_ensure_absolute_url(sitemap_url, loc_tag.text) for loc_tag in soup.find_all("loc")]
|
return [
|
||||||
|
_ensure_absolute_url(sitemap_url, loc_tag.text)
|
||||||
|
for loc_tag in soup.find_all("loc")
|
||||||
|
]
|
||||||
|
|
||||||
def _ensure_absolute_url(source_url:str, maybe_relative_url: str) -> str:
|
|
||||||
|
def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str:
|
||||||
if not urlparse(maybe_relative_url).netloc:
|
if not urlparse(maybe_relative_url).netloc:
|
||||||
return urljoin(source_url, maybe_relative_url)
|
return urljoin(source_url, maybe_relative_url)
|
||||||
return maybe_relative_url
|
return maybe_relative_url
|
||||||
|
|
||||||
|
|
||||||
def _ensure_valid_url(url: str) -> str:
|
def _ensure_valid_url(url: str) -> str:
|
||||||
if "://" not in url:
|
if "://" not in url:
|
||||||
return "https://" + url
|
return "https://" + url
|
||||||
|
Loading…
x
Reference in New Issue
Block a user