From 132a9f750def18c6c955e5b483eec4136b553491 Mon Sep 17 00:00:00 2001 From: Chris Weaver <25087905+Weves@users.noreply.github.com> Date: Sat, 29 Jul 2023 16:53:38 -0700 Subject: [PATCH] Add Github Action to run mypy / reorder-python-imports / black on all PRs (#251) Also fixes import ordering (previously, local imports weren't grouped together as they should have been) --- .github/workflows/pr-python-checks.yml | 41 +++++++++++++++++++ backend/.pre-commit-config.yaml | 38 +++++++++++++++-- ...78d9b7f9_larger_access_tokens_for_oauth.py | 16 ++------ backend/danswer/background/update.py | 3 +- backend/danswer/configs/app_configs.py | 8 +++- .../connectors/confluence/connector.py | 1 + .../connectors/danswer_jira/connector.py | 5 ++- backend/danswer/connectors/factory.py | 2 +- .../danswer/connectors/github/connector.py | 7 ++-- .../connectors/google_drive/connector.py | 7 ++-- .../connectors/google_drive/connector_auth.py | 9 ++-- backend/danswer/connectors/guru/connector.py | 1 + backend/danswer/connectors/models.py | 3 +- .../danswer/connectors/notion/connector.py | 1 + .../connectors/productboard/connector.py | 5 ++- backend/danswer/connectors/slab/connector.py | 3 +- backend/danswer/connectors/slack/connector.py | 9 ++-- backend/danswer/connectors/slack/utils.py | 3 +- backend/danswer/connectors/web/connector.py | 13 +++--- backend/danswer/datastores/qdrant/indexing.py | 17 ++++---- backend/danswer/datastores/qdrant/store.py | 13 +++--- backend/danswer/datastores/typesense/store.py | 3 +- backend/danswer/db/auth.py | 13 +++--- backend/danswer/db/connector.py | 13 +++--- .../danswer/db/connector_credential_pair.py | 7 ++-- backend/danswer/db/credentials.py | 7 ++-- backend/danswer/db/engine.py | 13 +++--- backend/danswer/db/index_attempt.py | 7 ++-- backend/danswer/db/models.py | 7 ++-- backend/danswer/direct_qa/__init__.py | 5 ++- backend/danswer/direct_qa/gpt_4_all.py | 3 +- backend/danswer/direct_qa/open_ai.py | 5 ++- backend/danswer/direct_qa/qa_utils.py | 1 
+ .../dynamic_configs/file_system/store.py | 3 +- backend/danswer/listeners/slack_listener.py | 11 ++--- backend/danswer/main.py | 28 ++++++++----- backend/danswer/search/danswer_helper.py | 3 +- backend/danswer/search/keyword_search.py | 7 ++-- backend/danswer/search/search_utils.py | 9 ++-- backend/danswer/search/semantic_search.py | 3 +- backend/danswer/server/event_loading.py | 3 +- backend/danswer/server/health.py | 3 +- backend/danswer/server/manage.py | 19 +++++---- backend/danswer/server/models.py | 5 ++- backend/danswer/server/search_backend.py | 7 ++-- backend/danswer/utils/clients.py | 3 +- backend/danswer/utils/text_processing.py | 1 + backend/scripts/reset_indexes.py | 7 ++-- backend/scripts/reset_postgres.py | 1 + backend/scripts/save_load_state.py | 5 ++- backend/scripts/simulate_frontend.py | 1 + 51 files changed, 265 insertions(+), 143 deletions(-) create mode 100644 .github/workflows/pr-python-checks.yml diff --git a/.github/workflows/pr-python-checks.yml b/.github/workflows/pr-python-checks.yml new file mode 100644 index 000000000..896235410 --- /dev/null +++ b/.github/workflows/pr-python-checks.yml @@ -0,0 +1,41 @@ +name: Python Checks + +on: + pull_request: + branches: [ main ] + +jobs: + mypy-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: | + backend/requirements/default.txt + backend/requirements/dev.txt + - run: | + python -m pip install --upgrade pip + pip install -r backend/requirements/default.txt + pip install -r backend/requirements/dev.txt + + - name: Run MyPy + run: | + cd backend + mypy . + + - name: Check import order with reorder-python-imports + run: | + cd backend + find ./danswer -name "*.py" | xargs reorder-python-imports --py311-plus + + - name: Check code formatting with Black + run: | + cd backend + black --check . 
diff --git a/backend/.pre-commit-config.yaml b/backend/.pre-commit-config.yaml index f4ad18883..2568b101b 100644 --- a/backend/.pre-commit-config.yaml +++ b/backend/.pre-commit-config.yaml @@ -1,12 +1,44 @@ repos: -- repo: https://github.com/psf/black + - repo: https://github.com/psf/black rev: 23.3.0 hooks: - id: black language_version: python3.11 -- repo: https://github.com/asottile/reorder_python_imports + - repo: https://github.com/asottile/reorder_python_imports rev: v3.9.0 hooks: - id: reorder-python-imports - args: ['--py311-plus'] \ No newline at end of file + args: ['--py311-plus', '--application-directories=backend/'] + # need to ignore alembic files, since reorder-python-imports gets confused + # and thinks that alembic is a local package since there is a folder + # in the backend directory called `alembic` + exclude: ^backend/alembic/ + + # We would like to have a mypy pre-commit hook, but due to the fact that + # pre-commit runs in its own isolated environment, we would need to install + # and keep in sync all dependencies so mypy has access to the appropriate type + # stubs. This does not seem worth it at the moment, so for now we will stick to + # having mypy run via Github Actions / manually by contributors + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.1.1 + # hooks: + # - id: mypy + # exclude: ^tests/ + # # below are needed for type stubs since pre-commit runs in its own + # # isolated environment. 
Unfortunately, this needs to be kept in sync + # # with requirements/dev.txt + requirements/default.txt + # additional_dependencies: [ + # alembic==1.10.4, + # types-beautifulsoup4==4.12.0.3, + # types-html5lib==1.1.11.13, + # types-oauthlib==3.2.0.9, + # types-psycopg2==2.9.21.10, + # types-python-dateutil==2.8.19.13, + # types-regex==2023.3.23.1, + # types-requests==2.28.11.17, + # types-retry==0.9.9.3, + # types-urllib3==1.26.25.11 + # ] + # # TODO: add back once errors are addressed + # # args: [--strict] \ No newline at end of file diff --git a/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py b/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py index 61d797e52..7d6f7f873 100644 --- a/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py +++ b/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py @@ -10,23 +10,15 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. -revision = '465f78d9b7f9' -down_revision = '3c5e35aa9af0' +revision = "465f78d9b7f9" +down_revision = "3c5e35aa9af0" branch_labels = None depends_on = None def upgrade() -> None: - op.alter_column( - "oauth_account", - "access_token", - type_=sa.Text() - ) + op.alter_column("oauth_account", "access_token", type_=sa.Text()) def downgrade() -> None: - op.alter_column( - "oauth_account", - "access_token", - type_=sa.String(length=1024) - ) + op.alter_column("oauth_account", "access_token", type_=sa.String(length=1024)) diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index da0e36dd7..5a412a69c 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -2,6 +2,8 @@ import time from datetime import datetime from datetime import timezone +from sqlalchemy.orm import Session + from danswer.connectors.factory import instantiate_connector from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import 
PollConnector @@ -25,7 +27,6 @@ from danswer.db.models import Connector from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus from danswer.utils.logger import setup_logger -from sqlalchemy.orm import Session logger = setup_logger() diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index 8d25858c1..4dc30582c 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -47,8 +47,12 @@ VALID_EMAIL_DOMAIN = os.environ.get("VALID_EMAIL_DOMAIN", "") # OAuth Login Flow ENABLE_OAUTH = os.environ.get("ENABLE_OAUTH", "").lower() != "false" OAUTH_TYPE = os.environ.get("OAUTH_TYPE", "google").lower() -OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID", os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")) -OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")) +OAUTH_CLIENT_ID = os.environ.get( + "OAUTH_CLIENT_ID", os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "") +) +OAUTH_CLIENT_SECRET = os.environ.get( + "OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "") +) OPENID_CONFIG_URL = os.environ.get("OPENID_CONFIG_URL", "") MASK_CREDENTIAL_PREFIX = ( os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false" diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 0cc7eb117..e165e1a36 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -6,6 +6,7 @@ from typing import Any from urllib.parse import urlparse from atlassian import Confluence # type:ignore + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py index 
e5c3609c3..d68fd5cd0 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -3,6 +3,9 @@ from datetime import timezone from typing import Any from urllib.parse import urlparse +from jira import JIRA +from jira.resources import Issue + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -13,8 +16,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger -from jira import JIRA -from jira.resources import Issue logger = setup_logger() diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 01bd1ca71..615946b33 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -8,13 +8,13 @@ from danswer.connectors.danswer_jira.connector import JiraConnector from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.github.connector import GithubConnector from danswer.connectors.google_drive.connector import GoogleDriveConnector -from danswer.connectors.notion.connector import NotionConnector from danswer.connectors.guru.connector import GuruConnector from danswer.connectors.interfaces import BaseConnector from danswer.connectors.interfaces import EventConnector from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.models import InputType +from danswer.connectors.notion.connector import NotionConnector from danswer.connectors.productboard.connector import ProductboardConnector from danswer.connectors.slab.connector import SlabConnector from danswer.connectors.slack.connector import SlackLoadConnector diff --git 
a/backend/danswer/connectors/github/connector.py b/backend/danswer/connectors/github/connector.py index 109995465..d0c0dfd6c 100644 --- a/backend/danswer/connectors/github/connector.py +++ b/backend/danswer/connectors/github/connector.py @@ -2,6 +2,10 @@ import itertools from collections.abc import Generator from typing import Any +from github import Github +from github.PaginatedList import PaginatedList +from github.PullRequest import PullRequest + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -10,9 +14,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger -from github import Github -from github.PaginatedList import PaginatedList -from github.PullRequest import PullRequest logger = setup_logger() diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 44d01c1cf..b7a5dd540 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -7,6 +7,10 @@ from itertools import chain from typing import Any import docx2txt # type:ignore +from google.oauth2.credentials import Credentials # type: ignore +from googleapiclient import discovery # type: ignore +from PyPDF2 import PdfReader + from danswer.configs.app_configs import GOOGLE_DRIVE_INCLUDE_SHARED from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource @@ -20,9 +24,6 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.connectors.utils import batch_generator from danswer.utils.logger import setup_logger -from google.oauth2.credentials import Credentials # type: ignore -from googleapiclient 
import discovery # type: ignore -from PyPDF2 import PdfReader logger = setup_logger() diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index c33694d44..ea57a73ca 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -4,16 +4,17 @@ from urllib.parse import parse_qs from urllib.parse import ParseResult from urllib.parse import urlparse +from google.auth.transport.requests import Request # type: ignore +from google.oauth2.credentials import Credentials # type: ignore +from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore +from sqlalchemy.orm import Session + from danswer.configs.app_configs import WEB_DOMAIN from danswer.db.credentials import update_credential_json from danswer.db.models import User from danswer.dynamic_configs import get_dynamic_config_store from danswer.server.models import GoogleAppCredentials from danswer.utils.logger import setup_logger -from google.auth.transport.requests import Request # type: ignore -from google.oauth2.credentials import Credentials # type: ignore -from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore -from sqlalchemy.orm import Session logger = setup_logger() diff --git a/backend/danswer/connectors/guru/connector.py b/backend/danswer/connectors/guru/connector.py index 4f648ebff..0b6e7040a 100644 --- a/backend/danswer/connectors/guru/connector.py +++ b/backend/danswer/connectors/guru/connector.py @@ -4,6 +4,7 @@ from datetime import timezone from typing import Any import requests + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index b1eb77cf5..588dfd08e 100644 --- a/backend/danswer/connectors/models.py +++ 
b/backend/danswer/connectors/models.py @@ -2,9 +2,10 @@ from dataclasses import dataclass from enum import Enum from typing import Any -from danswer.configs.constants import DocumentSource from pydantic import BaseModel +from danswer.configs.constants import DocumentSource + class ConnectorMissingCredentialError(PermissionError): def __init__(self, connector_name: str) -> None: diff --git a/backend/danswer/connectors/notion/connector.py b/backend/danswer/connectors/notion/connector.py index 46fac3b73..1e9b9f977 100644 --- a/backend/danswer/connectors/notion/connector.py +++ b/backend/danswer/connectors/notion/connector.py @@ -8,6 +8,7 @@ from typing import List from typing import Optional import requests + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput diff --git a/backend/danswer/connectors/productboard/connector.py b/backend/danswer/connectors/productboard/connector.py index d475687ac..eb5f41506 100644 --- a/backend/danswer/connectors/productboard/connector.py +++ b/backend/danswer/connectors/productboard/connector.py @@ -5,6 +5,9 @@ from typing import cast import requests from bs4 import BeautifulSoup +from dateutil import parser +from retry import retry + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -13,8 +16,6 @@ from danswer.connectors.interfaces import SecondsSinceUnixEpoch from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger -from dateutil import parser -from retry import retry logger = setup_logger() diff --git a/backend/danswer/connectors/slab/connector.py b/backend/danswer/connectors/slab/connector.py index 2b491eed2..80380ff7c 100644 --- a/backend/danswer/connectors/slab/connector.py +++ 
b/backend/danswer/connectors/slab/connector.py @@ -7,6 +7,8 @@ from typing import Any from urllib.parse import urljoin import requests +from dateutil import parser + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -17,7 +19,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger -from dateutil import parser # Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min SLAB_GRAPHQL_MAX_TRIES = 10 diff --git a/backend/danswer/connectors/slack/connector.py b/backend/danswer/connectors/slack/connector.py index fb99372ba..e10cb68dc 100644 --- a/backend/danswer/connectors/slack/connector.py +++ b/backend/danswer/connectors/slack/connector.py @@ -6,6 +6,9 @@ from pathlib import Path from typing import Any from typing import cast +from slack_sdk import WebClient +from slack_sdk.web import SlackResponse + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -20,8 +23,6 @@ from danswer.connectors.slack.utils import make_slack_api_call_paginated from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import UserIdReplacer from danswer.utils.logger import setup_logger -from slack_sdk import WebClient -from slack_sdk.web import SlackResponse logger = setup_logger() @@ -48,7 +49,9 @@ def get_channel_info(client: WebClient, channel_id: str) -> ChannelType: def get_channels(client: WebClient, exclude_archived: bool = True) -> list[ChannelType]: """Get all channels in the workspace""" channels: list[dict[str, Any]] = [] - for result in 
_make_slack_api_call(client.conversations_list, exclude_archived=exclude_archived): + for result in _make_slack_api_call( + client.conversations_list, exclude_archived=exclude_archived + ): channels.extend(result["channels"]) return channels diff --git a/backend/danswer/connectors/slack/utils.py b/backend/danswer/connectors/slack/utils.py index 8fd2282af..0d486cfbd 100644 --- a/backend/danswer/connectors/slack/utils.py +++ b/backend/danswer/connectors/slack/utils.py @@ -4,11 +4,12 @@ from collections.abc import Callable from typing import Any from typing import cast -from danswer.utils.logger import setup_logger from slack_sdk import WebClient from slack_sdk.errors import SlackApiError from slack_sdk.web import SlackResponse +from danswer.utils.logger import setup_logger + logger = setup_logger() # number of messages we request per page when fetching paginated slack messages diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py index aa6299da1..1780120f7 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -10,6 +10,13 @@ from urllib.parse import urlparse import bs4 import requests from bs4 import BeautifulSoup +from oauthlib.oauth2 import BackendApplicationClient +from playwright.sync_api import BrowserContext +from playwright.sync_api import Playwright +from playwright.sync_api import sync_playwright +from PyPDF2 import PdfReader +from requests_oauthlib import OAuth2Session # type:ignore + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS @@ -22,12 +29,6 @@ from danswer.connectors.interfaces import LoadConnector from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.utils.logger import setup_logger -from oauthlib.oauth2 import BackendApplicationClient -from 
playwright.sync_api import BrowserContext -from playwright.sync_api import Playwright -from playwright.sync_api import sync_playwright -from PyPDF2 import PdfReader -from requests_oauthlib import OAuth2Session # type:ignore logger = setup_logger() diff --git a/backend/danswer/datastores/qdrant/indexing.py b/backend/danswer/datastores/qdrant/indexing.py index 4e4f88e63..db9692323 100644 --- a/backend/danswer/datastores/qdrant/indexing.py +++ b/backend/danswer/datastores/qdrant/indexing.py @@ -2,6 +2,15 @@ import json from functools import partial from uuid import UUID +from qdrant_client import QdrantClient +from qdrant_client.http import models +from qdrant_client.http.exceptions import ResponseHandlingException +from qdrant_client.http.models.models import UpdateResult +from qdrant_client.models import CollectionsResponse +from qdrant_client.models import Distance +from qdrant_client.models import PointStruct +from qdrant_client.models import VectorParams + from danswer.chunking.models import EmbeddedIndexChunk from danswer.configs.constants import ALLOWED_GROUPS from danswer.configs.constants import ALLOWED_USERS @@ -21,14 +30,6 @@ from danswer.datastores.datastore_utils import get_uuid_from_chunk from danswer.datastores.datastore_utils import update_doc_user_map from danswer.utils.clients import get_qdrant_client from danswer.utils.logger import setup_logger -from qdrant_client import QdrantClient -from qdrant_client.http import models -from qdrant_client.http.exceptions import ResponseHandlingException -from qdrant_client.http.models.models import UpdateResult -from qdrant_client.models import CollectionsResponse -from qdrant_client.models import Distance -from qdrant_client.models import PointStruct -from qdrant_client.models import VectorParams logger = setup_logger() diff --git a/backend/danswer/datastores/qdrant/store.py b/backend/danswer/datastores/qdrant/store.py index 5284ee3b0..d13a293a5 100644 --- a/backend/danswer/datastores/qdrant/store.py +++ 
b/backend/danswer/datastores/qdrant/store.py @@ -1,5 +1,12 @@ from uuid import UUID +from qdrant_client.http.exceptions import ResponseHandlingException +from qdrant_client.http.exceptions import UnexpectedResponse +from qdrant_client.http.models import FieldCondition +from qdrant_client.http.models import Filter +from qdrant_client.http.models import MatchAny +from qdrant_client.http.models import MatchValue + from danswer.chunking.models import EmbeddedIndexChunk from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import NUM_RETURNED_HITS @@ -15,12 +22,6 @@ from danswer.search.search_utils import get_default_embedding_model from danswer.utils.clients import get_qdrant_client from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from qdrant_client.http.exceptions import ResponseHandlingException -from qdrant_client.http.exceptions import UnexpectedResponse -from qdrant_client.http.models import FieldCondition -from qdrant_client.http.models import Filter -from qdrant_client.http.models import MatchAny -from qdrant_client.http.models import MatchValue logger = setup_logger() diff --git a/backend/danswer/datastores/typesense/store.py b/backend/danswer/datastores/typesense/store.py index 1a1c0b513..bdf9ac8ce 100644 --- a/backend/danswer/datastores/typesense/store.py +++ b/backend/danswer/datastores/typesense/store.py @@ -4,6 +4,8 @@ from typing import Any from uuid import UUID import typesense # type: ignore +from typesense.exceptions import ObjectNotFound # type: ignore + from danswer.chunking.models import EmbeddedIndexChunk from danswer.chunking.models import IndexChunk from danswer.chunking.models import InferenceChunk @@ -27,7 +29,6 @@ from danswer.datastores.interfaces import IndexFilter from danswer.datastores.interfaces import KeywordIndex from danswer.utils.clients import get_typesense_client from danswer.utils.logger import setup_logger -from typesense.exceptions import 
ObjectNotFound # type: ignore logger = setup_logger() diff --git a/backend/danswer/db/auth.py b/backend/danswer/db/auth.py index 28438a396..1883e8abd 100644 --- a/backend/danswer/db/auth.py +++ b/backend/danswer/db/auth.py @@ -2,12 +2,6 @@ from collections.abc import AsyncGenerator from typing import Any from typing import Dict -from danswer.auth.schemas import UserRole -from danswer.db.engine import get_async_session -from danswer.db.engine import get_sqlalchemy_async_engine -from danswer.db.models import AccessToken -from danswer.db.models import OAuthAccount -from danswer.db.models import User from fastapi import Depends from fastapi_users.db import SQLAlchemyUserDatabase from fastapi_users.models import UP @@ -16,6 +10,13 @@ from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from danswer.auth.schemas import UserRole +from danswer.db.engine import get_async_session +from danswer.db.engine import get_sqlalchemy_async_engine +from danswer.db.models import AccessToken +from danswer.db.models import OAuthAccount +from danswer.db.models import User + async def get_user_count() -> int: async with AsyncSession(get_sqlalchemy_async_engine()) as asession: diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py index cdf1ae31d..2113a8a17 100644 --- a/backend/danswer/db/connector.py +++ b/backend/danswer/db/connector.py @@ -1,5 +1,12 @@ from typing import cast +from fastapi import HTTPException +from sqlalchemy import and_ +from sqlalchemy import func +from sqlalchemy import select +from sqlalchemy.orm import aliased +from sqlalchemy.orm import Session + from danswer.configs.constants import DocumentSource from danswer.connectors.models import InputType from danswer.db.models import Connector @@ -8,12 +15,6 @@ from danswer.server.models import ConnectorBase from danswer.server.models import ObjectCreationIdResponse from danswer.server.models import StatusResponse from danswer.utils.logger 
import setup_logger -from fastapi import HTTPException -from sqlalchemy import and_ -from sqlalchemy import func -from sqlalchemy import select -from sqlalchemy.orm import aliased -from sqlalchemy.orm import Session logger = setup_logger() diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index 2212137d8..803078c4c 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -1,5 +1,9 @@ from datetime import datetime +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.orm import Session + from danswer.db.connector import fetch_connector_by_id from danswer.db.credentials import fetch_credential_by_id from danswer.db.models import ConnectorCredentialPair @@ -7,9 +11,6 @@ from danswer.db.models import IndexingStatus from danswer.db.models import User from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger -from fastapi import HTTPException -from sqlalchemy import select -from sqlalchemy.orm import Session logger = setup_logger() diff --git a/backend/danswer/db/credentials.py b/backend/danswer/db/credentials.py index a4794dce1..61d110c06 100644 --- a/backend/danswer/db/credentials.py +++ b/backend/danswer/db/credentials.py @@ -1,14 +1,15 @@ from typing import Any +from sqlalchemy import select +from sqlalchemy.orm import Session +from sqlalchemy.sql.expression import or_ + from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import Credential from danswer.db.models import User from danswer.server.models import CredentialBase from danswer.server.models import ObjectCreationIdResponse from danswer.utils.logger import setup_logger -from sqlalchemy import select -from sqlalchemy.orm import Session -from sqlalchemy.sql.expression import or_ logger = setup_logger() diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index 438c7cd1f..1741be44f 
100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -3,12 +3,6 @@ from collections.abc import Generator from datetime import datetime from datetime import timezone -from danswer.configs.app_configs import POSTGRES_DB -from danswer.configs.app_configs import POSTGRES_HOST -from danswer.configs.app_configs import POSTGRES_PASSWORD -from danswer.configs.app_configs import POSTGRES_PORT -from danswer.configs.app_configs import POSTGRES_USER -from danswer.utils.logger import setup_logger from sqlalchemy import text from sqlalchemy.engine import create_engine from sqlalchemy.engine import Engine @@ -17,6 +11,13 @@ from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.orm import Session +from danswer.configs.app_configs import POSTGRES_DB +from danswer.configs.app_configs import POSTGRES_HOST +from danswer.configs.app_configs import POSTGRES_PASSWORD +from danswer.configs.app_configs import POSTGRES_PORT +from danswer.configs.app_configs import POSTGRES_USER +from danswer.utils.logger import setup_logger + logger = setup_logger() SYNC_DB_API = "psycopg2" diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 4001ab5ee..30502d73c 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -1,10 +1,11 @@ -from danswer.db.models import IndexAttempt -from danswer.db.models import IndexingStatus -from danswer.utils.logger import setup_logger from sqlalchemy import desc from sqlalchemy import select from sqlalchemy.orm import Session +from danswer.db.models import IndexAttempt +from danswer.db.models import IndexingStatus +from danswer.utils.logger import setup_logger + logger = setup_logger() diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index d708818ec..815904840 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -4,9 +4,6 @@ from typing import Any from 
typing import List from uuid import UUID -from danswer.auth.schemas import UserRole -from danswer.configs.constants import DocumentSource -from danswer.connectors.models import InputType from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID from fastapi_users.db import SQLAlchemyBaseUserTableUUID from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID @@ -24,6 +21,10 @@ from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column from sqlalchemy.orm import relationship +from danswer.auth.schemas import UserRole +from danswer.configs.constants import DocumentSource +from danswer.connectors.models import InputType + class IndexingStatus(str, PyEnum): NOT_STARTED = "not_started" diff --git a/backend/danswer/direct_qa/__init__.py b/backend/danswer/direct_qa/__init__.py index 27ffa0a8b..2ffe84ceb 100644 --- a/backend/danswer/direct_qa/__init__.py +++ b/backend/danswer/direct_qa/__init__.py @@ -1,13 +1,14 @@ from typing import Any +from openai.error import AuthenticationError +from openai.error import Timeout + from danswer.configs.app_configs import QA_TIMEOUT from danswer.configs.model_configs import INTERNAL_MODEL_VERSION from danswer.direct_qa.exceptions import UnknownModelError from danswer.direct_qa.interfaces import QAModel from danswer.direct_qa.open_ai import OpenAIChatCompletionQA from danswer.direct_qa.open_ai import OpenAICompletionQA -from openai.error import AuthenticationError -from openai.error import Timeout # Imports commented out temporarily due to incompatibility of gpt4all with M1 Mac hardware currently # from danswer.direct_qa.gpt_4_all import GPT4AllChatCompletionQA diff --git a/backend/danswer/direct_qa/gpt_4_all.py b/backend/danswer/direct_qa/gpt_4_all.py index 16dc6d21b..673cf94e7 100644 --- a/backend/danswer/direct_qa/gpt_4_all.py +++ b/backend/danswer/direct_qa/gpt_4_all.py @@ -1,6 +1,8 @@ from collections.abc import Generator from typing import Any +from gpt4all import GPT4All # 
type:ignore + from danswer.chunking.models import InferenceChunk from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS from danswer.configs.model_configs import GEN_AI_MODEL_VERSION @@ -15,7 +17,6 @@ from danswer.direct_qa.qa_utils import process_answer from danswer.direct_qa.qa_utils import process_model_tokens from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from gpt4all import GPT4All # type:ignore logger = setup_logger() diff --git a/backend/danswer/direct_qa/open_ai.py b/backend/danswer/direct_qa/open_ai.py index 47ed4cbeb..022ea8cd2 100644 --- a/backend/danswer/direct_qa/open_ai.py +++ b/backend/danswer/direct_qa/open_ai.py @@ -10,6 +10,9 @@ from typing import TypeVar import openai import tiktoken +from openai.error import AuthenticationError +from openai.error import Timeout + from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import INCLUDE_METADATA from danswer.configs.app_configs import OPENAI_API_KEY @@ -31,8 +34,6 @@ from danswer.dynamic_configs import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from openai.error import AuthenticationError -from openai.error import Timeout logger = setup_logger() diff --git a/backend/danswer/direct_qa/qa_utils.py b/backend/danswer/direct_qa/qa_utils.py index 4687fc6c7..b06766159 100644 --- a/backend/danswer/direct_qa/qa_utils.py +++ b/backend/danswer/direct_qa/qa_utils.py @@ -7,6 +7,7 @@ from typing import Optional from typing import Tuple import regex + from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import QUOTE_ALLOWED_ERROR_PERCENT from danswer.configs.constants import BLURB diff --git a/backend/danswer/dynamic_configs/file_system/store.py b/backend/danswer/dynamic_configs/file_system/store.py index bab25a333..75cc0d740 100644 --- 
a/backend/danswer/dynamic_configs/file_system/store.py +++ b/backend/danswer/dynamic_configs/file_system/store.py @@ -3,10 +3,11 @@ import os from pathlib import Path from typing import cast +from filelock import FileLock + from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.dynamic_configs.interface import DynamicConfigStore from danswer.dynamic_configs.interface import JSON_ro -from filelock import FileLock FILE_LOCK_TIMEOUT = 10 diff --git a/backend/danswer/listeners/slack_listener.py b/backend/danswer/listeners/slack_listener.py index 360b2b91c..1453d6192 100644 --- a/backend/danswer/listeners/slack_listener.py +++ b/backend/danswer/listeners/slack_listener.py @@ -1,5 +1,11 @@ import os +from retry import retry +from slack_sdk import WebClient +from slack_sdk.socket_mode import SocketModeClient +from slack_sdk.socket_mode.request import SocketModeRequest +from slack_sdk.socket_mode.response import SocketModeResponse + from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION @@ -10,11 +16,6 @@ from danswer.server.models import QAResponse from danswer.server.models import QuestionRequest from danswer.server.models import SearchDoc from danswer.utils.logger import setup_logger -from retry import retry -from slack_sdk import WebClient -from slack_sdk.socket_mode import SocketModeClient -from slack_sdk.socket_mode.request import SocketModeRequest -from slack_sdk.socket_mode.response import SocketModeResponse logger = setup_logger() diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 32509af79..493f287f5 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -1,18 +1,26 @@ import nltk # type:ignore import uvicorn +from fastapi import FastAPI +from fastapi import Request +from fastapi.exceptions import RequestValidationError +from fastapi.middleware.cors import 
CORSMiddleware +from fastapi.responses import JSONResponse + from danswer.auth.schemas import UserCreate from danswer.auth.schemas import UserRead from danswer.auth.schemas import UserUpdate from danswer.auth.users import auth_backend from danswer.auth.users import fastapi_users from danswer.auth.users import oauth_client -from danswer.configs.app_configs import APP_HOST, OAUTH_TYPE, OPENID_CONFIG_URL +from danswer.configs.app_configs import APP_HOST from danswer.configs.app_configs import APP_PORT from danswer.configs.app_configs import DISABLE_AUTH from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.app_configs import ENABLE_OAUTH from danswer.configs.app_configs import OAUTH_CLIENT_ID from danswer.configs.app_configs import OAUTH_CLIENT_SECRET +from danswer.configs.app_configs import OAUTH_TYPE +from danswer.configs.app_configs import OPENID_CONFIG_URL from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION from danswer.configs.app_configs import SECRET from danswer.configs.app_configs import TYPESENSE_DEFAULT_COLLECTION @@ -29,11 +37,6 @@ from danswer.server.health import router as health_router from danswer.server.manage import router as admin_router from danswer.server.search_backend import router as backend_router from danswer.utils.logger import setup_logger -from fastapi import FastAPI -from fastapi import Request -from fastapi.exceptions import RequestValidationError -from fastapi.middleware.cors import CORSMiddleware -from fastapi.responses import JSONResponse logger = setup_logger() @@ -50,7 +53,7 @@ def validation_exception_handler( def value_error_handler(_: Request, exc: ValueError) -> JSONResponse: try: - raise(exc) + raise (exc) except: # log stacktrace logger.exception("ValueError") @@ -93,6 +96,9 @@ def get_application() -> FastAPI: tags=["users"], ) if ENABLE_OAUTH: + if oauth_client is None: + raise RuntimeError("OAuth is enabled but no OAuth client is configured") + if OAUTH_TYPE == "google": # special 
case for google application.include_router( @@ -124,9 +130,7 @@ def get_application() -> FastAPI: tags=["auth"], ) application.include_router( - fastapi_users.get_oauth_associate_router( - oauth_client, UserRead, SECRET - ), + fastapi_users.get_oauth_associate_router(oauth_client, UserRead, SECRET), prefix="/auth/associate/oauth", tags=["auth"], ) @@ -161,7 +165,9 @@ def get_application() -> FastAPI: if not OAUTH_CLIENT_ID: logger.warning("OAuth is turned on but OAUTH_CLIENT_ID is empty") if not OAUTH_CLIENT_SECRET: - logger.warning("OAuth is turned on but OAUTH_CLIENT_SECRET is empty") + logger.warning( + "OAuth is turned on but OAUTH_CLIENT_SECRET is empty" + ) if OAUTH_TYPE == "openid" and not OPENID_CONFIG_URL: logger.warning("OpenID is turned on but OPENID_CONFIG_URL is emtpy") else: diff --git a/backend/danswer/search/danswer_helper.py b/backend/danswer/search/danswer_helper.py index 466ebff33..3d5d788e3 100644 --- a/backend/danswer/search/danswer_helper.py +++ b/backend/danswer/search/danswer_helper.py @@ -1,5 +1,7 @@ import numpy as np import tensorflow as tf # type:ignore +from transformers import AutoTokenizer # type:ignore + from danswer.search.keyword_search import remove_stop_words from danswer.search.models import QueryFlow from danswer.search.models import SearchType @@ -9,7 +11,6 @@ from danswer.search.search_utils import get_default_tokenizer from danswer.server.models import HelperResponse from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from transformers import AutoTokenizer # type:ignore logger = setup_logger() diff --git a/backend/danswer/search/keyword_search.py b/backend/danswer/search/keyword_search.py index 09a2f571f..232166af9 100644 --- a/backend/danswer/search/keyword_search.py +++ b/backend/danswer/search/keyword_search.py @@ -1,15 +1,16 @@ import json from uuid import UUID +from nltk.corpus import stopwords # type:ignore +from nltk.stem import WordNetLemmatizer # type:ignore +from 
nltk.tokenize import word_tokenize # type:ignore + from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import NUM_RETURNED_HITS from danswer.datastores.interfaces import IndexFilter from danswer.datastores.interfaces import KeywordIndex from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from nltk.corpus import stopwords # type:ignore -from nltk.stem import WordNetLemmatizer # type:ignore -from nltk.tokenize import word_tokenize # type:ignore logger = setup_logger() diff --git a/backend/danswer/search/search_utils.py b/backend/danswer/search/search_utils.py index 94cc46c45..4b896fcd6 100644 --- a/backend/danswer/search/search_utils.py +++ b/backend/danswer/search/search_utils.py @@ -1,13 +1,14 @@ +from sentence_transformers import CrossEncoder # type: ignore +from sentence_transformers import SentenceTransformer # type: ignore +from transformers import AutoTokenizer # type: ignore +from transformers import TFDistilBertForSequenceClassification # type: ignore + from danswer.configs.model_configs import CROSS_EMBED_CONTEXT_SIZE from danswer.configs.model_configs import CROSS_ENCODER_MODEL_ENSEMBLE from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL from danswer.configs.model_configs import INTENT_MODEL_VERSION from danswer.configs.model_configs import QUERY_MAX_CONTEXT_SIZE -from sentence_transformers import CrossEncoder # type: ignore -from sentence_transformers import SentenceTransformer # type: ignore -from transformers import AutoTokenizer # type: ignore -from transformers import TFDistilBertForSequenceClassification # type: ignore _TOKENIZER: None | AutoTokenizer = None diff --git a/backend/danswer/search/semantic_search.py b/backend/danswer/search/semantic_search.py index 6acf49af3..31eb94f79 100644 --- a/backend/danswer/search/semantic_search.py +++ b/backend/danswer/search/semantic_search.py @@ -2,6 
+2,8 @@ import json from uuid import UUID import numpy +from sentence_transformers import SentenceTransformer # type: ignore + from danswer.chunking.models import EmbeddedIndexChunk from danswer.chunking.models import IndexChunk from danswer.chunking.models import InferenceChunk @@ -18,7 +20,6 @@ from danswer.search.search_utils import get_default_reranking_model_ensemble from danswer.server.models import SearchDoc from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time -from sentence_transformers import SentenceTransformer # type: ignore logger = setup_logger() diff --git a/backend/danswer/server/event_loading.py b/backend/danswer/server/event_loading.py index 906de97df..f040ffdb2 100644 --- a/backend/danswer/server/event_loading.py +++ b/backend/danswer/server/event_loading.py @@ -1,10 +1,11 @@ from typing import Any -from danswer.utils.logger import setup_logger from fastapi import APIRouter from pydantic import BaseModel from pydantic import Extra +from danswer.utils.logger import setup_logger + router = APIRouter() logger = setup_logger() diff --git a/backend/danswer/server/health.py b/backend/danswer/server/health.py index 54da5dffe..a287f7e71 100644 --- a/backend/danswer/server/health.py +++ b/backend/danswer/server/health.py @@ -1,6 +1,7 @@ -from danswer.server.models import StatusResponse from fastapi import APIRouter +from danswer.server.models import StatusResponse + router = APIRouter() diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index 9594095b6..0a708c38c 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -2,6 +2,16 @@ from datetime import datetime from datetime import timedelta from typing import cast +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from fastapi import Request +from fastapi import Response +from fastapi import UploadFile +from fastapi_users.db import SQLAlchemyUserDatabase 
+from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm import Session + from danswer.auth.schemas import UserRole from danswer.auth.users import current_admin_user from danswer.auth.users import current_user @@ -61,15 +71,6 @@ from danswer.server.models import StatusResponse from danswer.server.models import UserByEmail from danswer.server.models import UserRoleResponse from danswer.utils.logger import setup_logger -from fastapi import APIRouter -from fastapi import Depends -from fastapi import HTTPException -from fastapi import Request -from fastapi import Response -from fastapi import UploadFile -from fastapi_users.db import SQLAlchemyUserDatabase -from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy.orm import Session router = APIRouter(prefix="/manage") diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index e18d27f85..bfb8b94b4 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -6,6 +6,9 @@ from typing import Optional from typing import TypeVar from uuid import UUID +from pydantic import BaseModel +from pydantic.generics import GenericModel + from danswer.configs.constants import DocumentSource from danswer.connectors.models import InputType from danswer.datastores.interfaces import IndexFilter @@ -13,8 +16,6 @@ from danswer.db.models import Connector from danswer.db.models import IndexingStatus from danswer.search.models import QueryFlow from danswer.search.models import SearchType -from pydantic import BaseModel -from pydantic.generics import GenericModel DataT = TypeVar("DataT") diff --git a/backend/danswer/server/search_backend.py b/backend/danswer/server/search_backend.py index 65316f137..2e0e79e89 100644 --- a/backend/danswer/server/search_backend.py +++ b/backend/danswer/server/search_backend.py @@ -1,6 +1,10 @@ import json from collections.abc import Generator +from fastapi import APIRouter +from fastapi import Depends +from fastapi.responses import 
StreamingResponse + from danswer.auth.users import current_user from danswer.chunking.models import InferenceChunk from danswer.configs.app_configs import DISABLE_GENERATIVE_AI @@ -25,9 +29,6 @@ from danswer.server.models import QuestionRequest from danswer.server.models import SearchResponse from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time -from fastapi import APIRouter -from fastapi import Depends -from fastapi.responses import StreamingResponse logger = setup_logger() diff --git a/backend/danswer/utils/clients.py b/backend/danswer/utils/clients.py index 10807e5f5..db5d47b9f 100644 --- a/backend/danswer/utils/clients.py +++ b/backend/danswer/utils/clients.py @@ -1,4 +1,6 @@ import typesense # type: ignore +from qdrant_client import QdrantClient + from danswer.configs.app_configs import QDRANT_API_KEY from danswer.configs.app_configs import QDRANT_HOST from danswer.configs.app_configs import QDRANT_PORT @@ -6,7 +8,6 @@ from danswer.configs.app_configs import QDRANT_URL from danswer.configs.app_configs import TYPESENSE_API_KEY from danswer.configs.app_configs import TYPESENSE_HOST from danswer.configs.app_configs import TYPESENSE_PORT -from qdrant_client import QdrantClient _qdrant_client: QdrantClient | None = None diff --git a/backend/danswer/utils/text_processing.py b/backend/danswer/utils/text_processing.py index 7d79f9bf6..f11388f53 100644 --- a/backend/danswer/utils/text_processing.py +++ b/backend/danswer/utils/text_processing.py @@ -1,4 +1,5 @@ from bs4 import BeautifulSoup + from danswer.configs.constants import HTML_SEPARATOR diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py index 5c214bf50..b7f94c593 100644 --- a/backend/scripts/reset_indexes.py +++ b/backend/scripts/reset_indexes.py @@ -1,12 +1,13 @@ # This file is purely for development use, not included in any builds +from qdrant_client.http.models import Distance +from qdrant_client.http.models import 
VectorParams +from typesense.exceptions import ObjectNotFound # type: ignore + from danswer.configs.model_configs import DOC_EMBEDDING_DIM from danswer.datastores.typesense.store import create_typesense_collection from danswer.utils.clients import get_qdrant_client from danswer.utils.clients import get_typesense_client from danswer.utils.logger import setup_logger -from qdrant_client.http.models import Distance -from qdrant_client.http.models import VectorParams -from typesense.exceptions import ObjectNotFound # type: ignore logger = setup_logger() diff --git a/backend/scripts/reset_postgres.py b/backend/scripts/reset_postgres.py index 0b4bc9a1e..ace7cd128 100644 --- a/backend/scripts/reset_postgres.py +++ b/backend/scripts/reset_postgres.py @@ -1,4 +1,5 @@ import psycopg2 + from danswer.configs.app_configs import POSTGRES_DB from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD diff --git a/backend/scripts/save_load_state.py b/backend/scripts/save_load_state.py index f56845fb7..9848dd98f 100644 --- a/backend/scripts/save_load_state.py +++ b/backend/scripts/save_load_state.py @@ -6,6 +6,9 @@ import subprocess from datetime import datetime import requests +from qdrant_client.http.models.models import SnapshotDescription +from typesense.exceptions import ObjectNotFound # type: ignore + from alembic import command from alembic.config import Config from danswer.configs.app_configs import POSTGRES_DB @@ -23,8 +26,6 @@ from danswer.datastores.typesense.store import create_typesense_collection from danswer.utils.clients import get_qdrant_client from danswer.utils.clients import get_typesense_client from danswer.utils.logger import setup_logger -from qdrant_client.http.models.models import SnapshotDescription -from typesense.exceptions import ObjectNotFound # type: ignore logger = setup_logger() diff --git a/backend/scripts/simulate_frontend.py b/backend/scripts/simulate_frontend.py index 1377b52a0..8ccca2ba3 100644 
--- a/backend/scripts/simulate_frontend.py +++ b/backend/scripts/simulate_frontend.py @@ -5,6 +5,7 @@ import urllib from pprint import pprint import requests + from danswer.configs.app_configs import APP_PORT from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION from danswer.configs.constants import SOURCE_TYPE