Add GitHub Action to run mypy / reorder-python-imports / black on all PRs

Also fixes import ordering (previously, local imports weren't grouped together as they should have been)
This commit is contained in:
Chris Weaver 2023-07-29 16:53:38 -07:00 committed by GitHub
parent 87fe6f7575
commit 132a9f750d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 265 additions and 143 deletions

41
.github/workflows/pr-python-checks.yml vendored Normal file

@ -0,0 +1,41 @@
# Static checks for the Python backend: type checking (mypy), import ordering
# (reorder-python-imports) and formatting (Black). Runs on every pull request
# that targets main.
name: Python Checks

on:
  pull_request:
    branches: [ main ]

jobs:
  # A single job runs all three checks in sequence, despite the mypy-specific id.
  mypy-check:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
          # Cache pip downloads, keyed on both requirements files.
          cache: 'pip'
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r backend/requirements/default.txt
          pip install -r backend/requirements/dev.txt

      - name: Run MyPy
        run: |
          cd backend
          mypy .

      - name: Check import order with reorder-python-imports
        run: |
          cd backend
          # -print0 / -0 pass file names NUL-delimited so that paths containing
          # whitespace are not split into separate arguments by xargs.
          find ./danswer -name "*.py" -print0 | xargs -0 reorder-python-imports --py311-plus

      - name: Check code formatting with Black
        run: |
          cd backend
          black --check .

@ -1,12 +1,44 @@
repos:
- repo: https://github.com/psf/black
- repo: https://github.com/psf/black
rev: 23.3.0
hooks:
- id: black
language_version: python3.11
- repo: https://github.com/asottile/reorder_python_imports
- repo: https://github.com/asottile/reorder_python_imports
rev: v3.9.0
hooks:
- id: reorder-python-imports
args: ['--py311-plus']
args: ['--py311-plus', '--application-directories=backend/']
# need to ignore alembic files, since reorder-python-imports gets confused
# and thinks that alembic is a local package since there is a folder
# in the backend directory called `alembic`
exclude: ^backend/alembic/
# We would like to have a mypy pre-commit hook, but due to the fact that
# pre-commit runs in its own isolated environment, we would need to install
# and keep in sync all dependencies so mypy has access to the appropriate type
# stubs. This does not seem worth it at the moment, so for now we will stick to
# having mypy run via GitHub Actions / manually by contributors
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.1.1
# hooks:
# - id: mypy
# exclude: ^tests/
# # below are needed for type stubs since pre-commit runs in its own
# # isolated environment. Unfortunately, this needs to be kept in sync
# # with requirements/dev.txt + requirements/default.txt
# additional_dependencies: [
# alembic==1.10.4,
# types-beautifulsoup4==4.12.0.3,
# types-html5lib==1.1.11.13,
# types-oauthlib==3.2.0.9,
# types-psycopg2==2.9.21.10,
# types-python-dateutil==2.8.19.13,
# types-regex==2023.3.23.1,
# types-requests==2.28.11.17,
# types-retry==0.9.9.3,
# types-urllib3==1.26.25.11
# ]
# # TODO: add back once errors are addressed
# # args: [--strict]

@ -10,23 +10,15 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '465f78d9b7f9'
down_revision = '3c5e35aa9af0'
revision = "465f78d9b7f9"
down_revision = "3c5e35aa9af0"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"oauth_account",
"access_token",
type_=sa.Text()
)
op.alter_column("oauth_account", "access_token", type_=sa.Text())
def downgrade() -> None:
op.alter_column(
"oauth_account",
"access_token",
type_=sa.String(length=1024)
)
op.alter_column("oauth_account", "access_token", type_=sa.String(length=1024))

@ -2,6 +2,8 @@ import time
from datetime import datetime
from datetime import timezone
from sqlalchemy.orm import Session
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
@ -25,7 +27,6 @@ from danswer.db.models import Connector
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.utils.logger import setup_logger
from sqlalchemy.orm import Session
logger = setup_logger()

@ -47,8 +47,12 @@ VALID_EMAIL_DOMAIN = os.environ.get("VALID_EMAIL_DOMAIN", "")
# OAuth Login Flow
ENABLE_OAUTH = os.environ.get("ENABLE_OAUTH", "").lower() != "false"
OAUTH_TYPE = os.environ.get("OAUTH_TYPE", "google").lower()
OAUTH_CLIENT_ID = os.environ.get("OAUTH_CLIENT_ID", os.environ.get("GOOGLE_OAUTH_CLIENT_ID", ""))
OAUTH_CLIENT_SECRET = os.environ.get("OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", ""))
OAUTH_CLIENT_ID = os.environ.get(
"OAUTH_CLIENT_ID", os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
)
OAUTH_CLIENT_SECRET = os.environ.get(
"OAUTH_CLIENT_SECRET", os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
)
OPENID_CONFIG_URL = os.environ.get("OPENID_CONFIG_URL", "")
MASK_CREDENTIAL_PREFIX = (
os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false"

@ -6,6 +6,7 @@ from typing import Any
from urllib.parse import urlparse
from atlassian import Confluence # type:ignore
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput

@ -3,6 +3,9 @@ from datetime import timezone
from typing import Any
from urllib.parse import urlparse
from jira import JIRA
from jira.resources import Issue
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
@ -13,8 +16,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from jira import JIRA
from jira.resources import Issue
logger = setup_logger()

@ -8,13 +8,13 @@ from danswer.connectors.danswer_jira.connector import JiraConnector
from danswer.connectors.file.connector import LocalFileConnector
from danswer.connectors.github.connector import GithubConnector
from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.notion.connector import NotionConnector
from danswer.connectors.guru.connector import GuruConnector
from danswer.connectors.interfaces import BaseConnector
from danswer.connectors.interfaces import EventConnector
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.models import InputType
from danswer.connectors.notion.connector import NotionConnector
from danswer.connectors.productboard.connector import ProductboardConnector
from danswer.connectors.slab.connector import SlabConnector
from danswer.connectors.slack.connector import SlackLoadConnector

@ -2,6 +2,10 @@ import itertools
from collections.abc import Generator
from typing import Any
from github import Github
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
@ -10,9 +14,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from github import Github
from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest
logger = setup_logger()

@ -7,6 +7,10 @@ from itertools import chain
from typing import Any
import docx2txt # type:ignore
from google.oauth2.credentials import Credentials # type: ignore
from googleapiclient import discovery # type: ignore
from PyPDF2 import PdfReader
from danswer.configs.app_configs import GOOGLE_DRIVE_INCLUDE_SHARED
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
@ -20,9 +24,6 @@ from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.connectors.utils import batch_generator
from danswer.utils.logger import setup_logger
from google.oauth2.credentials import Credentials # type: ignore
from googleapiclient import discovery # type: ignore
from PyPDF2 import PdfReader
logger = setup_logger()

@ -4,16 +4,17 @@ from urllib.parse import parse_qs
from urllib.parse import ParseResult
from urllib.parse import urlparse
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
from danswer.configs.app_configs import WEB_DOMAIN
from danswer.db.credentials import update_credential_json
from danswer.db.models import User
from danswer.dynamic_configs import get_dynamic_config_store
from danswer.server.models import GoogleAppCredentials
from danswer.utils.logger import setup_logger
from google.auth.transport.requests import Request # type: ignore
from google.oauth2.credentials import Credentials # type: ignore
from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore
from sqlalchemy.orm import Session
logger = setup_logger()

@ -4,6 +4,7 @@ from datetime import timezone
from typing import Any
import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput

@ -2,9 +2,10 @@ from dataclasses import dataclass
from enum import Enum
from typing import Any
from danswer.configs.constants import DocumentSource
from pydantic import BaseModel
from danswer.configs.constants import DocumentSource
class ConnectorMissingCredentialError(PermissionError):
def __init__(self, connector_name: str) -> None:

@ -8,6 +8,7 @@ from typing import List
from typing import Optional
import requests
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput

@ -5,6 +5,9 @@ from typing import cast
import requests
from bs4 import BeautifulSoup
from dateutil import parser
from retry import retry
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
@ -13,8 +16,6 @@ from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from dateutil import parser
from retry import retry
logger = setup_logger()

@ -7,6 +7,8 @@ from typing import Any
from urllib.parse import urljoin
import requests
from dateutil import parser
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
@ -17,7 +19,6 @@ from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from dateutil import parser
# Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min
SLAB_GRAPHQL_MAX_TRIES = 10

@ -6,6 +6,9 @@ from pathlib import Path
from typing import Any
from typing import cast
from slack_sdk import WebClient
from slack_sdk.web import SlackResponse
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.interfaces import GenerateDocumentsOutput
@ -20,8 +23,6 @@ from danswer.connectors.slack.utils import make_slack_api_call_paginated
from danswer.connectors.slack.utils import make_slack_api_rate_limited
from danswer.connectors.slack.utils import UserIdReplacer
from danswer.utils.logger import setup_logger
from slack_sdk import WebClient
from slack_sdk.web import SlackResponse
logger = setup_logger()
@ -48,7 +49,9 @@ def get_channel_info(client: WebClient, channel_id: str) -> ChannelType:
def get_channels(client: WebClient, exclude_archived: bool = True) -> list[ChannelType]:
"""Get all channels in the workspace"""
channels: list[dict[str, Any]] = []
for result in _make_slack_api_call(client.conversations_list, exclude_archived=exclude_archived):
for result in _make_slack_api_call(
client.conversations_list, exclude_archived=exclude_archived
):
channels.extend(result["channels"])
return channels

@ -4,11 +4,12 @@ from collections.abc import Callable
from typing import Any
from typing import cast
from danswer.utils.logger import setup_logger
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.web import SlackResponse
from danswer.utils.logger import setup_logger
logger = setup_logger()
# number of messages we request per page when fetching paginated slack messages

@ -10,6 +10,13 @@ from urllib.parse import urlparse
import bs4
import requests
from bs4 import BeautifulSoup
from oauthlib.oauth2 import BackendApplicationClient
from playwright.sync_api import BrowserContext
from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright
from PyPDF2 import PdfReader
from requests_oauthlib import OAuth2Session # type:ignore
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES
from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS
@ -22,12 +29,6 @@ from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
from oauthlib.oauth2 import BackendApplicationClient
from playwright.sync_api import BrowserContext
from playwright.sync_api import Playwright
from playwright.sync_api import sync_playwright
from PyPDF2 import PdfReader
from requests_oauthlib import OAuth2Session # type:ignore
logger = setup_logger()

@ -2,6 +2,15 @@ import json
from functools import partial
from uuid import UUID
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.exceptions import ResponseHandlingException
from qdrant_client.http.models.models import UpdateResult
from qdrant_client.models import CollectionsResponse
from qdrant_client.models import Distance
from qdrant_client.models import PointStruct
from qdrant_client.models import VectorParams
from danswer.chunking.models import EmbeddedIndexChunk
from danswer.configs.constants import ALLOWED_GROUPS
from danswer.configs.constants import ALLOWED_USERS
@ -21,14 +30,6 @@ from danswer.datastores.datastore_utils import get_uuid_from_chunk
from danswer.datastores.datastore_utils import update_doc_user_map
from danswer.utils.clients import get_qdrant_client
from danswer.utils.logger import setup_logger
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.exceptions import ResponseHandlingException
from qdrant_client.http.models.models import UpdateResult
from qdrant_client.models import CollectionsResponse
from qdrant_client.models import Distance
from qdrant_client.models import PointStruct
from qdrant_client.models import VectorParams
logger = setup_logger()

@ -1,5 +1,12 @@
from uuid import UUID
from qdrant_client.http.exceptions import ResponseHandlingException
from qdrant_client.http.exceptions import UnexpectedResponse
from qdrant_client.http.models import FieldCondition
from qdrant_client.http.models import Filter
from qdrant_client.http.models import MatchAny
from qdrant_client.http.models import MatchValue
from danswer.chunking.models import EmbeddedIndexChunk
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import NUM_RETURNED_HITS
@ -15,12 +22,6 @@ from danswer.search.search_utils import get_default_embedding_model
from danswer.utils.clients import get_qdrant_client
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from qdrant_client.http.exceptions import ResponseHandlingException
from qdrant_client.http.exceptions import UnexpectedResponse
from qdrant_client.http.models import FieldCondition
from qdrant_client.http.models import Filter
from qdrant_client.http.models import MatchAny
from qdrant_client.http.models import MatchValue
logger = setup_logger()

@ -4,6 +4,8 @@ from typing import Any
from uuid import UUID
import typesense # type: ignore
from typesense.exceptions import ObjectNotFound # type: ignore
from danswer.chunking.models import EmbeddedIndexChunk
from danswer.chunking.models import IndexChunk
from danswer.chunking.models import InferenceChunk
@ -27,7 +29,6 @@ from danswer.datastores.interfaces import IndexFilter
from danswer.datastores.interfaces import KeywordIndex
from danswer.utils.clients import get_typesense_client
from danswer.utils.logger import setup_logger
from typesense.exceptions import ObjectNotFound # type: ignore
logger = setup_logger()

@ -2,12 +2,6 @@ from collections.abc import AsyncGenerator
from typing import Any
from typing import Dict
from danswer.auth.schemas import UserRole
from danswer.db.engine import get_async_session
from danswer.db.engine import get_sqlalchemy_async_engine
from danswer.db.models import AccessToken
from danswer.db.models import OAuthAccount
from danswer.db.models import User
from fastapi import Depends
from fastapi_users.db import SQLAlchemyUserDatabase
from fastapi_users.models import UP
@ -16,6 +10,13 @@ from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from danswer.auth.schemas import UserRole
from danswer.db.engine import get_async_session
from danswer.db.engine import get_sqlalchemy_async_engine
from danswer.db.models import AccessToken
from danswer.db.models import OAuthAccount
from danswer.db.models import User
async def get_user_count() -> int:
async with AsyncSession(get_sqlalchemy_async_engine()) as asession:

@ -1,5 +1,12 @@
from typing import cast
from fastapi import HTTPException
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from danswer.db.models import Connector
@ -8,12 +15,6 @@ from danswer.server.models import ConnectorBase
from danswer.server.models import ObjectCreationIdResponse
from danswer.server.models import StatusResponse
from danswer.utils.logger import setup_logger
from fastapi import HTTPException
from sqlalchemy import and_
from sqlalchemy import func
from sqlalchemy import select
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Session
logger = setup_logger()

@ -1,5 +1,9 @@
from datetime import datetime
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.db.connector import fetch_connector_by_id
from danswer.db.credentials import fetch_credential_by_id
from danswer.db.models import ConnectorCredentialPair
@ -7,9 +11,6 @@ from danswer.db.models import IndexingStatus
from danswer.db.models import User
from danswer.server.models import StatusResponse
from danswer.utils.logger import setup_logger
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.orm import Session
logger = setup_logger()

@ -1,14 +1,15 @@
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import or_
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import Credential
from danswer.db.models import User
from danswer.server.models import CredentialBase
from danswer.server.models import ObjectCreationIdResponse
from danswer.utils.logger import setup_logger
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.sql.expression import or_
logger = setup_logger()

@ -3,12 +3,6 @@ from collections.abc import Generator
from datetime import datetime
from datetime import timezone
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from danswer.utils.logger import setup_logger
from sqlalchemy import text
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import Engine
@ -17,6 +11,13 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from danswer.utils.logger import setup_logger
logger = setup_logger()
SYNC_DB_API = "psycopg2"

@ -1,10 +1,11 @@
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.utils.logger import setup_logger
from sqlalchemy import desc
from sqlalchemy import select
from sqlalchemy.orm import Session
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.utils.logger import setup_logger
logger = setup_logger()

@ -4,9 +4,6 @@ from typing import Any
from typing import List
from uuid import UUID
from danswer.auth.schemas import UserRole
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID
from fastapi_users.db import SQLAlchemyBaseUserTableUUID
from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID
@ -24,6 +21,10 @@ from sqlalchemy.orm import Mapped
from sqlalchemy.orm import mapped_column
from sqlalchemy.orm import relationship
from danswer.auth.schemas import UserRole
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
class IndexingStatus(str, PyEnum):
NOT_STARTED = "not_started"

@ -1,13 +1,14 @@
from typing import Any
from openai.error import AuthenticationError
from openai.error import Timeout
from danswer.configs.app_configs import QA_TIMEOUT
from danswer.configs.model_configs import INTERNAL_MODEL_VERSION
from danswer.direct_qa.exceptions import UnknownModelError
from danswer.direct_qa.interfaces import QAModel
from danswer.direct_qa.open_ai import OpenAIChatCompletionQA
from danswer.direct_qa.open_ai import OpenAICompletionQA
from openai.error import AuthenticationError
from openai.error import Timeout
# Imports commented out temporarily due to incompatibility of gpt4all with M1 Mac hardware currently
# from danswer.direct_qa.gpt_4_all import GPT4AllChatCompletionQA

@ -1,6 +1,8 @@
from collections.abc import Generator
from typing import Any
from gpt4all import GPT4All # type:ignore
from danswer.chunking.models import InferenceChunk
from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
@ -15,7 +17,6 @@ from danswer.direct_qa.qa_utils import process_answer
from danswer.direct_qa.qa_utils import process_model_tokens
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from gpt4all import GPT4All # type:ignore
logger = setup_logger()

@ -10,6 +10,9 @@ from typing import TypeVar
import openai
import tiktoken
from openai.error import AuthenticationError
from openai.error import Timeout
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import INCLUDE_METADATA
from danswer.configs.app_configs import OPENAI_API_KEY
@ -31,8 +34,6 @@ from danswer.dynamic_configs import get_dynamic_config_store
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from openai.error import AuthenticationError
from openai.error import Timeout
logger = setup_logger()

@ -7,6 +7,7 @@ from typing import Optional
from typing import Tuple
import regex
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import QUOTE_ALLOWED_ERROR_PERCENT
from danswer.configs.constants import BLURB

@ -3,10 +3,11 @@ import os
from pathlib import Path
from typing import cast
from filelock import FileLock
from danswer.dynamic_configs.interface import ConfigNotFoundError
from danswer.dynamic_configs.interface import DynamicConfigStore
from danswer.dynamic_configs.interface import JSON_ro
from filelock import FileLock
FILE_LOCK_TIMEOUT = 10

@ -1,5 +1,11 @@
import os
from retry import retry
from slack_sdk import WebClient
from slack_sdk.socket_mode import SocketModeClient
from slack_sdk.socket_mode.request import SocketModeRequest
from slack_sdk.socket_mode.response import SocketModeResponse
from danswer.configs.app_configs import DANSWER_BOT_NUM_DOCS_TO_DISPLAY
from danswer.configs.app_configs import DANSWER_BOT_NUM_RETRIES
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
@ -10,11 +16,6 @@ from danswer.server.models import QAResponse
from danswer.server.models import QuestionRequest
from danswer.server.models import SearchDoc
from danswer.utils.logger import setup_logger
from retry import retry
from slack_sdk import WebClient
from slack_sdk.socket_mode import SocketModeClient
from slack_sdk.socket_mode.request import SocketModeRequest
from slack_sdk.socket_mode.response import SocketModeResponse
logger = setup_logger()

@ -1,18 +1,26 @@
import nltk # type:ignore
import uvicorn
from fastapi import FastAPI
from fastapi import Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from danswer.auth.schemas import UserCreate
from danswer.auth.schemas import UserRead
from danswer.auth.schemas import UserUpdate
from danswer.auth.users import auth_backend
from danswer.auth.users import fastapi_users
from danswer.auth.users import oauth_client
from danswer.configs.app_configs import APP_HOST, OAUTH_TYPE, OPENID_CONFIG_URL
from danswer.configs.app_configs import APP_HOST
from danswer.configs.app_configs import APP_PORT
from danswer.configs.app_configs import DISABLE_AUTH
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
from danswer.configs.app_configs import ENABLE_OAUTH
from danswer.configs.app_configs import OAUTH_CLIENT_ID
from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
from danswer.configs.app_configs import OAUTH_TYPE
from danswer.configs.app_configs import OPENID_CONFIG_URL
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
from danswer.configs.app_configs import SECRET
from danswer.configs.app_configs import TYPESENSE_DEFAULT_COLLECTION
@ -29,11 +37,6 @@ from danswer.server.health import router as health_router
from danswer.server.manage import router as admin_router
from danswer.server.search_backend import router as backend_router
from danswer.utils.logger import setup_logger
from fastapi import FastAPI
from fastapi import Request
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
logger = setup_logger()
@ -50,7 +53,7 @@ def validation_exception_handler(
def value_error_handler(_: Request, exc: ValueError) -> JSONResponse:
try:
raise(exc)
raise (exc)
except:
# log stacktrace
logger.exception("ValueError")
@ -93,6 +96,9 @@ def get_application() -> FastAPI:
tags=["users"],
)
if ENABLE_OAUTH:
if oauth_client is None:
raise RuntimeError("OAuth is enabled but no OAuth client is configured")
if OAUTH_TYPE == "google":
# special case for google
application.include_router(
@ -124,9 +130,7 @@ def get_application() -> FastAPI:
tags=["auth"],
)
application.include_router(
fastapi_users.get_oauth_associate_router(
oauth_client, UserRead, SECRET
),
fastapi_users.get_oauth_associate_router(oauth_client, UserRead, SECRET),
prefix="/auth/associate/oauth",
tags=["auth"],
)
@ -161,7 +165,9 @@ def get_application() -> FastAPI:
if not OAUTH_CLIENT_ID:
logger.warning("OAuth is turned on but OAUTH_CLIENT_ID is empty")
if not OAUTH_CLIENT_SECRET:
logger.warning("OAuth is turned on but OAUTH_CLIENT_SECRET is empty")
logger.warning(
"OAuth is turned on but OAUTH_CLIENT_SECRET is empty"
)
if OAUTH_TYPE == "openid" and not OPENID_CONFIG_URL:
logger.warning("OpenID is turned on but OPENID_CONFIG_URL is emtpy")
else:

@ -1,5 +1,7 @@
import numpy as np
import tensorflow as tf # type:ignore
from transformers import AutoTokenizer # type:ignore
from danswer.search.keyword_search import remove_stop_words
from danswer.search.models import QueryFlow
from danswer.search.models import SearchType
@ -9,7 +11,6 @@ from danswer.search.search_utils import get_default_tokenizer
from danswer.server.models import HelperResponse
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from transformers import AutoTokenizer # type:ignore
logger = setup_logger()

@ -1,15 +1,16 @@
import json
from uuid import UUID
from nltk.corpus import stopwords # type:ignore
from nltk.stem import WordNetLemmatizer # type:ignore
from nltk.tokenize import word_tokenize # type:ignore
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import NUM_RETURNED_HITS
from danswer.datastores.interfaces import IndexFilter
from danswer.datastores.interfaces import KeywordIndex
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from nltk.corpus import stopwords # type:ignore
from nltk.stem import WordNetLemmatizer # type:ignore
from nltk.tokenize import word_tokenize # type:ignore
logger = setup_logger()

@ -1,13 +1,14 @@
from sentence_transformers import CrossEncoder # type: ignore
from sentence_transformers import SentenceTransformer # type: ignore
from transformers import AutoTokenizer # type: ignore
from transformers import TFDistilBertForSequenceClassification # type: ignore
from danswer.configs.model_configs import CROSS_EMBED_CONTEXT_SIZE
from danswer.configs.model_configs import CROSS_ENCODER_MODEL_ENSEMBLE
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL
from danswer.configs.model_configs import INTENT_MODEL_VERSION
from danswer.configs.model_configs import QUERY_MAX_CONTEXT_SIZE
from sentence_transformers import CrossEncoder # type: ignore
from sentence_transformers import SentenceTransformer # type: ignore
from transformers import AutoTokenizer # type: ignore
from transformers import TFDistilBertForSequenceClassification # type: ignore
_TOKENIZER: None | AutoTokenizer = None

@ -2,6 +2,8 @@ import json
from uuid import UUID
import numpy
from sentence_transformers import SentenceTransformer # type: ignore
from danswer.chunking.models import EmbeddedIndexChunk
from danswer.chunking.models import IndexChunk
from danswer.chunking.models import InferenceChunk
@ -18,7 +20,6 @@ from danswer.search.search_utils import get_default_reranking_model_ensemble
from danswer.server.models import SearchDoc
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_function_time
from sentence_transformers import SentenceTransformer # type: ignore
logger = setup_logger()

@ -1,10 +1,11 @@
from typing import Any
from danswer.utils.logger import setup_logger
from fastapi import APIRouter
from pydantic import BaseModel
from pydantic import Extra
from danswer.utils.logger import setup_logger
router = APIRouter()
logger = setup_logger()

@ -1,6 +1,7 @@
from danswer.server.models import StatusResponse
from fastapi import APIRouter
from danswer.server.models import StatusResponse
router = APIRouter()

@ -2,6 +2,16 @@ from datetime import datetime
from datetime import timedelta
from typing import cast
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from fastapi_users.db import SQLAlchemyUserDatabase
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
from danswer.auth.schemas import UserRole
from danswer.auth.users import current_admin_user
from danswer.auth.users import current_user
@ -61,15 +71,6 @@ from danswer.server.models import StatusResponse
from danswer.server.models import UserByEmail
from danswer.server.models import UserRoleResponse
from danswer.utils.logger import setup_logger
from fastapi import APIRouter
from fastapi import Depends
from fastapi import HTTPException
from fastapi import Request
from fastapi import Response
from fastapi import UploadFile
from fastapi_users.db import SQLAlchemyUserDatabase
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import Session
router = APIRouter(prefix="/manage")

@ -6,6 +6,9 @@ from typing import Optional
from typing import TypeVar
from uuid import UUID
from pydantic import BaseModel
from pydantic.generics import GenericModel
from danswer.configs.constants import DocumentSource
from danswer.connectors.models import InputType
from danswer.datastores.interfaces import IndexFilter
@ -13,8 +16,6 @@ from danswer.db.models import Connector
from danswer.db.models import IndexingStatus
from danswer.search.models import QueryFlow
from danswer.search.models import SearchType
from pydantic import BaseModel
from pydantic.generics import GenericModel
DataT = TypeVar("DataT")

@ -1,6 +1,10 @@
import json
from collections.abc import Generator
from fastapi import APIRouter
from fastapi import Depends
from fastapi.responses import StreamingResponse
from danswer.auth.users import current_user
from danswer.chunking.models import InferenceChunk
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
@ -25,9 +29,6 @@ from danswer.server.models import QuestionRequest
from danswer.server.models import SearchResponse
from danswer.utils.logger import setup_logger
from danswer.utils.timing import log_generator_function_time
from fastapi import APIRouter
from fastapi import Depends
from fastapi.responses import StreamingResponse
logger = setup_logger()

@ -1,4 +1,6 @@
import typesense # type: ignore
from qdrant_client import QdrantClient
from danswer.configs.app_configs import QDRANT_API_KEY
from danswer.configs.app_configs import QDRANT_HOST
from danswer.configs.app_configs import QDRANT_PORT
@ -6,7 +8,6 @@ from danswer.configs.app_configs import QDRANT_URL
from danswer.configs.app_configs import TYPESENSE_API_KEY
from danswer.configs.app_configs import TYPESENSE_HOST
from danswer.configs.app_configs import TYPESENSE_PORT
from qdrant_client import QdrantClient
_qdrant_client: QdrantClient | None = None

@ -1,4 +1,5 @@
from bs4 import BeautifulSoup
from danswer.configs.constants import HTML_SEPARATOR

@ -1,12 +1,13 @@
# This file is purely for development use, not included in any builds
from qdrant_client.http.models import Distance
from qdrant_client.http.models import VectorParams
from typesense.exceptions import ObjectNotFound # type: ignore
from danswer.configs.model_configs import DOC_EMBEDDING_DIM
from danswer.datastores.typesense.store import create_typesense_collection
from danswer.utils.clients import get_qdrant_client
from danswer.utils.clients import get_typesense_client
from danswer.utils.logger import setup_logger
from qdrant_client.http.models import Distance
from qdrant_client.http.models import VectorParams
from typesense.exceptions import ObjectNotFound # type: ignore
logger = setup_logger()

@ -1,4 +1,5 @@
import psycopg2
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD

@ -6,6 +6,9 @@ import subprocess
from datetime import datetime
import requests
from qdrant_client.http.models.models import SnapshotDescription
from typesense.exceptions import ObjectNotFound # type: ignore
from alembic import command
from alembic.config import Config
from danswer.configs.app_configs import POSTGRES_DB
@ -23,8 +26,6 @@ from danswer.datastores.typesense.store import create_typesense_collection
from danswer.utils.clients import get_qdrant_client
from danswer.utils.clients import get_typesense_client
from danswer.utils.logger import setup_logger
from qdrant_client.http.models.models import SnapshotDescription
from typesense.exceptions import ObjectNotFound # type: ignore
logger = setup_logger()

@ -5,6 +5,7 @@ import urllib
from pprint import pprint
import requests
from danswer.configs.app_configs import APP_PORT
from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
from danswer.configs.constants import SOURCE_TYPE