Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-04-08 03:48:14 +02:00
Better Logging (#184)
commit 4b699fdab3
parent 3436b864a3
@@ -125,7 +125,7 @@ The first time running Danswer, you will need to run the migrations. Navigate to
 alembic upgrade head
 ```

-To run the backend api server, navigate to `danswer/backend` and run:
+To run the backend API server, navigate to `danswer/backend` and run:
 ```bash
 DISABLE_AUTH=True TYPESENSE_API_KEY=local_dev_typesense DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage uvicorn danswer.main:app --reload --port 8080
 ```

@@ -135,6 +135,7 @@ To run the background job to check for connector updates and index documents, na
 PYTHONPATH=. TYPESENSE_API_KEY=local_dev_typesense DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/update.py
 ```

+Note: if you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.

 ### Formatting and Linting
 #### Backend
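As a concrete illustration of the note added above, debug logging can be enabled for the API server by prefixing the command from the same file. A minimal sketch, assuming the same `danswer/backend` working directory and environment as that snippet:

```bash
LOG_LEVEL=DEBUG DISABLE_AUTH=True TYPESENSE_API_KEY=local_dev_typesense DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage uvicorn danswer.main:app --reload --port 8080
```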
@@ -27,7 +27,7 @@ from danswer.db.auth import get_user_db
 from danswer.db.engine import get_async_session
 from danswer.db.models import AccessToken
 from danswer.db.models import User
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import Depends
 from fastapi import HTTPException
 from fastapi import Request
@@ -22,7 +22,7 @@ from danswer.db.models import Connector
 from danswer.db.models import IndexAttempt
 from danswer.db.models import IndexingStatus
 from danswer.utils.indexing_pipeline import build_indexing_pipeline
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from sqlalchemy.orm import Session

 logger = setup_logger()
@@ -2,7 +2,7 @@ import time
 from collections.abc import Callable
 from typing import Any

-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger


 logger = setup_logger()
@@ -56,4 +56,6 @@ class InferenceChunk(BaseChunk):
         }
         if METADATA in init_kwargs:
             init_kwargs[METADATA] = json.loads(init_kwargs[METADATA])
+        else:
+            init_kwargs[METADATA] = {}
         return cls(**init_kwargs)
@@ -138,6 +138,8 @@ DYNAMIC_CONFIG_STORE = os.environ.get(
     "DYNAMIC_CONFIG_STORE", "FileSystemBackedDynamicConfigStore"
 )
 DYNAMIC_CONFIG_DIR_PATH = os.environ.get("DYNAMIC_CONFIG_DIR_PATH", "/home/storage")
+# notset, debug, info, warning, error, or critical
+LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")


 #####
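This `LOG_LEVEL` setting is the knob the CONTRIBUTING note above refers to. For example, the background update job shown earlier could be run with debug logging, a sketch under the same assumptions as that snippet:

```bash
LOG_LEVEL=DEBUG PYTHONPATH=. TYPESENSE_API_KEY=local_dev_typesense DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/update.py
```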
@@ -11,7 +11,7 @@ from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from jira import JIRA
 from jira.resources import Issue

@@ -15,7 +15,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger


 logger = setup_logger()
@@ -8,7 +8,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from github import Github
 from github.PaginatedList import PaginatedList
 from github.PullRequest import PullRequest
@@ -14,7 +14,7 @@ from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from google.auth.transport.requests import Request  # type: ignore
 from google.oauth2.credentials import Credentials  # type: ignore
 from google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore
@@ -9,7 +9,7 @@ from danswer.db.credentials import update_credential_json
 from danswer.db.models import User
 from danswer.dynamic_configs import get_dynamic_config_store
 from danswer.server.models import GoogleAppCredentials
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from google.auth.transport.requests import Request  # type: ignore
 from google.oauth2.credentials import Credentials  # type: ignore
 from google_auth_oauthlib.flow import InstalledAppFlow  # type: ignore
@@ -15,7 +15,7 @@ from danswer.connectors.interfaces import PollConnector
 from danswer.connectors.interfaces import SecondsSinceUnixEpoch
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from dateutil import parser

 # Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min
@@ -18,7 +18,7 @@ from danswer.connectors.slack.utils import get_message_link
 from danswer.connectors.slack.utils import make_slack_api_call_paginated
 from danswer.connectors.slack.utils import make_slack_api_rate_limited
 from danswer.connectors.slack.utils import UserIdReplacer
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from slack_sdk import WebClient
 from slack_sdk.web import SlackResponse

@@ -4,7 +4,7 @@ from collections.abc import Callable
 from typing import Any
 from typing import cast

-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from slack_sdk import WebClient
 from slack_sdk.errors import SlackApiError
 from slack_sdk.web import SlackResponse
@@ -14,7 +14,7 @@ from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
 from danswer.connectors.models import Document
 from danswer.connectors.models import Section
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from playwright.sync_api import sync_playwright
 from PyPDF2 import PdfReader

@@ -20,7 +20,7 @@ from danswer.datastores.datastore_utils import DEFAULT_BATCH_SIZE
 from danswer.datastores.datastore_utils import get_uuid_from_chunk
 from danswer.datastores.datastore_utils import update_doc_user_map
 from danswer.utils.clients import get_qdrant_client
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from qdrant_client import QdrantClient
 from qdrant_client.http import models
 from qdrant_client.http.exceptions import ResponseHandlingException
@@ -13,7 +13,7 @@ from danswer.datastores.interfaces import VectorIndex
 from danswer.datastores.qdrant.indexing import index_qdrant_chunks
 from danswer.search.search_utils import get_default_embedding_model
 from danswer.utils.clients import get_qdrant_client
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
 from qdrant_client.http.exceptions import ResponseHandlingException
 from qdrant_client.http.exceptions import UnexpectedResponse
@@ -26,7 +26,7 @@ from danswer.datastores.datastore_utils import update_doc_user_map
 from danswer.datastores.interfaces import IndexFilter
 from danswer.datastores.interfaces import KeywordIndex
 from danswer.utils.clients import get_typesense_client
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from typesense.exceptions import ObjectNotFound  # type: ignore


@@ -7,7 +7,7 @@ from danswer.db.models import IndexAttempt
 from danswer.server.models import ConnectorBase
 from danswer.server.models import ObjectCreationIdResponse
 from danswer.server.models import StatusResponse
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import HTTPException
 from sqlalchemy import and_
 from sqlalchemy import func
@@ -4,7 +4,7 @@ from danswer.db.models import ConnectorCredentialPair
 from danswer.db.models import IndexingStatus
 from danswer.db.models import User
 from danswer.server.models import StatusResponse
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import HTTPException
 from sqlalchemy import func
 from sqlalchemy import select
@@ -5,7 +5,7 @@ from danswer.db.models import Credential
 from danswer.db.models import User
 from danswer.server.models import CredentialBase
 from danswer.server.models import ObjectCreationIdResponse
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 from sqlalchemy.sql.expression import or_
@@ -1,7 +1,7 @@
 from danswer.db.engine import translate_db_time_to_server_time
 from danswer.db.models import IndexAttempt
 from danswer.db.models import IndexingStatus
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from sqlalchemy import desc
 from sqlalchemy import select
 from sqlalchemy.orm import Session
@@ -14,7 +14,7 @@ from danswer.search.semantic_search import chunks_to_search_docs
 from danswer.search.semantic_search import retrieve_ranked_documents
 from danswer.server.models import QAResponse
 from danswer.server.models import QuestionRequest
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger

 logger = setup_logger()

@@ -35,7 +35,8 @@ from danswer.direct_qa.qa_prompts import json_processor
 from danswer.direct_qa.qa_prompts import QUOTE_PAT
 from danswer.direct_qa.qa_prompts import UNCERTAINTY_PAT
 from danswer.dynamic_configs import get_dynamic_config_store
-from danswer.utils.logging import setup_logger
+from danswer.dynamic_configs.interface import ConfigNotFoundError
+from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import clean_model_quote
 from danswer.utils.text_processing import shared_precompare_cleanup
 from danswer.utils.timing import log_function_time
@@ -179,11 +180,20 @@ def process_answer(
 ) -> tuple[str | None, dict[str, dict[str, str | int | None]] | None]:
     answer, quote_strings = separate_answer_quotes(answer_raw)
     if answer == UNCERTAINTY_PAT or not answer:
+        if answer == UNCERTAINTY_PAT:
+            logger.debug("Answer matched UNCERTAINTY_PAT")
+        else:
+            logger.debug("No answer extracted from raw output")
         return None, None

+    logger.info(f"Answer: {answer}")
     if not quote_strings:
+        logger.debug("No quotes extracted from raw output")
         return answer, None
+    logger.info(f"All quotes (including unmatched): {quote_strings}")
     quotes_dict = match_quotes_to_docs(quote_strings, chunks)
+    logger.info(f"Final quotes dict: {quotes_dict}")

     return answer, quotes_dict

@@ -263,9 +273,12 @@ class OpenAICompletionQA(OpenAIQAModel):
         self.prompt_processor = prompt_processor
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
-        self.api_key = api_key or get_openai_api_key()
         self.timeout = timeout
         self.include_metadata = include_metadata
+        try:
+            self.api_key = api_key or get_openai_api_key()
+        except ConfigNotFoundError:
+            raise RuntimeError("No OpenAI Key available")

     @log_function_time()
     def answer_question(
@@ -373,9 +386,12 @@ class OpenAIChatCompletionQA(OpenAIQAModel):
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
         self.reflexion_try_count = reflexion_try_count
-        self.api_key = api_key or get_openai_api_key()
         self.timeout = timeout
         self.include_metadata = include_metadata
+        try:
+            self.api_key = api_key or get_openai_api_key()
+        except ConfigNotFoundError:
+            raise RuntimeError("No OpenAI Key available")

     @log_function_time()
     def answer_question(
@@ -384,7 +400,7 @@ class OpenAIChatCompletionQA(OpenAIQAModel):
         context_docs: list[InferenceChunk],
     ) -> tuple[str | None, dict[str, dict[str, str | int | None]] | None]:
         messages = self.prompt_processor(query, context_docs, self.include_metadata)
-        logger.debug(messages)
+        logger.debug(json.dumps(messages, indent=4))
         model_output = ""
         for _ in range(self.reflexion_try_count + 1):
             openai_call = _handle_openai_exceptions_wrapper(
@@ -418,7 +434,7 @@ class OpenAIChatCompletionQA(OpenAIQAModel):
         self, query: str, context_docs: list[InferenceChunk]
     ) -> Generator[dict[str, Any] | None, None, None]:
         messages = self.prompt_processor(query, context_docs, self.include_metadata)
-        logger.debug(messages)
+        logger.debug(json.dumps(messages, indent=4))

         openai_call = _handle_openai_exceptions_wrapper(
             openai_call=openai.ChatCompletion.create,
@@ -446,10 +462,14 @@ class OpenAIChatCompletionQA(OpenAIQAModel):
             event_text = event_dict["content"]
             model_previous = model_output
             model_output += event_text
+            logger.debug(f"GPT returned token: {event_text}")

             if not found_answer_start and '{"answer":"' in model_output.replace(
                 " ", ""
             ).replace("\n", ""):
+                # Note, if the token that completes the pattern has additional text, for example if the token is "?
+                # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the
+                # event that the model outputs the UNCERTAINTY_PAT
                 found_answer_start = True
                 continue

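The whitespace-stripped pattern check in the hunk above can be exercised in isolation. A minimal standalone sketch (the `ANSWER_START_PAT` name, the helper, and the token stream are made up for illustration; this is not the repo's code):

```python
ANSWER_START_PAT = '{"answer":"'

def strip_ws(text: str) -> str:
    # Mirror the check above: ignore spaces and newlines when matching.
    return text.replace(" ", "").replace("\n", "")

# Hypothetical token stream from the model.
tokens = ['{"', 'answer', '":', ' "', 'Paris', ' is', ' the', ' capital', '"']
model_output = ""
found_answer_start = False
for token in tokens:
    model_output += token
    if not found_answer_start and ANSWER_START_PAT in strip_ws(model_output):
        found_answer_start = True
        print(f"answer starts after {len(model_output)} chars")
```

Tokens that straddle the pattern boundary (like the `' "'` token here) still trigger the match, which is the point of stripping whitespace before comparing.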
@@ -463,14 +483,5 @@ class OpenAIChatCompletionQA(OpenAIQAModel):
         logger.debug(model_output)

         answer, quotes_dict = process_answer(model_output, context_docs)
-        if answer:
-            logger.info(answer)
-        else:
-            logger.warning(
-                "Answer extraction from model output failed, most likely no quotes provided"
-            )

-        if quotes_dict is None:
-            yield {}
-        else:
-            yield quotes_dict
+        yield {} if quotes_dict is None else quotes_dict
@@ -9,7 +9,7 @@ from danswer.direct_qa.answer_question import answer_question
 from danswer.server.models import QAResponse
 from danswer.server.models import QuestionRequest
 from danswer.server.models import SearchDoc
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from retry import retry
 from slack_sdk import WebClient
 from slack_sdk.socket_mode import SocketModeClient
@@ -8,7 +8,10 @@ from danswer.auth.users import fastapi_users
 from danswer.auth.users import google_oauth_client
 from danswer.configs.app_configs import APP_HOST
 from danswer.configs.app_configs import APP_PORT
+from danswer.configs.app_configs import DISABLE_AUTH
 from danswer.configs.app_configs import ENABLE_OAUTH
+from danswer.configs.app_configs import GOOGLE_OAUTH_CLIENT_ID
+from danswer.configs.app_configs import GOOGLE_OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import SECRET
 from danswer.configs.app_configs import TYPESENSE_DEFAULT_COLLECTION
 from danswer.configs.app_configs import WEB_DOMAIN
@@ -16,11 +19,14 @@ from danswer.datastores.qdrant.indexing import list_qdrant_collections
 from danswer.datastores.typesense.store import check_typesense_collection_exist
 from danswer.datastores.typesense.store import create_typesense_collection
 from danswer.db.credentials import create_initial_public_credential
+from danswer.direct_qa.key_validation import check_openai_api_key_is_valid
+from danswer.direct_qa.llm import get_openai_api_key
+from danswer.dynamic_configs.interface import ConfigNotFoundError
 from danswer.server.event_loading import router as event_processing_router
 from danswer.server.health import router as health_router
 from danswer.server.manage import router as admin_router
 from danswer.server.search_backend import router as backend_router
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import FastAPI
 from fastapi import Request
 from fastapi.exceptions import RequestValidationError
@@ -117,6 +123,17 @@ def get_application() -> FastAPI:
         from danswer.datastores.qdrant.indexing import create_qdrant_collection
         from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION

+        auth_status = "off" if DISABLE_AUTH else "on"
+        logger.info(f"User auth is turned {auth_status}")
+
+        if not ENABLE_OAUTH:
+            logger.debug("OAuth is turned off")
+        else:
+            if not GOOGLE_OAUTH_CLIENT_ID or not GOOGLE_OAUTH_CLIENT_SECRET:
+                logger.warning("OAuth is turned on but incorrectly configured")
+            else:
+                logger.debug("OAuth is turned on")
+
         logger.info("Warming up local NLP models.")
         warm_up_models()

@@ -7,15 +7,22 @@ from danswer.search.search_utils import get_default_intent_model
 from danswer.search.search_utils import get_default_intent_model_tokenizer
 from danswer.search.search_utils import get_default_tokenizer
 from danswer.server.models import HelperResponse
+from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
 from transformers import AutoTokenizer  # type:ignore

+logger = setup_logger()
+

 def count_unk_tokens(text: str, tokenizer: AutoTokenizer) -> int:
     """Unclear if the wordpiece tokenizer used is actually tokenizing anything as the [UNK] token
     It splits up even foreign characters and unicode emojis without using UNK"""
     tokenized_text = tokenizer.tokenize(text)
-    return len([token for token in tokenized_text if token == tokenizer.unk_token])
+    num_unk_tokens = len(
+        [token for token in tokenized_text if token == tokenizer.unk_token]
+    )
+    logger.debug(f"Total of {num_unk_tokens} UNKNOWN tokens found")
+    return num_unk_tokens


 @log_function_time()
@@ -34,16 +41,26 @@ def query_intent(query: str) -> tuple[SearchType, QueryFlow]:
     if qa > 20:
         # If one class is very certain, choose it still
         if keyword > 70:
-            return SearchType.KEYWORD, QueryFlow.SEARCH
-        if semantic > 70:
-            return SearchType.SEMANTIC, QueryFlow.SEARCH
+            predicted_search = SearchType.KEYWORD
+            predicted_flow = QueryFlow.SEARCH
+        elif semantic > 70:
+            predicted_search = SearchType.SEMANTIC
+            predicted_flow = QueryFlow.SEARCH
         # If it's a QA question, it must be a "Semantic" style statement/question
-        return SearchType.SEMANTIC, QueryFlow.QUESTION_ANSWER
+        else:
+            predicted_search = SearchType.SEMANTIC
+            predicted_flow = QueryFlow.QUESTION_ANSWER
     # If definitely not a QA question, choose between keyword or semantic search
     elif keyword > semantic:
-        return SearchType.KEYWORD, QueryFlow.SEARCH
+        predicted_search = SearchType.KEYWORD
+        predicted_flow = QueryFlow.SEARCH
     else:
-        return SearchType.SEMANTIC, QueryFlow.SEARCH
+        predicted_search = SearchType.SEMANTIC
+        predicted_flow = QueryFlow.SEARCH
+
+    logger.debug(f"Predicted Search: {predicted_search}")
+    logger.debug(f"Predicted Flow: {predicted_flow}")
+    return predicted_search, predicted_flow


 def recommend_search_flow(
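The refactor above funnels every branch into a single exit point so both predictions can be logged before returning. A self-contained sketch of the same single-exit shape (the enums, thresholds, and scores below are stand-ins mirroring the diff, not the repo's module):

```python
from enum import Enum

class SearchType(str, Enum):
    KEYWORD = "keyword"
    SEMANTIC = "semantic"

class QueryFlow(str, Enum):
    SEARCH = "search"
    QUESTION_ANSWER = "question_answer"

def classify(keyword: float, semantic: float, qa: float) -> tuple[SearchType, QueryFlow]:
    # Same structure as the diff: assign in each branch, log once, return once.
    if qa > 20:
        if keyword > 70:
            predicted = (SearchType.KEYWORD, QueryFlow.SEARCH)
        elif semantic > 70:
            predicted = (SearchType.SEMANTIC, QueryFlow.SEARCH)
        else:
            predicted = (SearchType.SEMANTIC, QueryFlow.QUESTION_ANSWER)
    elif keyword > semantic:
        predicted = (SearchType.KEYWORD, QueryFlow.SEARCH)
    else:
        predicted = (SearchType.SEMANTIC, QueryFlow.SEARCH)
    print(f"Predicted: {predicted}")  # stand-in for the two logger.debug calls
    return predicted

print(classify(keyword=10, semantic=30, qa=40))  # made-up scores -> semantic QA
```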
@@ -5,7 +5,7 @@ from danswer.chunking.models import InferenceChunk
 from danswer.configs.app_configs import NUM_RETURNED_HITS
 from danswer.datastores.interfaces import IndexFilter
 from danswer.datastores.interfaces import KeywordIndex
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
 from nltk.corpus import stopwords  # type:ignore
 from nltk.stem import WordNetLemmatizer  # type:ignore
@@ -45,7 +45,7 @@ def retrieve_keyword_documents(
     if not top_chunks:
         filters_log_msg = json.dumps(filters, separators=(",", ":")).replace("\n", "")
         logger.warning(
-            f"Keyword search returned no results...\nfilters: {filters_log_msg}\nedited query: {edited_query}"
+            f"Keyword search returned no results - Filters: {filters_log_msg}\tEdited Query: {edited_query}"
         )
         return None
     return top_chunks
@@ -16,7 +16,7 @@ from danswer.search.models import Embedder
 from danswer.search.search_utils import get_default_embedding_model
 from danswer.search.search_utils import get_default_reranking_model_ensemble
 from danswer.server.models import SearchDoc
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
 from sentence_transformers import SentenceTransformer  # type: ignore

@@ -47,12 +47,16 @@ def semantic_reranking(
     chunks: list[InferenceChunk],
 ) -> list[InferenceChunk]:
     cross_encoders = get_default_reranking_model_ensemble()
-    sim_scores = sum([encoder.predict([(query, chunk.content) for chunk in chunks]) for encoder in cross_encoders])  # type: ignore
-    scored_results = list(zip(sim_scores, chunks))
+    sim_scores = [
+        encoder.predict([(query, chunk.content) for chunk in chunks])  # type: ignore
+        for encoder in cross_encoders
+    ]
+    averaged_sim_scores = sum(sim_scores) / len(sim_scores)
+    scored_results = list(zip(averaged_sim_scores, chunks))
     scored_results.sort(key=lambda x: x[0], reverse=True)
     ranked_sim_scores, ranked_chunks = zip(*scored_results)

-    logger.debug(f"Reranked similarity scores: {str(ranked_sim_scores)}")
+    logger.debug(f"Reranked similarity scores: {ranked_sim_scores}")

     return ranked_chunks

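The change above averages the ensemble scores instead of summing them, so the logged values stay on each encoder's own scale regardless of ensemble size. A self-contained sketch of that element-wise averaging, with numpy arrays standing in for two cross-encoders' outputs (the values are made up):

```python
import numpy as np

# Made-up score vectors, one per cross-encoder, one score per chunk.
sim_scores = [
    np.array([0.9, 0.2, 0.5]),
    np.array([0.7, 0.4, 0.3]),
]
averaged_sim_scores = sum(sim_scores) / len(sim_scores)  # element-wise mean
print(averaged_sim_scores)  # [0.8 0.3 0.4]
```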
@@ -4,7 +4,7 @@ from danswer.connectors.slack.connector import get_channel_info
 from danswer.connectors.slack.connector import get_thread
 from danswer.connectors.slack.connector import thread_to_doc
 from danswer.utils.indexing_pipeline import build_indexing_pipeline
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import APIRouter
 from pydantic import BaseModel
 from pydantic import Extra
@@ -57,7 +57,7 @@ from danswer.server.models import RunConnectorRequest
 from danswer.server.models import StatusResponse
 from danswer.server.models import UserByEmail
 from danswer.server.models import UserRoleResponse
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi import HTTPException
@@ -21,7 +21,7 @@ from danswer.server.models import HelperResponse
 from danswer.server.models import QAResponse
 from danswer.server.models import QuestionRequest
 from danswer.server.models import SearchResponse
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from fastapi import APIRouter
 from fastapi import Depends
 from fastapi.responses import StreamingResponse
@@ -93,18 +93,23 @@ def direct_qa(
 def stream_direct_qa(
     question: QuestionRequest, user: User = Depends(current_user)
 ) -> StreamingResponse:
+    send_packet_debug_msg = "Sending Packet: {}"
     top_documents_key = "top_documents"
     unranked_top_docs_key = "unranked_top_documents"
     predicted_flow_key = "predicted_flow"
     predicted_search_key = "predicted_search"

+    logger.debug(f"Received QA query: {question.query}")
+    logger.debug(f"Query filters: {question.filters}")
+
     def stream_qa_portions() -> Generator[str, None, None]:
+        start_time = time.time()
+
         query = question.query
         collection = question.collection
         filters = question.filters
         use_keyword = question.use_keyword
         offset_count = question.offset if question.offset is not None else 0
-        logger.info(f"Received QA query: {query}")

         predicted_search, predicted_flow = query_intent(query)
         if use_keyword is None:
@@ -121,14 +126,15 @@ def stream_direct_qa(
             query, user_id, filters, QdrantIndex(collection)
         )
         if not ranked_chunks:
-            yield get_json_line(
-                {
-                    top_documents_key: None,
-                    unranked_top_docs_key: None,
-                    predicted_flow_key: predicted_flow,
-                    predicted_search_key: predicted_search,
-                }
-            )
+            logger.debug("No Documents Found")
+            empty_docs_result = {
+                top_documents_key: None,
+                unranked_top_docs_key: None,
+                predicted_flow_key: predicted_flow,
+                predicted_search_key: predicted_search,
+            }
+            logger.debug(send_packet_debug_msg.format(empty_docs_result))
+            yield get_json_line(empty_docs_result)
             return

         top_docs = chunks_to_search_docs(ranked_chunks)
@@ -139,6 +145,7 @@ def stream_direct_qa(
             predicted_flow_key: predicted_flow,
             predicted_search_key: predicted_search,
         }
+        logger.debug(send_packet_debug_msg.format(initial_response_dict))
         yield get_json_line(initial_response_dict)

         qa_model = get_default_backend_qa_model(timeout=QA_TIMEOUT)
@@ -156,11 +163,12 @@ def stream_direct_qa(
             ):
                 if response_dict is None:
                     continue
-                logger.debug(response_dict)
+                logger.debug(f"Sending packet: {response_dict}")
                 yield get_json_line(response_dict)
         except Exception:
             # exception is logged in the answer_question method, no need to re-log
             pass
+        logger.info(f"Total QA took {time.time() - start_time} seconds")
         return

     return StreamingResponse(stream_qa_portions(), media_type="application/json")
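`get_json_line` itself is not shown in this diff. A plausible minimal implementation consistent with how it is called above, serializing one packet per line so the client can split the stream on newlines, might look like the following (an assumption, not the repo's verified code):

```python
import json

def get_json_line(json_dict: dict) -> str:
    # One JSON object per line (newline-delimited JSON streaming).
    return json.dumps(json_dict) + "\n"
```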
@@ -12,7 +12,7 @@ from danswer.datastores.qdrant.store import QdrantIndex
 from danswer.datastores.typesense.store import TypesenseIndex
 from danswer.search.models import Embedder
 from danswer.search.semantic_search import DefaultEmbedder
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger

 logger = setup_logger()

@@ -1,8 +1,25 @@
 import logging
 from logging import Logger

+from danswer.configs.app_configs import LOG_LEVEL
+

-def setup_logger(name: str = __name__, log_level: int = logging.INFO) -> Logger:
+def get_log_level_from_str(log_level_str: str = LOG_LEVEL) -> int:
+    log_level_dict = {
+        "CRITICAL": logging.CRITICAL,
+        "ERROR": logging.ERROR,
+        "WARNING": logging.WARNING,
+        "INFO": logging.INFO,
+        "DEBUG": logging.DEBUG,
+        "NOTSET": logging.NOTSET,
+    }
+
+    return log_level_dict.get(log_level_str.upper(), logging.INFO)
+
+
+def setup_logger(
+    name: str = __name__, log_level: int = get_log_level_from_str()
+) -> Logger:
     logger = logging.getLogger(name)

     # If the logger already has handlers, assume it was already configured and return it.
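With this change, call sites keep invoking `setup_logger()` with no arguments and pick up the threshold from the `LOG_LEVEL` environment variable via `get_log_level_from_str` (unrecognized strings fall back to `INFO`). A quick usage sketch; the import path is the repo's own module, so this only runs inside the backend environment:

```python
from danswer.utils.logger import setup_logger

logger = setup_logger()  # threshold comes from LOG_LEVEL, default "info"
logger.info("visible at the default level")
logger.debug("only visible when LOG_LEVEL=DEBUG is set")
```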
@@ -4,7 +4,7 @@ from typing import Any
 from typing import cast
 from typing import TypeVar

-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger

 logger = setup_logger()

@@ -3,7 +3,7 @@ from danswer.configs.model_configs import DOC_EMBEDDING_DIM
 from danswer.datastores.typesense.store import create_typesense_collection
 from danswer.utils.clients import get_qdrant_client
 from danswer.utils.clients import get_typesense_client
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from qdrant_client.http.models import Distance
 from qdrant_client.http.models import VectorParams
 from typesense.exceptions import ObjectNotFound  # type: ignore
@@ -22,7 +22,7 @@ from danswer.datastores.qdrant.indexing import list_qdrant_collections
 from danswer.datastores.typesense.store import create_typesense_collection
 from danswer.utils.clients import get_qdrant_client
 from danswer.utils.clients import get_typesense_client
-from danswer.utils.logging import setup_logger
+from danswer.utils.logger import setup_logger
 from qdrant_client.http.models.models import SnapshotDescription
 from typesense.exceptions import ObjectNotFound  # type: ignore