mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-29 11:12:02 +01:00
Fix Model Server (#1320)
This commit is contained in:
parent
f346c2fc86
commit
b59912884b
@ -13,19 +13,13 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Needed for model configs and defaults
|
||||
COPY ./danswer/configs /app/danswer/configs
|
||||
COPY ./danswer/dynamic_configs /app/danswer/dynamic_configs
|
||||
|
||||
# Utils used by model server
|
||||
COPY ./danswer/utils/logger.py /app/danswer/utils/logger.py
|
||||
COPY ./danswer/utils/timing.py /app/danswer/utils/timing.py
|
||||
COPY ./danswer/utils/telemetry.py /app/danswer/utils/telemetry.py
|
||||
|
||||
# Place to fetch version information
|
||||
COPY ./danswer/__init__.py /app/danswer/__init__.py
|
||||
|
||||
# Request/Response models
|
||||
# Shared between Danswer Backend and Model Server
|
||||
COPY ./shared_configs /app/shared_configs
|
||||
|
||||
# Model Server main code
|
||||
|
@ -15,9 +15,6 @@ from danswer.background.indexing.run_indexing import run_indexing_entrypoint
|
||||
from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT
|
||||
from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED
|
||||
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
|
||||
from danswer.configs.app_configs import INDEXING_MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import LOG_LEVEL
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.app_configs import NUM_INDEXING_WORKERS
|
||||
from danswer.db.connector import fetch_connectors
|
||||
from danswer.db.connector_credential_pair import get_connector_credential_pairs
|
||||
@ -46,6 +43,9 @@ from danswer.db.models import IndexingStatus
|
||||
from danswer.db.models import IndexModelStatus
|
||||
from danswer.search.search_nlp_models import warm_up_encoders
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
|
||||
from shared_configs.configs import LOG_LEVEL
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
@ -209,19 +209,6 @@ DISABLE_DOCUMENT_CLEANUP = (
|
||||
)
|
||||
|
||||
|
||||
#####
|
||||
# Model Server Configs
|
||||
#####
|
||||
MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
|
||||
MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
|
||||
MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
|
||||
# Indexing should use a separate model server so that indexing load does not introduce delay
|
||||
# for inference
|
||||
INDEXING_MODEL_SERVER_HOST = (
|
||||
os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
|
||||
)
|
||||
|
||||
|
||||
#####
|
||||
# Miscellaneous
|
||||
#####
|
||||
@ -246,8 +233,7 @@ LOG_VESPA_TIMING_INFORMATION = (
|
||||
)
|
||||
# Anonymous usage telemetry
|
||||
DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
|
||||
# notset, debug, info, warning, error, or critical
|
||||
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
|
||||
|
||||
TOKEN_BUDGET_GLOBALLY_ENABLED = (
|
||||
os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true"
|
||||
)
|
||||
|
@ -51,7 +51,7 @@ from danswer.search.models import BaseFilters
|
||||
from danswer.search.models import OptionalSearchSetting
|
||||
from danswer.search.models import RetrievalDetails
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_ASYNC_FLOW
|
||||
from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW
|
||||
|
||||
logger_base = setup_logger()
|
||||
|
||||
|
@ -10,8 +10,6 @@ from slack_sdk.socket_mode.request import SocketModeRequest
|
||||
from slack_sdk.socket_mode.response import SocketModeResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.app_configs import MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.constants import MessageType
|
||||
from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL
|
||||
from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER
|
||||
@ -47,6 +45,8 @@ from danswer.one_shot_answer.models import ThreadMessage
|
||||
from danswer.search.search_nlp_models import warm_up_encoders
|
||||
from danswer.server.manage.models import SlackBotTokens
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
@ -4,8 +4,6 @@ from abc import abstractmethod
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.configs.app_configs import ENABLE_MINI_CHUNK
|
||||
from danswer.configs.app_configs import INDEXING_MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
|
||||
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
|
||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||
@ -20,6 +18,8 @@ from danswer.search.enums import EmbedTextType
|
||||
from danswer.search.search_nlp_models import EmbeddingModel
|
||||
from danswer.utils.batching import batch_list
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
@ -20,7 +20,6 @@ from langchain.schema.messages import HumanMessage
|
||||
from langchain.schema.messages import SystemMessage
|
||||
from tiktoken.core import Encoding
|
||||
|
||||
from danswer.configs.app_configs import LOG_LEVEL
|
||||
from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY
|
||||
from danswer.configs.constants import GEN_AI_DETECTED_MODEL
|
||||
from danswer.configs.constants import MessageType
|
||||
@ -37,6 +36,7 @@ from danswer.dynamic_configs.interface import ConfigNotFoundError
|
||||
from danswer.indexing.models import InferenceChunk
|
||||
from danswer.llm.interfaces import LLM
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import LOG_LEVEL
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from danswer.llm.answering.models import PreviousMessage
|
||||
|
@ -28,8 +28,6 @@ from danswer.configs.app_configs import APP_PORT
|
||||
from danswer.configs.app_configs import AUTH_TYPE
|
||||
from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
|
||||
from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP
|
||||
from danswer.configs.app_configs import MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.app_configs import OAUTH_CLIENT_ID
|
||||
from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
|
||||
from danswer.configs.app_configs import SECRET
|
||||
@ -81,7 +79,9 @@ from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.telemetry import optional_telemetry
|
||||
from danswer.utils.telemetry import RecordType
|
||||
from danswer.utils.variable_functionality import fetch_versioned_implementation
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.configs import MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
@ -11,7 +11,7 @@ from danswer.configs.constants import DocumentSource
|
||||
from danswer.db.models import Persona
|
||||
from danswer.search.enums import OptionalSearchSetting
|
||||
from danswer.search.enums import SearchType
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
|
||||
|
||||
MAX_METRICS_CONTENT = (
|
||||
|
@ -21,7 +21,7 @@ from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.threadpool_concurrency import FunctionCall
|
||||
from danswer.utils.threadpool_concurrency import run_functions_in_parallel
|
||||
from danswer.utils.timing import log_function_time
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
@ -7,8 +7,6 @@ from nltk.tokenize import word_tokenize # type:ignore
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.chat.models import LlmDoc
|
||||
from danswer.configs.app_configs import MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.chat_configs import HYBRID_ALPHA
|
||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||
from danswer.db.embedding_model import get_current_db_embedding_model
|
||||
@ -26,6 +24,8 @@ from danswer.secondary_llm_flows.query_expansion import multilingual_query_expan
|
||||
from danswer.utils.logger import setup_logger
|
||||
from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
|
||||
from danswer.utils.timing import log_function_time
|
||||
from shared_configs.configs import MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
@ -7,12 +7,12 @@ from typing import TYPE_CHECKING
|
||||
import requests
|
||||
from transformers import logging as transformer_logging # type:ignore
|
||||
|
||||
from danswer.configs.app_configs import MODEL_SERVER_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
|
||||
from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL
|
||||
from danswer.search.enums import EmbedTextType
|
||||
from danswer.utils.logger import setup_logger
|
||||
from shared_configs.configs import MODEL_SERVER_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
from shared_configs.model_server_models import EmbedRequest
|
||||
from shared_configs.model_server_models import EmbedResponse
|
||||
from shared_configs.model_server_models import IntentRequest
|
||||
|
@ -3,7 +3,7 @@ import os
|
||||
from collections.abc import MutableMapping
|
||||
from typing import Any
|
||||
|
||||
from danswer.configs.app_configs import LOG_LEVEL
|
||||
from shared_configs.configs import LOG_LEVEL
|
||||
|
||||
|
||||
class IndexAttemptSingleton:
|
||||
|
@ -8,11 +8,11 @@ from transformers import TFDistilBertForSequenceClassification
|
||||
|
||||
from model_server.constants import MODEL_WARM_UP_STRING
|
||||
from model_server.utils import simple_log_function_time
|
||||
from shared_configs.configs import INDEXING_ONLY
|
||||
from shared_configs.configs import INTENT_MODEL_CONTEXT_SIZE
|
||||
from shared_configs.configs import INTENT_MODEL_VERSION
|
||||
from shared_configs.model_server_models import IntentRequest
|
||||
from shared_configs.model_server_models import IntentResponse
|
||||
from shared_configs.nlp_model_configs import INDEXING_ONLY
|
||||
from shared_configs.nlp_model_configs import INTENT_MODEL_CONTEXT_SIZE
|
||||
from shared_configs.nlp_model_configs import INTENT_MODEL_VERSION
|
||||
|
||||
|
||||
router = APIRouter(prefix="/custom")
|
||||
|
@ -9,13 +9,13 @@ from sentence_transformers import SentenceTransformer # type: ignore
|
||||
from danswer.utils.logger import setup_logger
|
||||
from model_server.constants import MODEL_WARM_UP_STRING
|
||||
from model_server.utils import simple_log_function_time
|
||||
from shared_configs.configs import CROSS_EMBED_CONTEXT_SIZE
|
||||
from shared_configs.configs import CROSS_ENCODER_MODEL_ENSEMBLE
|
||||
from shared_configs.configs import INDEXING_ONLY
|
||||
from shared_configs.model_server_models import EmbedRequest
|
||||
from shared_configs.model_server_models import EmbedResponse
|
||||
from shared_configs.model_server_models import RerankRequest
|
||||
from shared_configs.model_server_models import RerankResponse
|
||||
from shared_configs.nlp_model_configs import CROSS_EMBED_CONTEXT_SIZE
|
||||
from shared_configs.nlp_model_configs import CROSS_ENCODER_MODEL_ENSEMBLE
|
||||
from shared_configs.nlp_model_configs import INDEXING_ONLY
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
@ -8,17 +8,17 @@ from fastapi import FastAPI
|
||||
from transformers import logging as transformer_logging # type:ignore
|
||||
|
||||
from danswer import __version__
|
||||
from danswer.configs.app_configs import MODEL_SERVER_ALLOWED_HOST
|
||||
from danswer.configs.app_configs import MODEL_SERVER_PORT
|
||||
from danswer.utils.logger import setup_logger
|
||||
from model_server.custom_models import router as custom_models_router
|
||||
from model_server.custom_models import warm_up_intent_model
|
||||
from model_server.encoders import router as encoders_router
|
||||
from model_server.encoders import warm_up_cross_encoders
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_ASYNC_FLOW
|
||||
from shared_configs.nlp_model_configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.nlp_model_configs import INDEXING_ONLY
|
||||
from shared_configs.nlp_model_configs import MIN_THREADS_ML_MODELS
|
||||
from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW
|
||||
from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW
|
||||
from shared_configs.configs import INDEXING_ONLY
|
||||
from shared_configs.configs import MIN_THREADS_ML_MODELS
|
||||
from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST
|
||||
from shared_configs.configs import MODEL_SERVER_PORT
|
||||
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
|
||||
|
@ -1,4 +1,5 @@
|
||||
fastapi==0.109.2
|
||||
h5py==3.9.0
|
||||
pydantic==1.10.7
|
||||
safetensors==0.4.2
|
||||
sentence-transformers==2.6.1
|
||||
|
@ -1,6 +1,15 @@
|
||||
import os
|
||||
|
||||
|
||||
MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or "localhost"
|
||||
MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0"
|
||||
MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000")
|
||||
# Indexing should use a separate model server so that indexing load does not introduce delay
|
||||
# for inference
|
||||
INDEXING_MODEL_SERVER_HOST = (
|
||||
os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST
|
||||
)
|
||||
|
||||
# Danswer custom Deep Learning Models
|
||||
INTENT_MODEL_VERSION = "danswer/intent-model"
|
||||
INTENT_MODEL_CONTEXT_SIZE = 256
|
||||
@ -23,4 +32,9 @@ CROSS_EMBED_CONTEXT_SIZE = 512
|
||||
# model. If torch finds more threads on its own, this value is not used.
|
||||
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
|
||||
|
||||
# A model server with INDEXING_ONLY set will raise an exception if used for reranking
|
||||
# or intent classification
|
||||
INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true"
|
||||
|
||||
# notset, debug, info, warning, error, or critical
|
||||
LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
|
Loading…
x
Reference in New Issue
Block a user