mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-24 06:50:57 +02:00
Add env variable to disable streaming for the DefaultMultiLLM class
This commit is contained in:
parent
66d95690cb
commit
648f2d06bf
@ -91,3 +91,9 @@ GEN_AI_HISTORY_CUTOFF = 3000
|
||||
# error if the total # of tokens exceeds the max input tokens.
|
||||
GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS = 512
|
||||
GEN_AI_TEMPERATURE = float(os.environ.get("GEN_AI_TEMPERATURE") or 0)
|
||||
|
||||
# Set DISABLE_LITELLM_STREAMING to "true" if you are using a custom LLM inference provider
|
||||
# that doesn't support the streaming response format AND you are still using the langchain/litellm LLM class
|
||||
DISABLE_LITELLM_STREAMING = (
|
||||
os.environ.get("DISABLE_LITELLM_STREAMING") or "false"
|
||||
).lower() == "true"
|
||||
|
@ -7,6 +7,7 @@ from langchain.schema.language_model import LanguageModelInput
|
||||
from langchain_community.chat_models import ChatLiteLLM
|
||||
|
||||
from danswer.configs.app_configs import LOG_ALL_MODEL_INTERACTIONS
|
||||
from danswer.configs.model_configs import DISABLE_LITELLM_STREAMING
|
||||
from danswer.configs.model_configs import GEN_AI_API_ENDPOINT
|
||||
from danswer.configs.model_configs import GEN_AI_API_VERSION
|
||||
from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE
|
||||
@ -70,6 +71,9 @@ class LangChainChatLLM(LLM, abc.ABC):
|
||||
if LOG_ALL_MODEL_INTERACTIONS:
|
||||
self._log_prompt(prompt)
|
||||
|
||||
if DISABLE_LITELLM_STREAMING:
|
||||
return [self.invoke(prompt)]
|
||||
|
||||
output_tokens = []
|
||||
for token in message_generator_to_string_generator(self.llm.stream(prompt)):
|
||||
output_tokens.append(token)
|
||||
|
@ -44,6 +44,7 @@ services:
|
||||
- DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
|
||||
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
|
||||
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
|
||||
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
|
||||
# if set, allows for the use of the token budget system
|
||||
- TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
|
||||
# Enables the use of bedrock models
|
||||
@ -117,6 +118,7 @@ services:
|
||||
- DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
|
||||
- DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
|
||||
- GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
|
||||
- DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
|
||||
# Query Options
|
||||
- DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
|
||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||
|
Loading…
x
Reference in New Issue
Block a user