mirror of https://github.com/danswer-ai/danswer.git
Add env variable to disable streaming for the DefaultMultiLLM class
commit 648f2d06bf
parent 66d95690cb
@@ -91,3 +91,9 @@ GEN_AI_HISTORY_CUTOFF = 3000
 # error if the total # of tokens exceeds the max input tokens.
 GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS = 512
 GEN_AI_TEMPERATURE = float(os.environ.get("GEN_AI_TEMPERATURE") or 0)
+
+# should be used if you are using a custom LLM inference provider that doesn't support
+# streaming format AND you are still using the langchain/litellm LLM class
+DISABLE_LITELLM_STREAMING = (
+    os.environ.get("DISABLE_LITELLM_STREAMING") or "false"
+).lower() == "true"
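
The new setting follows the same boolean-env-var pattern as the surrounding config: an unset or empty variable falls back to the string "false", and only a case-insensitive "true" turns the flag on (values like "1" or "yes" stay off). A minimal self-contained sketch of that pattern; env_flag is a hypothetical helper for illustration, not part of the Danswer codebase:

import os

def env_flag(name: str, default: str = "false") -> bool:
    # os.environ.get(name) is None when the variable is unset;
    # `or` also catches the empty string, so both fall back to default
    return (os.environ.get(name) or default).lower() == "true"

# equivalent to the DISABLE_LITELLM_STREAMING definition above
DISABLE_LITELLM_STREAMING = env_flag("DISABLE_LITELLM_STREAMING")
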
@@ -7,6 +7,7 @@ from langchain.schema.language_model import LanguageModelInput
 from langchain_community.chat_models import ChatLiteLLM
 
 from danswer.configs.app_configs import LOG_ALL_MODEL_INTERACTIONS
+from danswer.configs.model_configs import DISABLE_LITELLM_STREAMING
 from danswer.configs.model_configs import GEN_AI_API_ENDPOINT
 from danswer.configs.model_configs import GEN_AI_API_VERSION
 from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE
@@ -70,6 +71,9 @@ class LangChainChatLLM(LLM, abc.ABC):
         if LOG_ALL_MODEL_INTERACTIONS:
             self._log_prompt(prompt)
 
+        if DISABLE_LITELLM_STREAMING:
+            return [self.invoke(prompt)]
+
         output_tokens = []
         for token in message_generator_to_string_generator(self.llm.stream(prompt)):
             output_tokens.append(token)
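
With the flag set, stream() short-circuits into one blocking invoke() call and wraps the result in a single-element list. A list is iterable, so callers that loop over stream() output keep working unchanged; they just receive the whole completion as one chunk instead of token by token. A minimal sketch of that fallback shape; SketchLLM is a hypothetical stand-in, not the actual LangChainChatLLM:

import os

class SketchLLM:
    def invoke(self, prompt: str) -> str:
        # stand-in for one blocking, non-streaming model call
        return "full model response"

    def stream(self, prompt: str):
        if os.environ.get("DISABLE_LITELLM_STREAMING", "false").lower() == "true":
            # single-element list: still iterable, but only one chunk
            return [self.invoke(prompt)]
        return iter(["streamed ", "model ", "response"])  # stand-in tokens

# consumer code is identical either way:
for chunk in SketchLLM().stream("hello"):
    print(chunk, end="")

This matches the comment in the config hunk: the option exists for custom inference providers that don't support the streaming format, and it changes only the delivery (one complete response instead of incremental tokens), not the interface.
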
@@ -44,6 +44,7 @@ services:
       - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-}
       - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
       - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
+      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
       # if set, allows for the use of the token budget system
       - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-}
       # Enables the use of bedrock models
@@ -117,6 +118,7 @@ services:
       - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-}
       - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-}
       - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-}
+      - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-}
       # Query Options
       - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years)
       - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
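
Both compose services pass the variable through with the ${DISABLE_LITELLM_STREAMING:-} substitution, so setting DISABLE_LITELLM_STREAMING=true in the shell environment (or in the .env file that docker compose reads) propagates into the containers. Left unset, the empty value falls through to the "false" default in the config hunk above, leaving streaming behavior unchanged for existing deployments.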