mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-07-21 02:23:31 +02:00
Touchup for Multilingual Users (#1725)
This commit is contained in:
@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
|
|||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision = "bc9771dccadf"
|
revision = "bc9771dccadf"
|
||||||
down_revision = "0568ccf46a6b"
|
down_revision = "0568ccf46a6b"
|
||||||
branch_labels = None
|
branch_labels: None = None
|
||||||
depends_on = None
|
depends_on: None = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
|
@ -64,6 +64,14 @@ TITLE_CONTENT_RATIO = max(
|
|||||||
# A list of languages passed to the LLM to rephrase the query
|
# A list of languages passed to the LLM to rephrase the query
|
||||||
# For example "English,French,Spanish", be sure to use the "," separator
|
# For example "English,French,Spanish", be sure to use the "," separator
|
||||||
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
|
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
|
||||||
|
LANGUAGE_HINT = "\n" + (
|
||||||
|
os.environ.get("LANGUAGE_HINT")
|
||||||
|
or "IMPORTANT: Respond in the same language as my query!"
|
||||||
|
)
|
||||||
|
LANGUAGE_CHAT_NAMING_HINT = (
|
||||||
|
os.environ.get("LANGUAGE_CHAT_NAMING_HINT")
|
||||||
|
or "The name of the conversation must be in the same language as the user query."
|
||||||
|
)
|
||||||
|
|
||||||
# Stops streaming answers back to the UI if this pattern is seen:
|
# Stops streaming answers back to the UI if this pattern is seen:
|
||||||
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
|
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
from langchain.schema.messages import HumanMessage
|
from langchain.schema.messages import HumanMessage
|
||||||
|
|
||||||
from danswer.chat.models import LlmDoc
|
from danswer.chat.models import LlmDoc
|
||||||
|
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
|
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
|
||||||
from danswer.llm.answering.models import PromptConfig
|
from danswer.llm.answering.models import PromptConfig
|
||||||
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
|
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
|
||||||
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
|
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
|
||||||
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
|
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
|
||||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
|
||||||
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
|
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
|
||||||
from danswer.prompts.prompt_utils import add_date_time_to_prompt
|
from danswer.prompts.prompt_utils import add_date_time_to_prompt
|
||||||
from danswer.prompts.prompt_utils import build_complete_context_str
|
from danswer.prompts.prompt_utils import build_complete_context_str
|
||||||
|
@ -188,7 +188,7 @@ Query:
|
|||||||
|
|
||||||
|
|
||||||
CHAT_NAMING = f"""
|
CHAT_NAMING = f"""
|
||||||
Given the following conversation, provide a SHORT name for the conversation.
|
Given the following conversation, provide a SHORT name for the conversation.{{language_hint_or_empty}}
|
||||||
IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
|
IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
|
||||||
Focus the name on the important keywords to convey the topic of the conversation.
|
Focus the name on the important keywords to convey the topic of the conversation.
|
||||||
|
|
||||||
|
@ -41,12 +41,6 @@ Hint: Make the answer as DETAILED as possible and respond in JSON format! \
|
|||||||
Quotes MUST be EXACT substrings from provided documents!
|
Quotes MUST be EXACT substrings from provided documents!
|
||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
LANGUAGE_HINT = """
|
|
||||||
IMPORTANT: Respond in the same language as my query!
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
CONTEXT_BLOCK = f"""
|
CONTEXT_BLOCK = f"""
|
||||||
REFERENCE DOCUMENTS:
|
REFERENCE DOCUMENTS:
|
||||||
{GENERAL_SEP_PAT}
|
{GENERAL_SEP_PAT}
|
||||||
|
@ -2,9 +2,7 @@
|
|||||||
|
|
||||||
LANGUAGE_REPHRASE_PROMPT = """
|
LANGUAGE_REPHRASE_PROMPT = """
|
||||||
Translate query to {target_language}.
|
Translate query to {target_language}.
|
||||||
If the query at the end is already in {target_language}, \
|
If the query at the end is already in {target_language}, simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
|
||||||
simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
|
|
||||||
|
|
||||||
If the query below is not in {target_language}, translate it into {target_language}.
|
If the query below is not in {target_language}, translate it into {target_language}.
|
||||||
|
|
||||||
Query:
|
Query:
|
||||||
|
@ -5,6 +5,7 @@ from typing import cast
|
|||||||
from langchain_core.messages import BaseMessage
|
from langchain_core.messages import BaseMessage
|
||||||
|
|
||||||
from danswer.chat.models import LlmDoc
|
from danswer.chat.models import LlmDoc
|
||||||
|
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.db.models import Prompt
|
from danswer.db.models import Prompt
|
||||||
@ -12,7 +13,6 @@ from danswer.llm.answering.models import PromptConfig
|
|||||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||||
from danswer.prompts.constants import CODE_BLOCK_PAT
|
from danswer.prompts.constants import CODE_BLOCK_PAT
|
||||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
|
||||||
from danswer.search.models import InferenceChunk
|
from danswer.search.models import InferenceChunk
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,14 +1,13 @@
|
|||||||
|
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||||
from danswer.llm.utils import check_number_of_tokens
|
from danswer.llm.utils import check_number_of_tokens
|
||||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||||
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
||||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||||
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
||||||
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
||||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
|
||||||
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
||||||
|
|
||||||
# tokens outside of the actual persona's "user_prompt" that make up the end
|
# tokens outside of the actual persona's "user_prompt" that make up the end user message
|
||||||
# user message
|
|
||||||
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens(
|
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens(
|
||||||
CHAT_USER_PROMPT.format(
|
CHAT_USER_PROMPT.format(
|
||||||
context_docs_str="",
|
context_docs_str="",
|
||||||
|
@ -1,4 +1,6 @@
|
|||||||
from danswer.chat.chat_utils import combine_message_chain
|
from danswer.chat.chat_utils import combine_message_chain
|
||||||
|
from danswer.configs.chat_configs import LANGUAGE_CHAT_NAMING_HINT
|
||||||
|
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||||
from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
|
from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
|
||||||
from danswer.db.models import ChatMessage
|
from danswer.db.models import ChatMessage
|
||||||
from danswer.llm.interfaces import LLM
|
from danswer.llm.interfaces import LLM
|
||||||
@ -18,10 +20,18 @@ def get_renamed_conversation_name(
|
|||||||
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
|
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
|
||||||
)
|
)
|
||||||
|
|
||||||
|
language_hint = (
|
||||||
|
f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
|
||||||
|
if bool(MULTILINGUAL_QUERY_EXPANSION)
|
||||||
|
else ""
|
||||||
|
)
|
||||||
|
|
||||||
prompt_msgs = [
|
prompt_msgs = [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": CHAT_NAMING.format(chat_history=history_str),
|
"content": CHAT_NAMING.format(
|
||||||
|
language_hint_or_empty=language_hint, chat_history=history_str
|
||||||
|
),
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -63,6 +63,8 @@ services:
|
|||||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||||
|
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||||
|
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||||
# Other services
|
# Other services
|
||||||
- POSTGRES_HOST=relational_db
|
- POSTGRES_HOST=relational_db
|
||||||
@ -140,6 +142,8 @@ services:
|
|||||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||||
|
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||||
|
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||||
# Other Services
|
# Other Services
|
||||||
- POSTGRES_HOST=relational_db
|
- POSTGRES_HOST=relational_db
|
||||||
|
@ -59,6 +59,8 @@ services:
|
|||||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||||
|
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||||
|
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||||
# Other services
|
# Other services
|
||||||
- POSTGRES_HOST=relational_db
|
- POSTGRES_HOST=relational_db
|
||||||
@ -132,6 +134,8 @@ services:
|
|||||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||||
|
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||||
|
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||||
# Other Services
|
# Other Services
|
||||||
- POSTGRES_HOST=relational_db
|
- POSTGRES_HOST=relational_db
|
||||||
|
@ -6,6 +6,9 @@
|
|||||||
|
|
||||||
# Rephrase the user query in specified languages using LLM, use comma separated values
|
# Rephrase the user query in specified languages using LLM, use comma separated values
|
||||||
MULTILINGUAL_QUERY_EXPANSION="English, French"
|
MULTILINGUAL_QUERY_EXPANSION="English, French"
|
||||||
|
# Change the values below to suit your specific needs; they can be more explicit about the language of the response
|
||||||
|
LANGUAGE_HINT="IMPORTANT: Respond in the same language as my query!"
|
||||||
|
LANGUAGE_CHAT_NAMING_HINT="The name of the conversation must be in the same language as the user query."
|
||||||
|
|
||||||
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
|
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
|
||||||
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"
|
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"
|
||||||
|
@ -411,6 +411,8 @@ configMap:
|
|||||||
HYBRID_ALPHA: ""
|
HYBRID_ALPHA: ""
|
||||||
EDIT_KEYWORD_QUERY: ""
|
EDIT_KEYWORD_QUERY: ""
|
||||||
MULTILINGUAL_QUERY_EXPANSION: ""
|
MULTILINGUAL_QUERY_EXPANSION: ""
|
||||||
|
LANGUAGE_HINT: ""
|
||||||
|
LANGUAGE_CHAT_NAMING_HINT: ""
|
||||||
QA_PROMPT_OVERRIDE: ""
|
QA_PROMPT_OVERRIDE: ""
|
||||||
# Internet Search Tool
|
# Internet Search Tool
|
||||||
BING_API_KEY: ""
|
BING_API_KEY: ""
|
||||||
|
@ -33,6 +33,8 @@ data:
|
|||||||
HYBRID_ALPHA: ""
|
HYBRID_ALPHA: ""
|
||||||
EDIT_KEYWORD_QUERY: ""
|
EDIT_KEYWORD_QUERY: ""
|
||||||
MULTILINGUAL_QUERY_EXPANSION: ""
|
MULTILINGUAL_QUERY_EXPANSION: ""
|
||||||
|
LANGUAGE_HINT: ""
|
||||||
|
LANGUAGE_CHAT_NAMING_HINT: ""
|
||||||
QA_PROMPT_OVERRIDE: ""
|
QA_PROMPT_OVERRIDE: ""
|
||||||
# Other Services
|
# Other Services
|
||||||
POSTGRES_HOST: "relational-db-service"
|
POSTGRES_HOST: "relational-db-service"
|
||||||
|
Reference in New Issue
Block a user