mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-09 20:39:29 +02:00
Touchup for Multilingual Users (#1725)
This commit is contained in:
parent
062dc98719
commit
8be42a5f98
@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "bc9771dccadf"
|
||||
down_revision = "0568ccf46a6b"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
|
@ -64,6 +64,14 @@ TITLE_CONTENT_RATIO = max(
|
||||
# A list of languages passed to the LLM to rephrase the query
|
||||
# For example "English,French,Spanish", be sure to use the "," separator
|
||||
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
|
||||
LANGUAGE_HINT = "\n" + (
|
||||
os.environ.get("LANGUAGE_HINT")
|
||||
or "IMPORTANT: Respond in the same language as my query!"
|
||||
)
|
||||
LANGUAGE_CHAT_NAMING_HINT = (
|
||||
os.environ.get("LANGUAGE_CHAT_NAMING_HINT")
|
||||
or "The name of the conversation must be in the same language as the user query."
|
||||
)
|
||||
|
||||
# Stops streaming answers back to the UI if this pattern is seen:
|
||||
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None
|
||||
|
@ -1,13 +1,13 @@
|
||||
from langchain.schema.messages import HumanMessage
|
||||
|
||||
from danswer.chat.models import LlmDoc
|
||||
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
|
||||
from danswer.llm.answering.models import PromptConfig
|
||||
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
|
||||
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
|
||||
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
|
||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
||||
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
|
||||
from danswer.prompts.prompt_utils import add_date_time_to_prompt
|
||||
from danswer.prompts.prompt_utils import build_complete_context_str
|
||||
|
@ -188,7 +188,7 @@ Query:
|
||||
|
||||
|
||||
CHAT_NAMING = f"""
|
||||
Given the following conversation, provide a SHORT name for the conversation.
|
||||
Given the following conversation, provide a SHORT name for the conversation.{{language_hint_or_empty}}
|
||||
IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
|
||||
Focus the name on the important keywords to convey the topic of the conversation.
|
||||
|
||||
|
@ -41,12 +41,6 @@ Hint: Make the answer as DETAILED as possible and respond in JSON format! \
|
||||
Quotes MUST be EXACT substrings from provided documents!
|
||||
""".strip()
|
||||
|
||||
|
||||
LANGUAGE_HINT = """
|
||||
IMPORTANT: Respond in the same language as my query!
|
||||
"""
|
||||
|
||||
|
||||
CONTEXT_BLOCK = f"""
|
||||
REFERENCE DOCUMENTS:
|
||||
{GENERAL_SEP_PAT}
|
||||
|
@ -2,9 +2,7 @@
|
||||
|
||||
LANGUAGE_REPHRASE_PROMPT = """
|
||||
Translate query to {target_language}.
|
||||
If the query at the end is already in {target_language}, \
|
||||
simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
|
||||
|
||||
If the query at the end is already in {target_language}, simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
|
||||
If the query below is not in {target_language}, translate it into {target_language}.
|
||||
|
||||
Query:
|
||||
|
@ -5,6 +5,7 @@ from typing import cast
|
||||
from langchain_core.messages import BaseMessage
|
||||
|
||||
from danswer.chat.models import LlmDoc
|
||||
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||
from danswer.configs.constants import DocumentSource
|
||||
from danswer.db.models import Prompt
|
||||
@ -12,7 +13,6 @@ from danswer.llm.answering.models import PromptConfig
|
||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||
from danswer.prompts.constants import CODE_BLOCK_PAT
|
||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
||||
from danswer.search.models import InferenceChunk
|
||||
|
||||
|
||||
|
@ -1,14 +1,13 @@
|
||||
from danswer.configs.chat_configs import LANGUAGE_HINT
|
||||
from danswer.llm.utils import check_number_of_tokens
|
||||
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
|
||||
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
|
||||
from danswer.prompts.chat_prompts import CITATION_REMINDER
|
||||
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
|
||||
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
|
||||
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
|
||||
from danswer.prompts.prompt_utils import get_current_llm_day_time
|
||||
|
||||
# tokens outside of the actual persona's "user_prompt" that make up the end
|
||||
# user message
|
||||
# tokens outside of the actual persona's "user_prompt" that make up the end user message
|
||||
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens(
|
||||
CHAT_USER_PROMPT.format(
|
||||
context_docs_str="",
|
||||
|
@ -1,4 +1,6 @@
|
||||
from danswer.chat.chat_utils import combine_message_chain
|
||||
from danswer.configs.chat_configs import LANGUAGE_CHAT_NAMING_HINT
|
||||
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
|
||||
from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
|
||||
from danswer.db.models import ChatMessage
|
||||
from danswer.llm.interfaces import LLM
|
||||
@ -18,10 +20,18 @@ def get_renamed_conversation_name(
|
||||
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
|
||||
)
|
||||
|
||||
language_hint = (
|
||||
f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
|
||||
if bool(MULTILINGUAL_QUERY_EXPANSION)
|
||||
else ""
|
||||
)
|
||||
|
||||
prompt_msgs = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": CHAT_NAMING.format(chat_history=history_str),
|
||||
"content": CHAT_NAMING.format(
|
||||
language_hint_or_empty=language_hint, chat_history=history_str
|
||||
),
|
||||
},
|
||||
]
|
||||
|
||||
|
@ -63,6 +63,8 @@ services:
|
||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||
# Other services
|
||||
- POSTGRES_HOST=relational_db
|
||||
@ -140,6 +142,8 @@ services:
|
||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||
# Other Services
|
||||
- POSTGRES_HOST=relational_db
|
||||
|
@ -59,6 +59,8 @@ services:
|
||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||
# Other services
|
||||
- POSTGRES_HOST=relational_db
|
||||
@ -132,6 +134,8 @@ services:
|
||||
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
|
||||
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
|
||||
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
|
||||
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
|
||||
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
|
||||
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
|
||||
# Other Services
|
||||
- POSTGRES_HOST=relational_db
|
||||
|
@ -6,6 +6,9 @@
|
||||
|
||||
# Rephrase the user query in specified languages using LLM, use comma separated values
|
||||
MULTILINGUAL_QUERY_EXPANSION="English, French"
|
||||
# Change the values below to suit your specific needs; the hints can be more explicit about the language of the response
|
||||
LANGUAGE_HINT="IMPORTANT: Respond in the same language as my query!"
|
||||
LANGUAGE_CHAT_NAMING_HINT="The name of the conversation must be in the same language as the user query."
|
||||
|
||||
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
|
||||
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"
|
||||
|
@ -411,6 +411,8 @@ configMap:
|
||||
HYBRID_ALPHA: ""
|
||||
EDIT_KEYWORD_QUERY: ""
|
||||
MULTILINGUAL_QUERY_EXPANSION: ""
|
||||
LANGUAGE_HINT: ""
|
||||
LANGUAGE_CHAT_NAMING_HINT: ""
|
||||
QA_PROMPT_OVERRIDE: ""
|
||||
# Internet Search Tool
|
||||
BING_API_KEY: ""
|
||||
|
@ -33,6 +33,8 @@ data:
|
||||
HYBRID_ALPHA: ""
|
||||
EDIT_KEYWORD_QUERY: ""
|
||||
MULTILINGUAL_QUERY_EXPANSION: ""
|
||||
LANGUAGE_HINT: ""
|
||||
LANGUAGE_CHAT_NAMING_HINT: ""
|
||||
QA_PROMPT_OVERRIDE: ""
|
||||
# Other Services
|
||||
POSTGRES_HOST: "relational-db-service"
|
||||
|
Loading…
x
Reference in New Issue
Block a user