Touchup for Multilingual Users (#1725)

This commit is contained in:
Yuhong Sun 2024-06-26 22:44:06 -07:00 committed by GitHub
parent 062dc98719
commit 8be42a5f98
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 42 additions and 18 deletions

View File

@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
# revision identifiers, used by Alembic.
revision = "bc9771dccadf"
down_revision = "0568ccf46a6b"
branch_labels = None
depends_on = None
branch_labels: None = None
depends_on: None = None
def upgrade() -> None:

View File

@ -64,6 +64,14 @@ TITLE_CONTENT_RATIO = max(
# A list of languages passed to the LLM to rephrase the query
# For example "English,French,Spanish"; be sure to use "," as the separator
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
# Instruction telling the LLM which language to respond in. The text can be
# overridden with the LANGUAGE_HINT environment variable; an unset or empty
# value falls back to the default. The value always starts with a newline.
LANGUAGE_HINT = "\n{}".format(
    os.getenv("LANGUAGE_HINT")
    or "IMPORTANT: Respond in the same language as my query!"
)
# Instruction about the language of generated conversation names. The text can
# be overridden with the LANGUAGE_CHAT_NAMING_HINT environment variable; an
# unset or empty value falls back to the default.
LANGUAGE_CHAT_NAMING_HINT = (
    os.getenv("LANGUAGE_CHAT_NAMING_HINT")
    or "The name of the conversation must be in the same language as the user query."
)
# Stops streaming answers back to the UI if this pattern is seen:
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None

View File

@ -1,13 +1,13 @@
from langchain.schema.messages import HumanMessage
from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
from danswer.llm.answering.models import PromptConfig
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
from danswer.prompts.direct_qa_prompts import JSON_PROMPT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.prompt_utils import add_date_time_to_prompt
from danswer.prompts.prompt_utils import build_complete_context_str

View File

@ -188,7 +188,7 @@ Query:
CHAT_NAMING = f"""
Given the following conversation, provide a SHORT name for the conversation.
Given the following conversation, provide a SHORT name for the conversation.{{language_hint_or_empty}}
IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
Focus the name on the important keywords to convey the topic of the conversation.

View File

@ -41,12 +41,6 @@ Hint: Make the answer as DETAILED as possible and respond in JSON format! \
Quotes MUST be EXACT substrings from provided documents!
""".strip()
LANGUAGE_HINT = """
IMPORTANT: Respond in the same language as my query!
"""
CONTEXT_BLOCK = f"""
REFERENCE DOCUMENTS:
{GENERAL_SEP_PAT}

View File

@ -2,9 +2,7 @@
LANGUAGE_REPHRASE_PROMPT = """
Translate query to {target_language}.
If the query at the end is already in {target_language}, \
simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
If the query at the end is already in {target_language}, simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
If the query below is not in {target_language}, translate it into {target_language}.
Query:

View File

@ -5,6 +5,7 @@ from typing import cast
from langchain_core.messages import BaseMessage
from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.constants import DocumentSource
from danswer.db.models import Prompt
@ -12,7 +13,6 @@ from danswer.llm.answering.models import PromptConfig
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.search.models import InferenceChunk

View File

@ -1,14 +1,13 @@
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.llm.utils import check_number_of_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import get_current_llm_day_time
# tokens outside of the actual persona's "user_prompt" that make up the end
# user message
# tokens outside of the actual persona's "user_prompt" that make up the end user message
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens(
CHAT_USER_PROMPT.format(
context_docs_str="",

View File

@ -1,4 +1,6 @@
from danswer.chat.chat_utils import combine_message_chain
from danswer.configs.chat_configs import LANGUAGE_CHAT_NAMING_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
from danswer.db.models import ChatMessage
from danswer.llm.interfaces import LLM
@ -18,10 +20,18 @@ def get_renamed_conversation_name(
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
)
language_hint = (
f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
if bool(MULTILINGUAL_QUERY_EXPANSION)
else ""
)
prompt_msgs = [
{
"role": "user",
"content": CHAT_NAMING.format(chat_history=history_str),
"content": CHAT_NAMING.format(
language_hint_or_empty=language_hint, chat_history=history_str
),
},
]

View File

@ -63,6 +63,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services
- POSTGRES_HOST=relational_db
@ -140,6 +142,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services
- POSTGRES_HOST=relational_db

View File

@ -59,6 +59,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services
- POSTGRES_HOST=relational_db
@ -132,6 +134,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services
- POSTGRES_HOST=relational_db

View File

@ -6,6 +6,9 @@
# Rephrase the user query in specified languages using LLM, use comma separated values
MULTILINGUAL_QUERY_EXPANSION="English, French"
# Change the hints below to suit your needs; they can be more explicit about the language of the response
LANGUAGE_HINT="IMPORTANT: Respond in the same language as my query!"
LANGUAGE_CHAT_NAMING_HINT="The name of the conversation must be in the same language as the user query."
# A recent MIT-licensed multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"

View File

@ -411,6 +411,8 @@ configMap:
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Internet Search Tool
BING_API_KEY: ""

View File

@ -33,6 +33,8 @@ data:
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Other Services
POSTGRES_HOST: "relational-db-service"