Touchup for Multilingual Users (#1725)

This commit is contained in:
Yuhong Sun
2024-06-26 22:44:06 -07:00
committed by GitHub
parent 062dc98719
commit 8be42a5f98
14 changed files with 42 additions and 18 deletions

View File

@ -12,8 +12,8 @@ import fastapi_users_db_sqlalchemy
# revision identifiers, used by Alembic. # revision identifiers, used by Alembic.
revision = "bc9771dccadf" revision = "bc9771dccadf"
down_revision = "0568ccf46a6b" down_revision = "0568ccf46a6b"
branch_labels = None branch_labels: None = None
depends_on = None depends_on: None = None
def upgrade() -> None: def upgrade() -> None:

View File

@ -64,6 +64,14 @@ TITLE_CONTENT_RATIO = max(
# A list of languages passed to the LLM to rephrase the query # A list of languages passed to the LLM to rephrase the query
# For example "English,French,Spanish", be sure to use the "," separator # For example "English,French,Spanish", be sure to use the "," separator
MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None MULTILINGUAL_QUERY_EXPANSION = os.environ.get("MULTILINGUAL_QUERY_EXPANSION") or None
LANGUAGE_HINT = "\n" + (
os.environ.get("LANGUAGE_HINT")
or "IMPORTANT: Respond in the same language as my query!"
)
LANGUAGE_CHAT_NAMING_HINT = (
os.environ.get("LANGUAGE_CHAT_NAMING_HINT")
or "The name of the conversation must be in the same language as the user query."
)
# Stops streaming answers back to the UI if this pattern is seen: # Stops streaming answers back to the UI if this pattern is seen:
STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None STOP_STREAM_PAT = os.environ.get("STOP_STREAM_PAT") or None

View File

@ -1,13 +1,13 @@
from langchain.schema.messages import HumanMessage from langchain.schema.messages import HumanMessage
from danswer.chat.models import LlmDoc from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE
from danswer.llm.answering.models import PromptConfig from danswer.llm.answering.models import PromptConfig
from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK
from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK
from danswer.prompts.direct_qa_prompts import JSON_PROMPT from danswer.prompts.direct_qa_prompts import JSON_PROMPT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT
from danswer.prompts.prompt_utils import add_date_time_to_prompt from danswer.prompts.prompt_utils import add_date_time_to_prompt
from danswer.prompts.prompt_utils import build_complete_context_str from danswer.prompts.prompt_utils import build_complete_context_str

View File

@ -188,7 +188,7 @@ Query:
CHAT_NAMING = f""" CHAT_NAMING = f"""
Given the following conversation, provide a SHORT name for the conversation. Given the following conversation, provide a SHORT name for the conversation.{{language_hint_or_empty}}
IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE. IMPORTANT: TRY NOT TO USE MORE THAN 5 WORDS, MAKE IT AS CONCISE AS POSSIBLE.
Focus the name on the important keywords to convey the topic of the conversation. Focus the name on the important keywords to convey the topic of the conversation.

View File

@ -41,12 +41,6 @@ Hint: Make the answer as DETAILED as possible and respond in JSON format! \
Quotes MUST be EXACT substrings from provided documents! Quotes MUST be EXACT substrings from provided documents!
""".strip() """.strip()
LANGUAGE_HINT = """
IMPORTANT: Respond in the same language as my query!
"""
CONTEXT_BLOCK = f""" CONTEXT_BLOCK = f"""
REFERENCE DOCUMENTS: REFERENCE DOCUMENTS:
{GENERAL_SEP_PAT} {GENERAL_SEP_PAT}

View File

@ -2,9 +2,7 @@
LANGUAGE_REPHRASE_PROMPT = """ LANGUAGE_REPHRASE_PROMPT = """
Translate query to {target_language}. Translate query to {target_language}.
If the query at the end is already in {target_language}, \ If the query at the end is already in {target_language}, simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
If the query below is not in {target_language}, translate it into {target_language}. If the query below is not in {target_language}, translate it into {target_language}.
Query: Query:

View File

@ -5,6 +5,7 @@ from typing import cast
from langchain_core.messages import BaseMessage from langchain_core.messages import BaseMessage
from danswer.chat.models import LlmDoc from danswer.chat.models import LlmDoc
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
from danswer.db.models import Prompt from danswer.db.models import Prompt
@ -12,7 +13,6 @@ from danswer.llm.answering.models import PromptConfig
from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CITATION_REMINDER from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.constants import CODE_BLOCK_PAT from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.search.models import InferenceChunk from danswer.search.models import InferenceChunk

View File

@ -1,14 +1,13 @@
from danswer.configs.chat_configs import LANGUAGE_HINT
from danswer.llm.utils import check_number_of_tokens from danswer.llm.utils import check_number_of_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_PROMPT from danswer.prompts.chat_prompts import CHAT_USER_PROMPT
from danswer.prompts.chat_prompts import CITATION_REMINDER from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.prompts.prompt_utils import get_current_llm_day_time from danswer.prompts.prompt_utils import get_current_llm_day_time
# tokens outside of the actual persona's "user_prompt" that make up the end # tokens outside of the actual persona's "user_prompt" that make up the end user message
# user message
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens( CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT = check_number_of_tokens(
CHAT_USER_PROMPT.format( CHAT_USER_PROMPT.format(
context_docs_str="", context_docs_str="",

View File

@ -1,4 +1,6 @@
from danswer.chat.chat_utils import combine_message_chain from danswer.chat.chat_utils import combine_message_chain
from danswer.configs.chat_configs import LANGUAGE_CHAT_NAMING_HINT
from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION
from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF
from danswer.db.models import ChatMessage from danswer.db.models import ChatMessage
from danswer.llm.interfaces import LLM from danswer.llm.interfaces import LLM
@ -18,10 +20,18 @@ def get_renamed_conversation_name(
messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF messages=full_history, token_limit=GEN_AI_HISTORY_CUTOFF
) )
language_hint = (
f"\n{LANGUAGE_CHAT_NAMING_HINT.strip()}"
if bool(MULTILINGUAL_QUERY_EXPANSION)
else ""
)
prompt_msgs = [ prompt_msgs = [
{ {
"role": "user", "role": "user",
"content": CHAT_NAMING.format(chat_history=history_str), "content": CHAT_NAMING.format(
language_hint_or_empty=language_hint, chat_history=history_str
),
}, },
] ]

View File

@ -63,6 +63,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services # Other services
- POSTGRES_HOST=relational_db - POSTGRES_HOST=relational_db
@ -140,6 +142,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services # Other Services
- POSTGRES_HOST=relational_db - POSTGRES_HOST=relational_db

View File

@ -59,6 +59,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other services # Other services
- POSTGRES_HOST=relational_db - POSTGRES_HOST=relational_db
@ -132,6 +134,8 @@ services:
- HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector)
- EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
- MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-}
- LANGUAGE_HINT=${LANGUAGE_HINT:-}
- LANGUAGE_CHAT_NAMING_HINT=${LANGUAGE_CHAT_NAMING_HINT:-}
- QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-}
# Other Services # Other Services
- POSTGRES_HOST=relational_db - POSTGRES_HOST=relational_db

View File

@ -6,6 +6,9 @@
# Rephrase the user query in specified languages using LLM, use comma separated values # Rephrase the user query in specified languages using LLM, use comma separated values
MULTILINGUAL_QUERY_EXPANSION="English, French" MULTILINGUAL_QUERY_EXPANSION="English, French"
# Change the values below to suit your specific needs; they can be more explicit about the language of the response
LANGUAGE_HINT="IMPORTANT: Respond in the same language as my query!"
LANGUAGE_CHAT_NAMING_HINT="The name of the conversation must be in the same language as the user query."
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small # A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small" DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"

View File

@ -411,6 +411,8 @@ configMap:
HYBRID_ALPHA: "" HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: "" EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: "" MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: "" QA_PROMPT_OVERRIDE: ""
# Internet Search Tool # Internet Search Tool
BING_API_KEY: "" BING_API_KEY: ""

View File

@ -33,6 +33,8 @@ data:
HYBRID_ALPHA: "" HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: "" EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: "" MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: "" QA_PROMPT_OVERRIDE: ""
# Other Services # Other Services
POSTGRES_HOST: "relational-db-service" POSTGRES_HOST: "relational-db-service"