Improve English rephrasing for multilingual use case (#808)

This commit is contained in:
Yuhong Sun 2023-12-03 14:34:12 -08:00 committed by GitHub
parent 5629ca7d96
commit 651de071f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 8 deletions

View File

@ -1,11 +1,11 @@
# Prompts that aren't part of a particular configurable feature
LANGUAGE_REPHRASE_PROMPT = """
Rephrase the query in {target_language}.
If the query is already in the correct language, \
simply repeat the ORIGINAL query back to me, EXACTLY as is with no rephrasing.
NEVER change proper nouns, technical terms, acronyms, or terms you are not familiar with.
IMPORTANT, if the query is already in the target language, DO NOT REPHRASE OR EDIT the query!
Translate query to {target_language}.
If the query at the end is already in {target_language}, \
simply repeat the ORIGINAL query back to me, EXACTLY as is with no edits.
If the query below is not in {target_language}, translate it into {target_language}.
Query:
{query}

View File

@ -332,6 +332,12 @@ def apply_boost(
return final_chunks
def _simplify_text(text: str) -> str:
return "".join(
char for char in text if char not in string.punctuation and not char.isspace()
).lower()
def retrieve_chunks(
query: SearchQuery,
document_index: DocumentIndex,
@ -347,13 +353,22 @@ def retrieve_chunks(
query=query, document_index=document_index, hybrid_alpha=hybrid_alpha
)
else:
simplified_queries = set()
run_queries: list[tuple[Callable, tuple]] = []
# Currently only uses query expansion on multilingual use cases
query_rephrases = rephrase_query(query.query, multilingual_query_expansion)
# Just to be extra sure, add the original query.
query_rephrases.append(query.query)
for rephrase in set(query_rephrases):
q_copy = query.copy(update={'query': rephrase}, deep=True)
# Sometimes the model rephrases the query in the same language with minor changes
# Avoid doing an extra search with the minor changes as this biases the results
simplified_rephrase = _simplify_text(rephrase)
if simplified_rephrase in simplified_queries:
continue
simplified_queries.add(simplified_rephrase)
q_copy = query.copy(update={"query": rephrase}, deep=True)
run_queries.append(
(doc_index_retrieval, (q_copy, document_index, hybrid_alpha))
)

View File

@ -42,7 +42,11 @@ def rephrase_query(
(llm_rephrase_query, (query, language)) for language in languages
]
return run_functions_tuples_in_parallel(functions_with_args)
query_rephrases = run_functions_tuples_in_parallel(functions_with_args)
return query_rephrases
else:
return [llm_rephrase_query(query, language) for language in languages]
query_rephrases = [
llm_rephrase_query(query, language) for language in languages
]
return query_rephrases