mirror of https://github.com/danswer-ai/danswer.git
synced 2025-09-26 20:08:38 +02:00
Fix multilingual .env embedding dimension (#1976)
This commit is contained in:
# This env template shows how to configure Danswer for custom multilingual use
# In this case, it is configured for French and English
# Note that for most use cases it will be enough to configure Danswer multilingual purely through the UI
# To use it, copy it to .env in the docker_compose directory (or the equivalent environment settings file for your deployment)
# See "Search Settings" -> "Advanced" for UI options.
# Feel free to combine it with the other templates to suit your needs
# The following is included with the user prompt. Here's one example but feel free to customize it to your needs:
LANGUAGE_HINT="IMPORTANT: ALWAYS RESPOND IN FRENCH! Even if the documents and the user query are in English, your response must be in French."

# Rephrase the user query in specified languages using LLM, use comma separated values
MULTILINGUAL_QUERY_EXPANSION="English, French"
# Change the below to suit your specific needs, can be more explicit about the language of the response
LANGUAGE_HINT="IMPORTANT: Respond in the same language as my query!"
LANGUAGE_CHAT_NAMING_HINT="The name of the conversation must be in the same language as the user query."
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"
# The model above is trained with the following prefix for queries and passages to improve retrieval
# by letting the model know which of the two type is currently being embedded
ASYM_QUERY_PREFIX="query: "
ASYM_PASSAGE_PREFIX="passage: "
# Depends model by model, the one shown above is tuned with this as True
NORMALIZE_EMBEDDINGS="True"
# Use LLM to determine if chunks are relevant to the query
# May not work well for languages that do not have much training data in the LLM training set
# If using a common language like Spanish, French, Chinese, etc. this can be kept turned on
DISABLE_LLM_DOC_RELEVANCE="True"
# Enables fine-grained embeddings for better retrieval
# At the cost of indexing speed (~5x slower), query time is same speed
# Since reranking is turned off and multilingual retrieval is generally harder
# it is advised to turn this one on
ENABLE_MULTIPASS_INDEXING="True"
Reference in New Issue
Block a user