mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-28 18:52:31 +01:00
41 lines
1.9 KiB
Plaintext
41 lines
1.9 KiB
Plaintext
# This env template shows how to configure Danswer for multilingual use
|
|
# In this case, it is configured for French and English
|
|
# To use it, copy it to .env in the docker_compose directory.
|
|
# Feel free to combine it with the other templates to suit your needs
|
|
|
|
|
|
# Rephrase the user query in the specified languages using the LLM; use comma-separated values
|
|
MULTILINGUAL_QUERY_EXPANSION="English, French"
|
|
|
|
# A recent MIT-licensed multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
|
|
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"
|
|
|
|
# The model above is trained with the following prefix for queries and passages to improve retrieval
|
|
# by letting the model know which of the two types is currently being embedded
|
|
ASYM_QUERY_PREFIX="query: "
|
|
ASYM_PASSAGE_PREFIX="passage: "
|
|
|
|
# This varies from model to model; the one shown above is tuned with this set to True
|
|
NORMALIZE_EMBEDDINGS="True"
|
|
|
|
# Use the LLM to determine if chunks are relevant to the query
|
|
# May not work well for languages that do not have much training data in the LLM training set
|
|
# If using a common language like Spanish, French, Chinese, etc., the filter can be kept turned on (set the value below to "False")
|
|
DISABLE_LLM_CHUNK_FILTER="True"
|
|
|
|
# The default reranking models are English first
|
|
# There are currently no high-quality French/English reranking models, so reranking is turned off
|
|
ENABLE_RERANKING_ASYNC_FLOW="False"
|
|
ENABLE_RERANKING_REAL_TIME_FLOW="False"
|
|
|
|
# Enables fine-grained embeddings for better retrieval
|
|
# At the cost of indexing speed (~5x slower); query-time speed is unchanged
|
|
# Since reranking is turned off and multilingual retrieval is generally harder
|
|
# it is advised to turn this one on
|
|
ENABLE_MINI_CHUNK="True"
|
|
|
|
# Using a stronger LLM will help with multilingual tasks
|
|
# Since documents may be in multiple languages, and there are additional instructions to respond
|
|
# in the user query's language, it is advised to use the best model possible
|
|
GEN_AI_MODEL_VERSION="gpt-4"
|