# This env template shows how to configure Danswer for multilingual use.
# In this case, it is configured for French and English.
# To use it, copy it to .env in the docker_compose directory.
# Feel free to combine it with the other templates to suit your needs.

# Rephrase the user query in the specified languages using the LLM.
# Use comma-separated values.
MULTILINGUAL_QUERY_EXPANSION="English, French"

# A recent MIT-licensed multilingual model: https://huggingface.co/intfloat/multilingual-e5-small
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small"

# The model above is trained with the following prefixes for queries and passages. They improve
# retrieval by letting the model know which of the two types is currently being embedded.
# The trailing space is part of each prefix; keep the values quoted so it is not lost.
ASYM_QUERY_PREFIX="query: "
ASYM_PASSAGE_PREFIX="passage: "

# Whether to normalize embeddings varies from model to model;
# the one shown above is tuned with this as True.
NORMALIZE_EMBEDDINGS="True"

# The LLM chunk filter uses the LLM to determine whether chunks are relevant to the query.
# It may not work well for languages that do not have much training data in the LLM training set.
# For a common language like Spanish, French, Chinese, etc. the filter can be kept turned on
# (i.e. this set to "False"); this template disables it.
DISABLE_LLM_CHUNK_FILTER="True"

# The default reranking models are English-first.
# There are currently no great-quality French/English reranking models, so reranking is turned off.
ENABLE_RERANKING_ASYNC_FLOW="False"
ENABLE_RERANKING_REAL_TIME_FLOW="False"

# Enables fine-grained embeddings for better retrieval,
# at the cost of indexing speed (~5x slower); query time stays the same.
# Since reranking is turned off and multilingual retrieval is generally harder,
# it is advised to turn this on.
ENABLE_MINI_CHUNK="True"

# Using a stronger LLM will help with multilingual tasks.
# Since documents may be in multiple languages, and there are additional instructions to respond
# in the language of the user query, it is advised to use the best model possible.
GEN_AI_MODEL_VERSION="gpt-4"