From a2ec1e2cda7e577ee1dcbb4a17c8b5f2f2626777 Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Sat, 26 Aug 2023 15:35:19 -0700
Subject: [PATCH] Vespa Deployment (#330)

Large Change!
---
 CONTRIBUTING.md                               |  45 +++--
 backend/Dockerfile                            |   6 +-
 backend/danswer/background/update.py          |  11 +-
 backend/danswer/configs/app_configs.py        |  12 +-
 backend/danswer/configs/model_configs.py      |  13 +-
 .../danswer/datastores/indexing_pipeline.py   |   4 +-
 backend/danswer/datastores/qdrant/store.py    |   8 +-
 .../vespa/app_config/schemas/danswer_chunk.sd |  12 +-
 .../datastores/vespa/app_config/services.xml  |  10 ++
 backend/danswer/datastores/vespa/store.py     | 111 +++++++++----
 backend/danswer/datastores/vespa/utils.py     |  12 ++
 backend/danswer/search/semantic_search.py     |  32 +++-
 backend/danswer/server/manage.py              |   4 +-
 .../docker-compose.dev.legacy.yml             | 155 ++++++++++++++++++
 .../docker_compose/docker-compose.dev.yml     |  37 ++---
 .../docker-compose.prod.legacy.yml            | 128 +++++++++++++++
 .../docker_compose/docker-compose.prod.yml    |  37 ++---
 .../api_server-service-deployment.yaml        |  11 +-
 .../kubernetes/background-deployment.yaml     |  11 +-
 deployment/kubernetes/persistent-volumes.yaml |  33 ----
 .../postgres-service-deployment.yaml          |  22 ++-
 .../kubernetes/qdrant-service-deployment.yaml |  43 -----
 deployment/kubernetes/secrets.yaml            |   5 +-
 .../typesense-service-deployment.yaml         |  48 ------
 .../kubernetes/vespa-service-deployment.yaml  |  63 +++++++
 25 files changed, 584 insertions(+), 289 deletions(-)
 create mode 100644 backend/danswer/datastores/vespa/utils.py
 create mode 100644 deployment/docker_compose/docker-compose.dev.legacy.yml
 create mode 100644 deployment/docker_compose/docker-compose.prod.legacy.yml
 delete mode 100644 deployment/kubernetes/qdrant-service-deployment.yaml
 delete mode 100644 deployment/kubernetes/typesense-service-deployment.yaml
 create mode 100644 deployment/kubernetes/vespa-service-deployment.yaml

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b20c7a15e..3b9375244 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -45,9 +45,8 @@ We would love to see you there!

## Get Started 🚀
Danswer, being a fully functional app, relies on several external pieces of software, specifically:
-- Postgres
-- Vector DB ([Qdrant](https://github.com/qdrant/qdrant))
-- Search Engine ([Typesense](https://github.com/typesense/typesense))
+- Postgres (Relational DB)
+- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)

This guide provides instructions to set up the Danswer-specific services outside of Docker because it's easier
for development purposes, but also feel free to just use the containers and update with local changes by providing the
@@ -101,14 +100,9 @@ Postgres:
docker compose -f docker-compose.dev.yml -p danswer-stack up -d relational_db
```

-Qdrant:
+Vespa:
```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d vector_db
-```
-
-Typesense:
-```bash
-docker compose -f docker-compose.dev.yml -p danswer-stack up -d search_engine
+docker compose -f docker-compose.dev.yml -p danswer-stack up -d index
```


@@ -129,37 +123,52 @@ _for Windows, run:_
```

-The first time running Danswer, you will need to run the DB migrations.
+The first time running Danswer, you will need to run the DB migrations for Postgres.
Navigate to `danswer/backend` and with the venv active, run:
```bash
alembic upgrade head
```

-To run the backend API server, navigate to `danswer/backend` and run:
+Additionally, we have to package the Vespa schema for deployment:
+Navigate to `danswer/backend/danswer/datastores/vespa/app_config` and run:
```bash
-DISABLE_AUTH=True TYPESENSE_API_KEY=typesense_api_key DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage uvicorn danswer.main:app --reload --port 8080
+zip -r ../vespa-app.zip .
+```
+- Note: If you don't have the `zip` utility, you will need to install it before running the above command
+
+To run the backend API server, navigate back to `danswer/backend` and run:
+```bash
+DISABLE_AUTH=True \
+DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage \
+VESPA_DEPLOYMENT_ZIP=./danswer/datastores/vespa/vespa-app.zip \
+uvicorn danswer.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
-powershell -Command " $env:DISABLE_AUTH='True'; $env:TYPESENSE_API_KEY='typesense_api_key'; $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'; uvicorn danswer.main:app --reload --port 8080 "
+powershell -Command "
+    $env:DISABLE_AUTH='True'
+    $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'
+    $env:VESPA_DEPLOYMENT_ZIP='./danswer/datastores/vespa/vespa-app.zip'
+    uvicorn danswer.main:app --reload --port 8080
+"
```

To run the background job to check for connector updates and index documents, navigate to `danswer/backend` and run:
```bash
-PYTHONPATH=. TYPESENSE_API_KEY=typesense_api_key DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/update.py
+PYTHONPATH=. DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/update.py
```
_For Windows:_
```bash
-powershell -Command " $env:PYTHONPATH='.'; $env:TYPESENSE_API_KEY='typesense_api_key'; $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'; python danswer/background/update.py "
+powershell -Command " $env:PYTHONPATH='.'; $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'; python danswer/background/update.py "
```

To run the background job which handles deletion of connectors, navigate to `danswer/backend` and run:
```bash
-PYTHONPATH=. TYPESENSE_API_KEY=typesense_api_key DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/connector_deletion.py
+PYTHONPATH=. DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage python danswer/background/connector_deletion.py
```
_For Windows:_
```bash
-powershell -Command " $env:PYTHONPATH='.'; $env:TYPESENSE_API_KEY='typesense_api_key'; $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'; python danswer/background/connector_deletion.py "
+powershell -Command " $env:PYTHONPATH='.'; $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage'; python danswer/background/connector_deletion.py "
```

Note: if you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
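One quick way to verify the local Vespa container before starting the API server is to hit its state API. A minimal sketch, assuming the default ports from the compose file (19071 for the config server, 8081 for the data plane); `/state/v1/health` is Vespa's standard health endpoint, and the deploy `curl` mirrors the POST to `prepareandactivate` that the backend performs with the zip on startup (the `/application/v2` prefix is an assumption about how `VESPA_APPLICATION_ENDPOINT` resolves):

```bash
# Config server health -- should report "up" shortly after the container starts
curl -s http://localhost:19071/state/v1/health

# Optionally deploy the packaged schema by hand (the API server also does this
# on startup via VESPA_DEPLOYMENT_ZIP); endpoint prefix assumed to be /application/v2
curl -s -X POST -H "Content-Type: application/zip" \
  --data-binary @danswer/datastores/vespa/vespa-app.zip \
  "http://localhost:19071/application/v2/tenant/default/prepareandactivate"

# Data-plane health -- only reports "up" once an application package is deployed
curl -s http://localhost:8081/state/v1/health
```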
diff --git a/backend/Dockerfile b/backend/Dockerfile index 48c8a1b54..cdcdfbe5d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -3,7 +3,7 @@ FROM python:3.11.4-slim-bullseye RUN apt-get update \ && apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \ libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \ - supervisor \ + supervisor zip \ && rm -rf /var/lib/apt/lists/* COPY ./requirements/default.txt /tmp/requirements.txt @@ -17,6 +17,10 @@ COPY ./alembic /app/alembic COPY ./alembic.ini /app/alembic.ini COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf +WORKDIR /app/danswer/datastores/vespa/app_config +RUN zip -r /app/danswer/vespa-app.zip . +WORKDIR /app + ENV PYTHONPATH /app # By default this container does nothing, it is used by api server and background which specify their own CMD diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 91bea0dba..e5a2a1515 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -230,6 +230,14 @@ def _run_indexing( db_connector = attempt.connector db_credential = attempt.credential + update_connector_credential_pair( + db_session=db_session, + connector_id=db_connector.id, + credential_id=db_credential.id, + attempt_status=IndexingStatus.IN_PROGRESS, + run_dt=run_dt, + ) + try: net_doc_change = 0 document_count = 0 @@ -238,7 +246,6 @@ def _run_indexing( logger.debug( f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}" ) - index_user_id = ( None if db_credential.public_doc else db_credential.user_id ) @@ -286,6 +293,8 @@ def _run_indexing( f"Failed connector elapsed time: {time.time() - run_time} seconds" ) mark_attempt_failed(attempt, db_session, failure_reason=str(e)) + # The last attempt won't be marked failed until the next cycle's check for still in-progress attempts + # The connector_credential_pair is marked failed here though to reflect correctly in UI asap update_connector_credential_pair( db_session=db_session, connector_id=attempt.connector.id, diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index fdb3d11ff..c927c538c 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -67,7 +67,7 @@ MASK_CREDENTIAL_PREFIX = ( DOCUMENT_INDEX_NAME = "danswer_index" # Shared by vector/keyword indices # Vespa is now the default document index store for both keyword and vector DOCUMENT_INDEX_TYPE = os.environ.get( - "DOCUMENT_INDEX_TYPE", DocumentIndexType.SPLIT.value + "DOCUMENT_INDEX_TYPE", DocumentIndexType.COMBINED.value ) VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost" VESPA_PORT = os.environ.get("VESPA_PORT") or "8081" @@ -92,11 +92,11 @@ INDEX_BATCH_SIZE = 16 # below are intended to match the env variables names used by the official postgres docker image # https://hub.docker.com/_/postgres -POSTGRES_USER = os.environ.get("POSTGRES_USER", "postgres") -POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "password") -POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost") -POSTGRES_PORT = os.environ.get("POSTGRES_PORT", "5432") -POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres") +POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" +POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD") or "password" +POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" +POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" +POSTGRES_DB = 
os.environ.get("POSTGRES_DB") or "postgres" ##### diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index c2df7c346..bbae67bf3 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -9,20 +9,21 @@ from danswer.configs.constants import ModelHostType # Models used must be MIT or Apache license # Inference/Indexing speed -# https://www.sbert.net/docs/pretrained_models.html -# Use 'multi-qa-MiniLM-L6-cos-v1' if license is added because it is 3x faster (384 dimensional embedding) -# Context size is 256 for above though -DOCUMENT_ENCODER_MODEL = "sentence-transformers/all-distilroberta-v1" -DOC_EMBEDDING_DIM = 768 # Depends on the document encoder model +# https://huggingface.co/thenlper/gte-small +DOCUMENT_ENCODER_MODEL = "thenlper/gte-small" +DOC_EMBEDDING_DIM = 384 # Depends on the document encoder model +NORMALIZE_EMBEDDINGS = False +# Certain models like BGE use a prefix for asymmetric retrievals (query generally shorter than docs) +ASYMMETRIC_PREFIX = "" # https://www.sbert.net/docs/pretrained-models/ce-msmarco.html -# Previously using "cross-encoder/ms-marco-MiniLM-L-6-v2" alone CROSS_ENCODER_MODEL_ENSEMBLE = [ "cross-encoder/ms-marco-MiniLM-L-4-v2", "cross-encoder/ms-marco-TinyBERT-L-2-v2", ] # Better to keep it loose, surfacing more results better than missing results +# Currently unused by Vespa SEARCH_DISTANCE_CUTOFF = 0.1 # Cosine similarity (currently), range of -1 to 1 with -1 being completely opposite QUERY_MAX_CONTEXT_SIZE = 256 diff --git a/backend/danswer/datastores/indexing_pipeline.py b/backend/danswer/datastores/indexing_pipeline.py index 11d00f714..5bcd9d3f1 100644 --- a/backend/danswer/datastores/indexing_pipeline.py +++ b/backend/danswer/datastores/indexing_pipeline.py @@ -102,7 +102,9 @@ def _indexing_pipeline( ) raise e - return len(insertion_records), len(chunks) + return len([r for r in insertion_records if r.already_existed is False]), len( + chunks + ) def build_indexing_pipeline( diff --git a/backend/danswer/datastores/qdrant/store.py b/backend/danswer/datastores/qdrant/store.py index b2e0c403b..c45ea951e 100644 --- a/backend/danswer/datastores/qdrant/store.py +++ b/backend/danswer/datastores/qdrant/store.py @@ -25,7 +25,7 @@ from danswer.datastores.interfaces import VectorIndex from danswer.datastores.qdrant.indexing import index_qdrant_chunks from danswer.datastores.qdrant.utils import create_qdrant_collection from danswer.datastores.qdrant.utils import list_qdrant_collections -from danswer.search.search_utils import get_default_embedding_model +from danswer.search.semantic_search import embed_query from danswer.utils.batching import batch_generator from danswer.utils.clients import get_qdrant_client from danswer.utils.logger import setup_logger @@ -175,11 +175,7 @@ class QdrantIndex(VectorIndex): distance_cutoff: float | None = SEARCH_DISTANCE_CUTOFF, page_size: int = NUM_RETURNED_HITS, ) -> list[InferenceChunk]: - query_embedding = get_default_embedding_model().encode( - query - ) # TODO: make this part of the embedder interface - if not isinstance(query_embedding, list): - query_embedding = query_embedding.tolist() + query_embedding = embed_query(query) filter_conditions = _build_qdrant_filters(user_id, filters) diff --git a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd index 43afc4da1..2805b3ed3 100644 --- 
a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd +++ b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd @@ -14,7 +14,11 @@ schema danswer_chunk { # Need to consider that not every doc has a separable title (ie. slack message) # Set summary options to enable bolding field content type string { - indexing: summary | attribute | index + indexing: summary | index + match { + gram + gram-size: 3 + } index: enable-bm25 } # https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it @@ -38,7 +42,7 @@ schema danswer_chunk { field metadata type string { indexing: summary | attribute } - field embeddings type tensor(t{},x[768]) { + field embeddings type tensor(t{},x[384]) { indexing: attribute attribute { distance-metric: angular @@ -66,7 +70,7 @@ schema danswer_chunk { rank-profile semantic_search inherits default { inputs { - query(query_embedding) tensor(x[768]) + query(query_embedding) tensor(x[384]) } first-phase { expression: closeness(field, embeddings) * attribute(boost) @@ -76,7 +80,7 @@ schema danswer_chunk { rank-profile hybrid_search inherits default { inputs { - query(query_embedding) tensor(x[768]) + query(query_embedding) tensor(x[384]) } first-phase { expression: bm25(content) diff --git a/backend/danswer/datastores/vespa/app_config/services.xml b/backend/danswer/datastores/vespa/app_config/services.xml index 1eab58c24..d6b3333d8 100644 --- a/backend/danswer/datastores/vespa/app_config/services.xml +++ b/backend/danswer/datastores/vespa/app_config/services.xml @@ -3,6 +3,9 @@ + + + @@ -15,5 +18,12 @@ + + + + + 0.98 + + diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py index b2665559a..dba887951 100644 --- a/backend/danswer/datastores/vespa/store.py +++ b/backend/danswer/datastores/vespa/store.py @@ -1,10 +1,12 @@ import json from collections.abc import Mapping -from functools import partial +from typing import Any from typing import cast from uuid import UUID import requests +from requests import HTTPError +from requests import Response from danswer.chunking.models import IndexChunk from danswer.chunking.models import InferenceChunk @@ -39,7 +41,9 @@ from danswer.datastores.interfaces import DocumentIndex from danswer.datastores.interfaces import DocumentInsertionRecord from danswer.datastores.interfaces import IndexFilter from danswer.datastores.interfaces import UpdateRequest +from danswer.datastores.vespa.utils import remove_invalid_unicode_chars from danswer.search.search_utils import get_default_embedding_model +from danswer.search.semantic_search import embed_query from danswer.utils.logger import setup_logger logger = setup_logger() @@ -123,7 +127,9 @@ def _index_vespa_chunks( chunks: list[IndexChunk], index_attempt_metadata: IndexAttemptMetadata, ) -> set[DocumentInsertionRecord]: - json_header = {"Content-Type": "application/json"} + json_header = { + "Content-Type": "application/json", + } insertion_records: set[DocumentInsertionRecord] = set() cross_connector_document_metadata_map: dict[ str, CrossConnectorDocumentMetadata @@ -151,6 +157,7 @@ def _index_vespa_chunks( ) already_existing_documents.add(document.id) + # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself vespa_chunk_id = str(get_uuid_from_chunk(chunk)) embeddings = chunk.embeddings @@ -159,32 +166,64 @@ def _index_vespa_chunks( for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings): embeddings_name_vector_map[f"mini_chunk_{ind}"] = m_c_embed 
- vespa_document = { - "fields": { - DOCUMENT_ID: document.id, - CHUNK_ID: chunk.chunk_id, - BLURB: chunk.blurb, - CONTENT: chunk.content, - SOURCE_TYPE: str(document.source.value), - SOURCE_LINKS: json.dumps(chunk.source_links), - SEMANTIC_IDENTIFIER: document.semantic_identifier, - SECTION_CONTINUATION: chunk.section_continuation, - METADATA: json.dumps(document.metadata), - EMBEDDINGS: embeddings_name_vector_map, - BOOST: 1, # Boost value always starts at 1 for 0 impact on weight - ALLOWED_USERS: cross_connector_document_metadata_map[ - document.id - ].allowed_users, - ALLOWED_GROUPS: cross_connector_document_metadata_map[ - document.id - ].allowed_user_groups, - } + vespa_document_fields = { + DOCUMENT_ID: document.id, + CHUNK_ID: chunk.chunk_id, + BLURB: chunk.blurb, + CONTENT: chunk.content, + SOURCE_TYPE: str(document.source.value), + SOURCE_LINKS: json.dumps(chunk.source_links), + SEMANTIC_IDENTIFIER: document.semantic_identifier, + SECTION_CONTINUATION: chunk.section_continuation, + METADATA: json.dumps(document.metadata), + EMBEDDINGS: embeddings_name_vector_map, + BOOST: 1, # Boost value always starts at 1 for 0 impact on weight + ALLOWED_USERS: cross_connector_document_metadata_map[ + document.id + ].allowed_users, + ALLOWED_GROUPS: cross_connector_document_metadata_map[ + document.id + ].allowed_user_groups, } - url = f"{DOCUMENT_ID_ENDPOINT}/{vespa_chunk_id}" + def _index_chunk( + url: str, + headers: dict[str, str], + fields: dict[str, Any], + ) -> Response: + logger.debug( + f"Hitting URL '{url}', with headers '{headers}', with fields '{fields}'" + ) + res = requests.post(url, headers=headers, json={"fields": fields}) + try: + res.raise_for_status() + return res + except Exception as e: + logger.error( + f"Failed to index document: '{document.id}'. 
Got response: '{res.text}'" + ) + raise e - res = requests.post(url, headers=json_header, json=vespa_document) - res.raise_for_status() + vespa_url = f"{DOCUMENT_ID_ENDPOINT}/{vespa_chunk_id}" + try: + _index_chunk(vespa_url, json_header, vespa_document_fields) + except HTTPError as e: + if cast(Response, e.response).status_code != 400: + raise e + + # if it's a 400 response, try again with invalid unicode chars removed + # only doing this on error to avoid having to go through the content + # char by char every time + vespa_document_fields[BLURB] = remove_invalid_unicode_chars( + cast(str, vespa_document_fields[BLURB]) + ) + vespa_document_fields[SEMANTIC_IDENTIFIER] = remove_invalid_unicode_chars( + cast(str, vespa_document_fields[SEMANTIC_IDENTIFIER]) + ) + vespa_document_fields[CONTENT] = remove_invalid_unicode_chars( + cast(str, vespa_document_fields[CONTENT]) + ) + _index_chunk(vespa_url, json_header, vespa_document_fields) insertion_records.add( DocumentInsertionRecord( @@ -218,11 +257,13 @@ def _build_vespa_filters( } for filter_key, filter_val in valid_filters.items(): if isinstance(filter_val, str): - filter_str += f'{filter_key} = "{filter_val}" and ' + filter_str += f'{filter_key} contains "{filter_val}" and ' elif isinstance(filter_val, list): - quoted_elems = [f'"{elem}"' for elem in filter_val] - filters_or = ",".join(quoted_elems) - filter_str += f"{filter_key} in [{filters_or}] and " + eq_elems = [ + f'{filter_key} contains "{elem}"' for elem in filter_val + ] + filters_or = " or ".join(eq_elems) + filter_str += f"({filters_or}) and " else: raise ValueError("Invalid filters provided") return filter_str @@ -271,6 +312,7 @@ class VespaIndex(DocumentIndex): If the changes cannot be applied without conflict with existing data, it will fail with a non 200 """ deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate" + logger.debug(f"Sending Vespa zip to {deploy_url}") headers = {"Content-Type": "application/zip"} with open(self.deployment_zip, "rb") as f: response = requests.post(deploy_url, headers=headers, data=f) @@ -365,9 +407,7 @@ class VespaIndex(DocumentIndex): + f"({{targetHits: {10 * num_to_retrieve}}}nearestNeighbor(embeddings, query_embedding))" ) - query_embedding = get_default_embedding_model().encode(query) - if not isinstance(query_embedding, list): - query_embedding = query_embedding.tolist() + query_embedding = embed_query(query) params = { "yql": yql, @@ -388,12 +428,11 @@ class VespaIndex(DocumentIndex): yql = ( VespaIndex.yql_base + vespa_where_clauses - + f'{{targetHits: {10 * num_to_retrieve}}}nearestNeighbor(embeddings, query_embedding) or {{grammar: "weakAnd"}}userInput(@query)' + + f"{{targetHits: {10 * num_to_retrieve}}}nearestNeighbor(embeddings, query_embedding) or " + + f'{{grammar: "weakAnd"}}userInput(@query)' ) - query_embedding = get_default_embedding_model().encode(query) - if not isinstance(query_embedding, list): - query_embedding = query_embedding.tolist() + query_embedding = embed_query(query) params = { "yql": yql, diff --git a/backend/danswer/datastores/vespa/utils.py b/backend/danswer/datastores/vespa/utils.py new file mode 100644 index 000000000..228b98dc9 --- /dev/null +++ b/backend/danswer/datastores/vespa/utils.py @@ -0,0 +1,12 @@ +import re + + +_illegal_xml_chars_RE = re.compile( + "[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]" +) + + +def remove_invalid_unicode_chars(text: str) -> str: + """Vespa does not take in unicode chars that aren't valid for XML. 
+ This removes them.""" + return _illegal_xml_chars_RE.sub("", text) diff --git a/backend/danswer/search/semantic_search.py b/backend/danswer/search/semantic_search.py index 6dedaa87c..f7ea4a462 100644 --- a/backend/danswer/search/semantic_search.py +++ b/backend/danswer/search/semantic_search.py @@ -12,7 +12,9 @@ from danswer.configs.app_configs import ENABLE_MINI_CHUNK from danswer.configs.app_configs import MINI_CHUNK_SIZE from danswer.configs.app_configs import NUM_RERANKED_RESULTS from danswer.configs.app_configs import NUM_RETURNED_HITS +from danswer.configs.model_configs import ASYMMETRIC_PREFIX from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS +from danswer.configs.model_configs import NORMALIZE_EMBEDDINGS from danswer.datastores.interfaces import DocumentIndex from danswer.datastores.interfaces import IndexFilter from danswer.search.models import Embedder @@ -90,7 +92,10 @@ def retrieve_ranked_documents( for ranked_chunk in ranked_chunks if ranked_chunk.source_links is not None ] - files_log_msg = f"Top links from semantic search: {', '.join(top_docs)}" + + files_log_msg = ( + f"Top links from semantic search: {', '.join(list(dict.fromkeys(top_docs)))}" + ) logger.info(files_log_msg) return ranked_chunks, top_chunks[num_rerank:] @@ -155,7 +160,12 @@ def encode_chunks( embeddings_np: list[numpy.ndarray] = [] for text_batch in text_batches: - embeddings_np.extend(embedding_model.encode(text_batch)) + # Normalize embeddings is only configured via model_configs.py, be sure to use right value for the set loss + embeddings_np.extend( + embedding_model.encode( + text_batch, normalize_embeddings=NORMALIZE_EMBEDDINGS + ) + ) embeddings: list[list[float]] = [embedding.tolist() for embedding in embeddings_np] embedding_ind_start = 0 @@ -177,6 +187,24 @@ def encode_chunks( return embedded_chunks +def embed_query( + query: str, + embedding_model: SentenceTransformer | None = None, + prefix: str = ASYMMETRIC_PREFIX, + normalize_embeddings: bool = NORMALIZE_EMBEDDINGS, +) -> list[float]: + model = embedding_model or get_default_embedding_model() + prefixed_query = prefix + query + query_embedding = model.encode( + prefixed_query, normalize_embeddings=normalize_embeddings + ) + + if not isinstance(query_embedding, list): + query_embedding = query_embedding.tolist() + + return query_embedding + + class DefaultEmbedder(Embedder): def embed(self, chunks: list[DocAwareChunk]) -> list[IndexChunk]: return encode_chunks(chunks) diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py index 6dbcea67a..695b79821 100644 --- a/backend/danswer/server/manage.py +++ b/backend/danswer/server/manage.py @@ -287,7 +287,7 @@ def get_connector_indexing_status( latest_index_attempt = cc_pair_to_latest_index_attempt.get( (connector.id, credential.id) ) - deletion_attemts = deletion_attempts_by_connector.get(connector.id, []) + deletion_attempts = deletion_attempts_by_connector.get(connector.id, []) indexing_statuses.append( ConnectorIndexingStatus( connector=ConnectorSnapshot.from_connector_db_model(connector), @@ -309,7 +309,7 @@ def get_connector_indexing_status( DeletionAttemptSnapshot.from_deletion_attempt_db_model( deletion_attempt ) - for deletion_attempt in deletion_attemts + for deletion_attempt in deletion_attempts ], is_deletable=check_deletion_attempt_is_allowed( connector_credential_pair=cc_pair diff --git a/deployment/docker_compose/docker-compose.dev.legacy.yml b/deployment/docker_compose/docker-compose.dev.legacy.yml new file mode 100644 index 
000000000..1982891f8 --- /dev/null +++ b/deployment/docker_compose/docker-compose.dev.legacy.yml @@ -0,0 +1,155 @@ +# This legacy version runs the app with typesense and qdrant together as the document indices +# Danswer is moving forward with Vespa to offer a consolidated index and a better search experience +version: '3' +services: + api_server: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: > + /bin/sh -c "alembic upgrade head && + echo \"Starting Danswer Api Server\" && + uvicorn danswer.main:app --host 0.0.0.0 --port 8080" + depends_on: + - relational_db + - vector_db + - search_engine + restart: always + ports: + - "8080:8080" + environment: + - DOCUMENT_INDEX_TYPE=split + - INTERNAL_MODEL_VERSION=${INTERNAL_MODEL_VERSION:-openai-chat-completion} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-gpt-3.5-turbo} + - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} + - GEN_AI_ENDPOINT=${GEN_AI_ENDPOINT:-} + - GEN_AI_HOST_TYPE=${GEN_AI_HOST_TYPE:-} + - POSTGRES_HOST=relational_db + - QDRANT_HOST=vector_db + - TYPESENSE_HOST=search_engine + - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} + - LOG_LEVEL=${LOG_LEVEL:-info} + - DISABLE_AUTH=${DISABLE_AUTH:-True} + - QA_TIMEOUT=${QA_TIMEOUT:-} + - OAUTH_TYPE=${OAUTH_TYPE:-google} + - OPENID_CONFIG_URL=${OPENID_CONFIG_URL:-} + - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} + - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} + - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} + - API_BASE_OPENAI=${API_BASE_OPENAI:-} + - API_TYPE_OPENAI=${API_TYPE_OPENAI:-} + - API_VERSION_OPENAI=${API_VERSION_OPENAI:-} + - AZURE_DEPLOYMENT_ID=${AZURE_DEPLOYMENT_ID:-} + volumes: + - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage + - model_cache_torch:/root/.cache/torch/ + - model_cache_nltk:/root/nltk_data/ + - model_cache_huggingface:/root/.cache/huggingface/ + background: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: /usr/bin/supervisord + depends_on: + - relational_db + - vector_db + restart: always + environment: + - DOCUMENT_INDEX_TYPE=split + - INTERNAL_MODEL_VERSION=${INTERNAL_MODEL_VERSION:-openai-chat-completion} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-gpt-3.5-turbo} + - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} + - GEN_AI_ENDPOINT=${GEN_AI_ENDPOINT:-} + - GEN_AI_HOST_TYPE=${GEN_AI_HOST_TYPE:-} + - POSTGRES_HOST=relational_db + - QDRANT_HOST=vector_db + - TYPESENSE_HOST=search_engine + - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} + - API_BASE_OPENAI=${API_BASE_OPENAI:-} + - API_TYPE_OPENAI=${API_TYPE_OPENAI:-} + - API_VERSION_OPENAI=${API_VERSION_OPENAI:-} + - AZURE_DEPLOYMENT_ID=${AZURE_DEPLOYMENT_ID:-} + - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-} + - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-} + - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-} + - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-} + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage + - model_cache_torch:/root/.cache/torch/ + - model_cache_nltk:/root/nltk_data/ + - model_cache_huggingface:/root/.cache/huggingface/ + web_server: + image: danswer/danswer-web-server:latest + build: + context: ../../web + dockerfile: Dockerfile + args: + - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + depends_on: + - api_server + 
restart: always + environment: + - INTERNAL_URL=http://api_server:8080 + - DISABLE_AUTH=${DISABLE_AUTH:-True} + - OAUTH_NAME=${OAUTH_NAME:-} + relational_db: + image: postgres:15.2-alpine + restart: always + environment: + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + ports: + - "5432:5432" + volumes: + - db_volume:/var/lib/postgresql/data + vector_db: + image: qdrant/qdrant:v1.3.0 + restart: always + environment: + - QDRANT__TELEMETRY_DISABLED=true + ports: + - "6333:6333" + volumes: + - qdrant_volume:/qdrant/storage + search_engine: + image: typesense/typesense:0.24.1 + restart: always + environment: + - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} + - TYPESENSE_DATA_DIR=/typesense/storage + ports: + - "8108:8108" + volumes: + - typesense_volume:/typesense/storage + nginx: + image: nginx:1.23.4-alpine + restart: always + # nginx will immediately crash with `nginx: [emerg] host not found in upstream` + # if api_server / web_server are not up + depends_on: + - api_server + - web_server + environment: + - DOMAIN=localhost + ports: + - "80:80" + - "3000:80" # allow for localhost:3000 usage, since that is the norm + volumes: + - ../data/nginx:/etc/nginx/conf.d + command: > + /bin/sh -c "envsubst '$$\{DOMAIN\}' < /etc/nginx/conf.d/app.conf.template.dev > /etc/nginx/conf.d/app.conf && + while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\"" +volumes: + local_dynamic_storage: + file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them + db_volume: + qdrant_volume: + typesense_volume: + model_cache_torch: + model_cache_nltk: + model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 5bf92a2b9..e5a3873d8 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -11,8 +11,7 @@ services: uvicorn danswer.main:app --host 0.0.0.0 --port 8080" depends_on: - relational_db - - vector_db - - search_engine + - index restart: always ports: - "8080:8080" @@ -23,9 +22,7 @@ services: - GEN_AI_ENDPOINT=${GEN_AI_ENDPOINT:-} - GEN_AI_HOST_TYPE=${GEN_AI_HOST_TYPE:-} - POSTGRES_HOST=relational_db - - QDRANT_HOST=vector_db - - TYPESENSE_HOST=search_engine - - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} + - VESPA_HOST=index - LOG_LEVEL=${LOG_LEVEL:-info} - DISABLE_AUTH=${DISABLE_AUTH:-True} - QA_TIMEOUT=${QA_TIMEOUT:-} @@ -52,7 +49,7 @@ services: command: /usr/bin/supervisord depends_on: - relational_db - - vector_db + - index restart: always environment: - INTERNAL_MODEL_VERSION=${INTERNAL_MODEL_VERSION:-openai-chat-completion} @@ -61,9 +58,7 @@ services: - GEN_AI_ENDPOINT=${GEN_AI_ENDPOINT:-} - GEN_AI_HOST_TYPE=${GEN_AI_HOST_TYPE:-} - POSTGRES_HOST=relational_db - - QDRANT_HOST=vector_db - - TYPESENSE_HOST=search_engine - - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} + - VESPA_HOST=index - API_BASE_OPENAI=${API_BASE_OPENAI:-} - API_TYPE_OPENAI=${API_TYPE_OPENAI:-} - API_VERSION_OPENAI=${API_VERSION_OPENAI:-} @@ -103,25 +98,14 @@ services: - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data - vector_db: - image: qdrant/qdrant:v1.3.0 + index: + image: vespaengine/vespa:8 restart: always - environment: - - QDRANT__TELEMETRY_DISABLED=true ports: - - "6333:6333" + - "19071:19071" + - "8081:8081" volumes: - - qdrant_volume:/qdrant/storage - search_engine: - image: typesense/typesense:0.24.1 - 
restart: always - environment: - - TYPESENSE_API_KEY=${TYPESENSE_API_KEY:-typesense_api_key} - - TYPESENSE_DATA_DIR=/typesense/storage - ports: - - "8108:8108" - volumes: - - typesense_volume:/typesense/storage + - vespa_volume:/opt/vespa/var nginx: image: nginx:1.23.4-alpine restart: always @@ -144,8 +128,7 @@ volumes: local_dynamic_storage: file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: - qdrant_volume: - typesense_volume: + vespa_volume: model_cache_torch: model_cache_nltk: model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.prod.legacy.yml b/deployment/docker_compose/docker-compose.prod.legacy.yml new file mode 100644 index 000000000..38a528df6 --- /dev/null +++ b/deployment/docker_compose/docker-compose.prod.legacy.yml @@ -0,0 +1,128 @@ +version: '3' +services: + api_server: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: > + /bin/sh -c "alembic upgrade head && + echo \"Starting Danswer Api Server\" && + uvicorn danswer.main:app --host 0.0.0.0 --port 8080" + depends_on: + - relational_db + - vector_db + - search_engine + restart: always + env_file: + - .env + environment: + - DOCUMENT_INDEX_TYPE=split + - POSTGRES_HOST=relational_db + - QDRANT_HOST=vector_db + - TYPESENSE_HOST=search_engine + volumes: + - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage + - model_cache_torch:/root/.cache/torch/ + - model_cache_nltk:/root/nltk_data/ + - model_cache_huggingface:/root/.cache/huggingface/ + background: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: /usr/bin/supervisord + depends_on: + - relational_db + - vector_db + restart: always + env_file: + - .env + environment: + - DOCUMENT_INDEX_TYPE=split + - POSTGRES_HOST=relational_db + - QDRANT_HOST=vector_db + - TYPESENSE_HOST=search_engine + volumes: + - local_dynamic_storage:/home/storage + - file_connector_tmp_storage:/home/file_connector_storage + - model_cache_torch:/root/.cache/torch/ + - model_cache_nltk:/root/nltk_data/ + - model_cache_huggingface:/root/.cache/huggingface/ + web_server: + image: danswer/danswer-web-server:latest + build: + context: ../../web + dockerfile: Dockerfile + depends_on: + - api_server + restart: always + env_file: + - .env + environment: + - INTERNAL_URL=http://api_server:8080 + relational_db: + image: postgres:15.2-alpine + restart: always + # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file + env_file: + - .env + volumes: + - db_volume:/var/lib/postgresql/data + vector_db: + image: qdrant/qdrant:v1.3.0 + restart: always + env_file: + - .env + environment: + - QDRANT__TELEMETRY_DISABLED=true + volumes: + - qdrant_volume:/qdrant/storage + search_engine: + image: typesense/typesense:0.24.1 + restart: always + # TYPESENSE_API_KEY must be set in .env file + environment: + - TYPESENSE_DATA_DIR=/typesense/storage + env_file: + - .env + volumes: + - typesense_volume:/typesense/storage + nginx: + image: nginx:1.23.4-alpine + restart: always + # nginx will immediately crash with `nginx: [emerg] host not found in upstream` + # if api_server / web_server are not up + depends_on: + - api_server + - web_server + ports: + - "80:80" + - "443:443" + volumes: + - ../data/nginx:/etc/nginx/conf.d + - ../data/certbot/conf:/etc/letsencrypt + - ../data/certbot/www:/var/www/certbot + command: > + /bin/sh -c "envsubst '$$\{DOMAIN\}' < 
/etc/nginx/conf.d/app.conf.template > /etc/nginx/conf.d/app.conf + && while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\"" + env_file: + - .env.nginx + # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71 + certbot: + image: certbot/certbot + restart: always + volumes: + - ../data/certbot/conf:/etc/letsencrypt + - ../data/certbot/www:/var/www/certbot + entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'" +volumes: + local_dynamic_storage: + file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them + db_volume: + qdrant_volume: + typesense_volume: + model_cache_torch: + model_cache_nltk: + model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.prod.yml b/deployment/docker_compose/docker-compose.prod.yml index 40349515a..883337592 100644 --- a/deployment/docker_compose/docker-compose.prod.yml +++ b/deployment/docker_compose/docker-compose.prod.yml @@ -11,15 +11,13 @@ services: uvicorn danswer.main:app --host 0.0.0.0 --port 8080" depends_on: - relational_db - - vector_db - - search_engine + - index restart: always env_file: - .env environment: - POSTGRES_HOST=relational_db - - QDRANT_HOST=vector_db - - TYPESENSE_HOST=search_engine + - VESPA_HOST=index volumes: - local_dynamic_storage:/home/storage - file_connector_tmp_storage:/home/file_connector_storage @@ -34,14 +32,13 @@ services: command: /usr/bin/supervisord depends_on: - relational_db - - vector_db + - index restart: always env_file: - .env environment: - POSTGRES_HOST=relational_db - - QDRANT_HOST=vector_db - - TYPESENSE_HOST=search_engine + - VESPA_HOST=index volumes: - local_dynamic_storage:/home/storage - file_connector_tmp_storage:/home/file_connector_storage @@ -68,25 +65,14 @@ services: - .env volumes: - db_volume:/var/lib/postgresql/data - vector_db: - image: qdrant/qdrant:v1.3.0 + index: + image: vespaengine/vespa:8 restart: always - env_file: - - .env - environment: - - QDRANT__TELEMETRY_DISABLED=true + ports: + - "19071:19071" + - "8081:8081" volumes: - - qdrant_volume:/qdrant/storage - search_engine: - image: typesense/typesense:0.24.1 - restart: always - # TYPESENSE_API_KEY must be set in .env file - environment: - - TYPESENSE_DATA_DIR=/typesense/storage - env_file: - - .env - volumes: - - typesense_volume:/typesense/storage + - vespa_volume:/opt/vespa/var nginx: image: nginx:1.23.4-alpine restart: always @@ -119,8 +105,7 @@ volumes: local_dynamic_storage: file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: - qdrant_volume: - typesense_volume: + vespa_volume: model_cache_torch: model_cache_nltk: model_cache_huggingface: diff --git a/deployment/kubernetes/api_server-service-deployment.yaml b/deployment/kubernetes/api_server-service-deployment.yaml index 0632690ef..8c7ec4337 100644 --- a/deployment/kubernetes/api_server-service-deployment.yaml +++ b/deployment/kubernetes/api_server-service-deployment.yaml @@ -42,15 +42,8 @@ spec: env: - name: POSTGRES_HOST value: relational-db-service - - name: QDRANT_HOST - value: vector-db-service - - name: TYPESENSE_HOST - value: search-engine-service - - name: TYPESENSE_API_KEY - valueFrom: - secretKeyRef: - name: danswer-secrets - key: typesense_api_key + - name: VESPA_HOST + value: document-index-service - name: GOOGLE_OAUTH_CLIENT_ID valueFrom: secretKeyRef: diff --git 
a/deployment/kubernetes/background-deployment.yaml b/deployment/kubernetes/background-deployment.yaml index 92905c710..84501fb13 100644 --- a/deployment/kubernetes/background-deployment.yaml +++ b/deployment/kubernetes/background-deployment.yaml @@ -20,15 +20,8 @@ spec: env: - name: POSTGRES_HOST value: relational-db-service - - name: QDRANT_HOST - value: vector-db-service - - name: TYPESENSE_HOST - value: search-engine-service - - name: TYPESENSE_API_KEY - valueFrom: - secretKeyRef: - name: danswer-secrets - key: typesense_api_key + - name: VESPA_HOST + value: document-index-service volumeMounts: - name: dynamic-storage mountPath: /home/storage diff --git a/deployment/kubernetes/persistent-volumes.yaml b/deployment/kubernetes/persistent-volumes.yaml index df8321379..8376b98e6 100644 --- a/deployment/kubernetes/persistent-volumes.yaml +++ b/deployment/kubernetes/persistent-volumes.yaml @@ -19,36 +19,3 @@ spec: resources: requests: storage: 1Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: db-volume-claim -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: qdrant-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: typesense-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi \ No newline at end of file diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml index 5b78aebe6..1cd00fa1b 100644 --- a/deployment/kubernetes/postgres-service-deployment.yaml +++ b/deployment/kubernetes/postgres-service-deployment.yaml @@ -9,13 +9,14 @@ spec: - protocol: TCP port: 5432 targetPort: 5432 - type: ClusterIP + clusterIP: None --- apiVersion: apps/v1 -kind: Deployment +kind: StatefulSet metadata: - name: relational-db-deployment + name: relational-db-statefulset spec: + serviceName: relational-db-service replicas: 1 selector: matchLabels: @@ -43,8 +44,13 @@ spec: - containerPort: 5432 volumeMounts: - mountPath: /var/lib/postgresql/data - name: db-volume - volumes: - - name: db-volume - persistentVolumeClaim: - claimName: db-volume-claim + name: db-storage + volumeClaimTemplates: + - metadata: + name: db-storage + spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + # Adjust the storage request size as needed. 
+          storage: 1Gi
diff --git a/deployment/kubernetes/qdrant-service-deployment.yaml b/deployment/kubernetes/qdrant-service-deployment.yaml
deleted file mode 100644
index 5fca0ffa9..000000000
--- a/deployment/kubernetes/qdrant-service-deployment.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: vector-db-service
-spec:
-  selector:
-    app: qdrant
-  ports:
-    - name: qdrant-port
-      protocol: TCP
-      port: 6333
-      targetPort: 6333
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: qdrant-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: qdrant
-  template:
-    metadata:
-      labels:
-        app: qdrant
-    spec:
-      containers:
-        - name: qdrant
-          image: qdrant/qdrant:v1.1.3
-          ports:
-            - containerPort: 6333
-          volumeMounts:
-            - name: qdrant-storage
-              mountPath: /qdrant/storage
-          env:
-            - name: QDRANT__TELEMETRY_DISABLED
-              value: "true"
-      volumes:
-        - name: qdrant-storage
-          persistentVolumeClaim:
-            claimName: qdrant-pvc
diff --git a/deployment/kubernetes/secrets.yaml b/deployment/kubernetes/secrets.yaml
index a107e11f5..4935eb843 100644
--- a/deployment/kubernetes/secrets.yaml
+++ b/deployment/kubernetes/secrets.yaml
@@ -7,6 +7,5 @@ type: Opaque
data:
  postgres_user: cG9zdGdyZXM= # "postgres" base64 encoded
  postgres_password: cGFzc3dvcmQ= # "password" base64 encoded
-  typesense_api_key: dHlwZXNlbnNlX2FwaV9rZXk= # "typesense_api_key" base64 encoded
-  google_oauth_client_id: REPLACE-THIS # You will need to provide this, use echo -n "your-client-id" | base64
-  google_oauth_client_secret: REPLACE-THIS # You will need to provide this, use echo -n "your-client-id" | base64
+  google_oauth_client_id: MjcwNjk3ODEzMi1iMzZnb20wa2Fhb3I2MmlwYWt2dmRxdm91OGRic2d1cC5hcHBzLmdvb2dsZXVzZXJjb250ZW50LmNvbQ== # You will need to provide this, use echo -n "your-client-id" | base64
+  google_oauth_client_secret: R09DU1BYLWlZbDBSN1ZvYnk0cjZJRUFmekRqdjhad0pnOGI= # You will need to provide this, use echo -n "your-client-secret" | base64
diff --git a/deployment/kubernetes/typesense-service-deployment.yaml b/deployment/kubernetes/typesense-service-deployment.yaml
deleted file mode 100644
index b2ea6231f..000000000
--- a/deployment/kubernetes/typesense-service-deployment.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: search-engine-service
-spec:
-  selector:
-    app: typesense
-  ports:
-    - name: typesense-port
-      protocol: TCP
-      port: 8108
-      targetPort: 8108
-  type: LoadBalancer
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: typesense-deployment
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: typesense
-  template:
-    metadata:
-      labels:
-        app: typesense
-    spec:
-      containers:
-        - name: typesense
-          image: typesense/typesense:0.24.1
-          ports:
-            - containerPort: 8108
-          env:
-            - name: TYPESENSE_API_KEY
-              valueFrom:
-                secretKeyRef:
-                  name: danswer-secrets
-                  key: typesense_api_key
-            - name: TYPESENSE_DATA_DIR
-              value: /typesense/storage
-          volumeMounts:
-            - name: typesense-storage
-              mountPath: /typesense/storage
-      volumes:
-        - name: typesense-storage
-          persistentVolumeClaim:
-            claimName: typesense-pvc
diff --git a/deployment/kubernetes/vespa-service-deployment.yaml b/deployment/kubernetes/vespa-service-deployment.yaml
new file mode 100644
index 000000000..03345181d
--- /dev/null
+++ b/deployment/kubernetes/vespa-service-deployment.yaml
@@ -0,0 +1,63 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: document-index-service
+spec:
+  selector:
+    app: vespa
+  ports:
+    - name: vespa-tenant-port
+      protocol: TCP
+      port: 19071
targetPort: 19071 + - name: vespa-port + protocol: TCP + port: 8081 + targetPort: 8081 + type: LoadBalancer +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa + labels: + app: vespa +spec: + replicas: 1 + serviceName: vespa + selector: + matchLabels: + app: vespa + template: + metadata: + labels: + app: vespa + spec: + containers: + - name: vespa + image: vespaengine/vespa:8 + imagePullPolicy: IfNotPresent + securityContext: + privileged: true + runAsUser: 0 + ports: + - containerPort: 19071 + - containerPort: 8081 + readinessProbe: + httpGet: + path: /state/v1/health + port: 19071 + scheme: HTTP + volumeMounts: + - name: vespa-storage + mountPath: /opt/vespa/var/ + volumeClaimTemplates: + - metadata: + name: vespa-storage + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + # Adjust the storage request size as needed. + storage: 1Gi
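To check that the Vespa StatefulSet is healthy once these manifests are applied, one option is to port-forward the config-server port and poll the same endpoint the readinessProbe above uses — a sketch, assuming `kubectl` is pointed at the right cluster and namespace:

```bash
# Forward the Vespa config-server port from the cluster to localhost
kubectl port-forward service/document-index-service 19071:19071 &

# Same health endpoint the readinessProbe polls -- expect "status": {"code": "up"}
curl -s http://localhost:19071/state/v1/health
```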