Cohere (#3111)

* add cohere default * finalize * minor improvement * update * update * update configs * ensure we properly expose name(space) for slackbot * update config * config
2025-03-26 17:51:54 +01:00 · 2024-11-13 17:58:54 -08:00 · 2024-11-13 17:58:54 -08:00 · 7c841051ed
commit 7c841051ed
parent 6e91964924
15 changed files with 3852 additions and 10849 deletions
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@ -503,3 +503,7 @@ _API_KEY_HASH_ROUNDS_RAW = os.environ.get("API_KEY_HASH_ROUNDS")
 API_KEY_HASH_ROUNDS = (
    int(_API_KEY_HASH_ROUNDS_RAW) if _API_KEY_HASH_ROUNDS_RAW else None
 )
+
+
+POD_NAME = os.environ.get("POD_NAME")
+POD_NAMESPACE = os.environ.get("POD_NAMESPACE")
--- a/backend/danswer/danswerbot/slack/listener.py
+++ b/backend/danswer/danswerbot/slack/listener.py
@ -17,6 +17,8 @@ from slack_sdk import WebClient
 from slack_sdk.socket_mode.request import SocketModeRequest
 from slack_sdk.socket_mode.response import SocketModeResponse

+from danswer.configs.app_configs import POD_NAME
+from danswer.configs.app_configs import POD_NAMESPACE
 from danswer.configs.constants import DanswerRedisLocks
 from danswer.configs.constants import MessageType
 from danswer.configs.danswerbot_configs import DANSWER_BOT_REPHRASE_MESSAGE
@ -85,7 +87,9 @@ logger = setup_logger()

 # Prometheus metric for HPA
 active_tenants_gauge = Gauge(
-    "active_tenants", "Number of active tenants handled by this pod"
+    "active_tenants",
+    "Number of active tenants handled by this pod",
+    ["namespace", "pod"],
 )

 # In rare cases, some users have been experiencing a massive amount of trivial messages coming through
@ -148,7 +152,9 @@ class SlackbotHandler:
        while not self._shutdown_event.is_set():
            try:
                self.acquire_tenants()
-                active_tenants_gauge.set(len(self.tenant_ids))
+                active_tenants_gauge.labels(namespace=POD_NAMESPACE, pod=POD_NAME).set(
+                    len(self.tenant_ids)
+                )
                logger.debug(f"Current active tenants: {len(self.tenant_ids)}")
            except Exception as e:
                logger.exception(f"Error in Slack acquisition: {e}")
--- a/backend/danswer/seeding/initial_docs.json
+++ b/backend/danswer/seeding/initial_docs.json
--- a/backend/danswer/seeding/initial_docs_cohere.json
+++ b/backend/danswer/seeding/initial_docs_cohere.json
@ -0,0 +1,44 @@
+[
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/overview",
+    "title": "Use Cases Overview",
+    "content": "How to leverage Danswer in your organization\n\nDanswer Overview\nDanswer is the AI Assistant connected to your organization's docs, apps, and people. Danswer makes Generative AI more versatile for work by enabling new types of questions like \"What is the most common feature request we've heard from customers this month\". Whereas other AI systems have no context of your team and are generally unhelpful with work related questions, Danswer makes it possible to ask these questions in natural language and get back answers in seconds.\n\nDanswer can connect to +30 different tools and the use cases are not limited to the ones in the following pages. The highlighted use cases are for inspiration and come from feedback gathered from our users and customers.\n\n\nCommon Getting Started Questions:\n\nWhy are these docs connected in my Danswer deployment?\nAnswer: This is just an example of how connectors work in Danswer. You can connect up your own team's knowledge and you will be able to ask questions unique to your organization. Danswer will keep all of the knowledge up to date and in sync with your connected applications.\n\nIs my data being sent anywhere when I connect it up to Danswer?\nAnswer: No! Danswer is built with data security as our highest priority. We open sourced it so our users can know exactly what is going on with their data. By default all of the document processing happens within Danswer. The only time it is sent outward is for the GenAI call to generate answers.\n\nWhere is the feature for auto sync-ing document level access permissions from all connected sources?\nAnswer: This falls under the Enterprise Edition set of Danswer features built on top of the MIT/community edition. If you are on Danswer Cloud, you have access to them by default. If you're running it yourself, reach out to the Danswer team to receive access.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/enterprise_search",
+    "title": "Enterprise Search",
+    "content": "Value of Enterprise Search with Danswer\n\nWhat is Enterprise Search and why is it Important?\nAn Enterprise Search system gives team members a single place to access all of the disparate knowledge of an organization. Critical information is saved across a host of channels like call transcripts with prospects, engineering design docs, IT runbooks, customer support email exchanges, project management tickets, and more. As fast moving teams scale up, information gets spread out and more disorganized.\n\nSince it quickly becomes infeasible to check across every source, decisions get made on incomplete information, employee satisfaction decreases, and the most valuable members of your team are tied up with constant distractions as junior teammates are unable to unblock themselves. Danswer solves this problem by letting anyone on the team access all of the knowledge across your organization in a permissioned and secure way. Users can ask questions in natural language and get back answers and documents across all of the connected sources instantly.\n\nWhat's the real cost?\nA typical knowledge worker spends over 2 hours a week on search, but more than that, the cost of incomplete or incorrect information can be extremely high. Customer support/success that isn't able to find the reference to similar cases could cause hours or even days of delay leading to lower customer satisfaction or in the worst case - churn. An account exec not realizing that a prospect had previously mentioned a specific need could lead to lost deals. An engineer not realizing a similar feature had previously been built could result in weeks of wasted development time and tech debt with duplicate implementation. With a lack of knowledge, your whole organization is navigating in the dark - inefficient and mistake prone.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/enterprise_search",
+    "title": "Enterprise Search",
+    "content": "More than Search\nWhen analyzing the entire corpus of knowledge within your company is as easy as asking a question in a search bar, your entire team can stay informed and up to date. Danswer also makes it trivial to identify where knowledge is well documented and where it is lacking. Team members who are centers of knowledge can begin to effectively document their expertise since it is no longer being thrown into a black hole. All of this allows the organization to achieve higher efficiency and drive business outcomes.\n\nWith Generative AI, the entire user experience has evolved as well. For example, instead of just finding similar cases for your customer support team to reference, Danswer breaks down the issue and explains it so that even the most junior members can understand it. This in turn lets them give the most holistic and technically accurate response possible to your customers. On the other end, even the super stars of your sales team will not be able to review 10 hours of transcripts before hopping on that critical call, but Danswer can easily parse through it in mere seconds and give crucial context to help your team close.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/ai_platform",
+    "title": "AI Platform",
+    "content": "Build AI Agents powered by the knowledge and workflows specific to your organization.\n\nBeyond Answers\nAgents enabled by generative AI and reasoning capable models are helping teams to automate their work. Danswer is helping teams make it happen. Danswer provides out of the box user chat sessions, attaching custom tools, handling LLM reasoning, code execution, data analysis, referencing internal knowledge, and much more.\n\nDanswer as a platform is not a no-code agent builder. We are made by developers for developers and this gives your team the full flexibility and power to create agents not constrained by blocks and simple logic paths.\n\nFlexibility and Extensibility\nDanswer is open source and completely whitebox. This not only gives transparency to what happens within the system but also means that your team can directly modify the source code to suit your unique needs.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/customer_support",
+    "title": "Customer Support",
+    "content": "Help your customer support team instantly answer any question across your entire product.\n\nAI Enabled Support\nCustomer support agents have one of the highest breadth jobs. They field requests that cover the entire surface area of the product and need to help your users find success on extremely short timelines. Because they're not the same people who designed or built the system, they often lack the depth of understanding needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team optimize the speed and quality of these critical customer-facing interactions.\n\nThe Importance of Context\nThere are two critical components of AI copilots for customer support. The first is that the AI system needs to be connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as pull requests in a code repository. The second critical component is the ability of the AI system to break down difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able to chat back and forth with the system to build a better understanding.\n\nDanswer takes care of both of these. The system connects up to over 30+ different applications and the knowledge is pulled in constantly so that the information access is always up to date.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/sales",
+    "title": "Sales",
+    "content": "Keep your team up to date on every conversation and update so they can close.\n\nRecall Every Detail\nBeing able to instantly revisit every detail of any call without reading transcripts is helping Sales teams provide more tailored pitches, build stronger relationships, and close more deals. Instead of searching and reading through hours of transcripts in preparation for a call, your team can now ask Danswer \"What specific features was ACME interested in seeing for the demo\". Since your team doesn't have time to read every transcript prior to a call, Danswer provides a more thorough summary because it can instantly parse hundreds of pages and distill out the relevant information. Even for fast lookups it becomes much more convenient - for example to brush up on connection building topics by asking \"What rapport building topic did we chat about in the last call with ACME\".\n\nKnow Every Product Update\nIt is impossible for Sales teams to keep up with every product update. Because of this, when a prospect has a question that the Sales team does not know, they have no choice but to rely on the Product and Engineering orgs to get an authoritative answer. Not only is this distracting to the other teams, it also slows down the time to respond to the prospect (and as we know, time is the biggest killer of deals). With Danswer, it is even possible to get answers live on call because of how fast accessing information becomes. A question like \"Have we shipped the Microsoft AD integration yet?\" can now be answered in seconds meaning that prospects can get answers while on the call instead of asynchronously and sales cycles are reduced as a result.",
+    "chunk_ind": 0
+  },
+  {
+    "url": "https://docs.danswer.dev/more/use_cases/operations",
+    "title": "Operations",
+    "content": "Double the productivity of your Ops teams like IT, HR, etc.\n\nAutomatically Resolve Tickets\nModern teams are leveraging AI to auto-resolve up to 50% of tickets. Whether it is an employee asking about benefits details or how to set up the VPN for remote work, Danswer can help your team help themselves. This frees up your team to do the real impactful work of landing star candidates or improving your internal processes.\n\nAI Aided Onboarding\nOne of the periods where your team needs the most help is when they're just ramping up. Instead of feeling lost in dozens of new tools, Danswer gives them a single place where they can ask about anything in natural language. Whether it's how to set up their work environment or what their onboarding goals are, Danswer can walk them through every step with the help of Generative AI. This lets your team feel more empowered and gives time back to the more seasoned members of your team to focus on moving the needle.",
+    "chunk_ind": 0
+  }
+]
--- a/backend/danswer/seeding/load_docs.py
+++ b/backend/danswer/seeding/load_docs.py
@ -32,7 +32,7 @@ from danswer.key_value_store.interface import KvKeyNotFoundError
 from danswer.server.documents.models import ConnectorBase
 from danswer.utils.logger import setup_logger
 from danswer.utils.retry_wrapper import retry_builder
-
+from danswer.utils.variable_functionality import fetch_versioned_implementation

 logger = setup_logger()

@ -91,7 +91,21 @@ def _create_indexable_chunks(
    return list(ids_to_documents.values()), chunks


-def seed_initial_documents(db_session: Session, tenant_id: str | None) -> None:
+# Cohere is used in EE version
+def load_processed_docs(cohere_enabled: bool) -> list[dict]:
+    initial_docs_path = os.path.join(
+        os.getcwd(),
+        "danswer",
+        "seeding",
+        "initial_docs.json",
+    )
+    processed_docs = json.load(open(initial_docs_path))
+    return processed_docs
+
+
+def seed_initial_documents(
+    db_session: Session, tenant_id: str | None, cohere_enabled: bool = False
+) -> None:
    """
    Seed initial documents so users don't have an empty index to start

@ -132,7 +146,9 @@ def seed_initial_documents(db_session: Session, tenant_id: str | None) -> None:
        return

    search_settings = get_current_search_settings(db_session)
-    if search_settings.model_name != DEFAULT_DOCUMENT_ENCODER_MODEL:
+    if search_settings.model_name != DEFAULT_DOCUMENT_ENCODER_MODEL and not (
+        search_settings.model_name == "embed-english-v3.0" and cohere_enabled
+    ):
        logger.info("Embedding model has been updated, skipping")
        return

@ -172,11 +188,10 @@ def seed_initial_documents(db_session: Session, tenant_id: str | None) -> None:
        last_successful_index_time=last_index_time,
    )
    cc_pair_id = cast(int, result.data)
-
-    initial_docs_path = os.path.join(
-        os.getcwd(), "danswer", "seeding", "initial_docs.json"
-    )
-    processed_docs = json.load(open(initial_docs_path))
+    processed_docs = fetch_versioned_implementation(
+        "danswer.seeding.load_docs",
+        "load_processed_docs",
+    )(cohere_enabled)

    docs, chunks = _create_indexable_chunks(processed_docs, tenant_id)

--- a/backend/danswer/setup.py
+++ b/backend/danswer/setup.py
@ -59,7 +59,9 @@ from shared_configs.model_server_models import SupportedEmbeddingModel
 logger = setup_logger()


-def setup_danswer(db_session: Session, tenant_id: str | None) -> None:
+def setup_danswer(
+    db_session: Session, tenant_id: str | None, cohere_enabled: bool = False
+) -> None:
    """
    Setup Danswer for a particular tenant. In the Single Tenant case, it will set it up for the default schema
    on server startup. In the MT case, it will be called when the tenant is created.
@ -148,7 +150,7 @@ def setup_danswer(db_session: Session, tenant_id: str | None) -> None:
    # update multipass indexing setting based on GPU availability
    update_default_multipass_indexing(db_session)

-    seed_initial_documents(db_session, tenant_id)
+    seed_initial_documents(db_session, tenant_id, cohere_enabled)


 def translate_saved_search_settings(db_session: Session) -> None:
--- a/backend/ee/danswer/seeding/load_docs.py
+++ b/backend/ee/danswer/seeding/load_docs.py
@ -0,0 +1,45 @@
+import json
+import os
+from typing import cast
+from typing import List
+
+from cohere import Client
+
+from ee.danswer.configs.app_configs import COHERE_DEFAULT_API_KEY
+
+Embedding = List[float]
+
+
+def load_processed_docs(cohere_enabled: bool) -> list[dict]:
+    base_path = os.path.join(os.getcwd(), "danswer", "seeding")
+
+    if cohere_enabled and COHERE_DEFAULT_API_KEY:
+        initial_docs_path = os.path.join(base_path, "initial_docs_cohere.json")
+        processed_docs = json.load(open(initial_docs_path))
+
+        cohere_client = Client(api_key=COHERE_DEFAULT_API_KEY)
+        embed_model = "embed-english-v3.0"
+
+        for doc in processed_docs:
+            title_embed_response = cohere_client.embed(
+                texts=[doc["title"]],
+                model=embed_model,
+                input_type="search_document",
+            )
+            content_embed_response = cohere_client.embed(
+                texts=[doc["content"]],
+                model=embed_model,
+                input_type="search_document",
+            )
+
+            doc["title_embedding"] = cast(
+                List[Embedding], title_embed_response.embeddings
+            )[0]
+            doc["content_embedding"] = cast(
+                List[Embedding], content_embed_response.embeddings
+            )[0]
+    else:
+        initial_docs_path = os.path.join(base_path, "initial_docs.json")
+        processed_docs = json.load(open(initial_docs_path))
+
+    return processed_docs
--- a/backend/ee/danswer/server/tenants/provisioning.py
+++ b/backend/ee/danswer/server/tenants/provisioning.py
@ -4,6 +4,7 @@ import uuid

 import aiohttp  # Async HTTP client
 from fastapi import HTTPException
+from sqlalchemy import select
 from sqlalchemy.orm import Session

 from danswer.auth.users import exceptions
@ -13,6 +14,8 @@ from danswer.db.engine import get_sqlalchemy_engine
 from danswer.db.llm import update_default_provider
 from danswer.db.llm import upsert_cloud_embedding_provider
 from danswer.db.llm import upsert_llm_provider
+from danswer.db.models import IndexModelStatus
+from danswer.db.models import SearchSettings
 from danswer.db.models import UserTenantMapping
 from danswer.llm.llm_provider_options import ANTHROPIC_MODEL_NAMES
 from danswer.llm.llm_provider_options import ANTHROPIC_PROVIDER_NAME
@ -104,9 +107,19 @@ async def provision_tenant(tenant_id: str, email: str) -> None:
        await asyncio.to_thread(run_alembic_migrations, tenant_id)

        with get_session_with_tenant(tenant_id) as db_session:
-            setup_danswer(db_session, tenant_id)
            configure_default_api_keys(db_session)

+            current_search_settings = (
+                db_session.query(SearchSettings)
+                .filter_by(status=IndexModelStatus.FUTURE)
+                .first()
+            )
+            cohere_enabled = (
+                current_search_settings is not None
+                and current_search_settings.provider_type == EmbeddingProvider.COHERE
+            )
+            setup_danswer(db_session, tenant_id, cohere_enabled=cohere_enabled)
+
        add_users_to_tenant([email], tenant_id)

    except Exception as e:
@ -206,11 +219,51 @@ def configure_default_api_keys(db_session: Session) -> None:
            provider_type=EmbeddingProvider.COHERE,
            api_key=COHERE_DEFAULT_API_KEY,
        )
+
        try:
+            logger.info("Attempting to upsert Cohere cloud embedding provider")
            upsert_cloud_embedding_provider(db_session, cloud_embedding_provider)
-        except Exception as e:
-            logger.error(f"Failed to configure Cohere embedding provider: {e}")
+            logger.info("Successfully upserted Cohere cloud embedding provider")
+
+            logger.info("Updating search settings with Cohere embedding model details")
+            query = (
+                select(SearchSettings)
+                .where(SearchSettings.status == IndexModelStatus.FUTURE)
+                .order_by(SearchSettings.id.desc())
+            )
+            result = db_session.execute(query)
+            current_search_settings = result.scalars().first()
+
+            if current_search_settings:
+                current_search_settings.model_name = (
+                    "embed-english-v3.0"  # Cohere's latest model as of now
+                )
+                current_search_settings.model_dim = (
+                    1024  # Cohere's embed-english-v3.0 dimension
+                )
+                current_search_settings.provider_type = EmbeddingProvider.COHERE
+                current_search_settings.index_name = (
+                    "danswer_chunk_cohere_embed_english_v3_0"
+                )
+                current_search_settings.query_prefix = ""
+                current_search_settings.passage_prefix = ""
+                db_session.commit()
+            else:
+                raise RuntimeError(
+                    "No search settings specified, DB is not in a valid state"
+                )
+            logger.info("Fetching updated search settings to verify changes")
+            updated_query = (
+                select(SearchSettings)
+                .where(SearchSettings.status == IndexModelStatus.PRESENT)
+                .order_by(SearchSettings.id.desc())
+            )
+            updated_result = db_session.execute(updated_query)
+            updated_result.scalars().first()
+
+        except Exception:
+            logger.exception("Failed to configure Cohere embedding provider")
    else:
-        logger.error(
+        logger.info(
            "COHERE_DEFAULT_API_KEY not set, skipping Cohere embedding provider configuration"
        )
--- a/backend/requirements/dev.txt
+++ b/backend/requirements/dev.txt
@ -26,4 +26,5 @@ lxml==5.3.0
 lxml_html_clean==0.2.2
 boto3-stubs[s3]==1.34.133
 pandas==2.2.3
-pandas-stubs==2.2.3.241009
+pandas-stubs==2.2.3.241009
+cohere==5.6.1
--- a/backend/requirements/ee.txt
+++ b/backend/requirements/ee.txt
@ -1 +1,2 @@
-python3-saml==1.15.0
+python3-saml==1.15.0
+cohere==5.6.1
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@ -142,14 +142,14 @@ async def async_return_default_schema(*args: Any, **kwargs: Any) -> str:
 # Prefix used for all tenant ids
 TENANT_ID_PREFIX = "tenant_"

-ALLOWED_SLACK_BOT_TENANT_IDS = os.environ.get("ALLOWED_SLACK_BOT_TENANT_IDS")
+DISALLOWED_SLACK_BOT_TENANT_IDS = os.environ.get("DISALLOWED_SLACK_BOT_TENANT_IDS")
 DISALLOWED_SLACK_BOT_TENANT_LIST = (
-    [tenant.strip() for tenant in ALLOWED_SLACK_BOT_TENANT_IDS.split(",")]
-    if ALLOWED_SLACK_BOT_TENANT_IDS
+    [tenant.strip() for tenant in DISALLOWED_SLACK_BOT_TENANT_IDS.split(",")]
+    if DISALLOWED_SLACK_BOT_TENANT_IDS
    else None
 )

-IGNORED_SYNCING_TENANT_IDS = os.environ.get("IGNORED_SYNCING_TENANT_ID")
+IGNORED_SYNCING_TENANT_IDS = os.environ.get("IGNORED_SYNCING_TENANT_IDS")
 IGNORED_SYNCING_TENANT_LIST = (
    [tenant.strip() for tenant in IGNORED_SYNCING_TENANT_IDS.split(",")]
    if IGNORED_SYNCING_TENANT_IDS
--- a/deployment/cloud_kubernetes/workers/heavy_worker.yaml
+++ b/deployment/cloud_kubernetes/workers/heavy_worker.yaml
@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-heavy
-          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.10
+          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.12
          imagePullPolicy: IfNotPresent
          command:
            [
--- a/deployment/cloud_kubernetes/workers/indexing_worker.yaml
+++ b/deployment/cloud_kubernetes/workers/indexing_worker.yaml
@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-indexing
-          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.10
+          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.12
          imagePullPolicy: IfNotPresent
          command:
            [
--- a/deployment/cloud_kubernetes/workers/light_worker.yaml
+++ b/deployment/cloud_kubernetes/workers/light_worker.yaml
@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-light
-          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.10
+          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.12
          imagePullPolicy: IfNotPresent
          command:
            [
--- a/deployment/cloud_kubernetes/workers/primary.yaml
+++ b/deployment/cloud_kubernetes/workers/primary.yaml
@ -14,7 +14,7 @@ spec:
    spec:
      containers:
        - name: celery-worker-primary
-          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.10
+          image: danswer/danswer-backend-cloud:v0.12.0-cloud.beta.12
          imagePullPolicy: IfNotPresent
          command:
            [