From 870b59a1ccd2de0bf3fe55ba1b988c27546f03d2 Mon Sep 17 00:00:00 2001
From: rkuo-danswer
Date: Tue, 4 Mar 2025 17:59:46 -0800
Subject: [PATCH] Bugfix/vertex crash (#4181)

* Update text embedding model to version 005 and enhance embedding retrieval process

* re

* Fix formatting issues

* Add support for Bedrock reranking provider and AWS credentials handling

* fix: improve AWS key format validation and error messages

* Fix vertex embedding model crash

* feat: add environment template for local development setup

* Add display name for Claude 3.7 Sonnet model

* Add display names for Gemini 2.0 models and update Claude 3.7 Sonnet entry

* Fix ruff errors by ensuring lines are within 130 characters

* revert to currently default onyx browser settings

* add / fix boto requirements

---------

Co-authored-by: ferdinand loesch
Co-authored-by: Ferdinand Loesch
Co-authored-by: Richard Kuo (Danswer)
---
 backend/model_server/constants.py           |  2 +-
 backend/model_server/encoders.py            | 92 ++++++++++++++++---
 backend/model_server/utils.py               | 29 ++++++
 backend/onyx/connectors/web/connector.py    |  1 +
 backend/requirements/default.txt            |  3 +-
 backend/requirements/model_server.txt       |  3 +-
 backend/shared_configs/configs.py           | 10 +-
 backend/shared_configs/enums.py             |  1 +
 .../admin/embeddings/RerankingFormPage.tsx  | 21 ++++-
 web/src/app/admin/embeddings/interfaces.ts  | 10 ++
 web/src/components/embedding/interfaces.tsx |  2 +-
 web/src/lib/hooks.ts                        |  8 +-
 12 files changed, 160 insertions(+), 22 deletions(-)

diff --git a/backend/model_server/constants.py b/backend/model_server/constants.py
index fac57cb734e..d026d4a76e0 100644
--- a/backend/model_server/constants.py
+++ b/backend/model_server/constants.py
@@ -6,7 +6,7 @@ MODEL_WARM_UP_STRING = "hi " * 512
 DEFAULT_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_COHERE_MODEL = "embed-english-light-v3.0"
 DEFAULT_VOYAGE_MODEL = "voyage-large-2-instruct"
-DEFAULT_VERTEX_MODEL = "text-embedding-004"
+DEFAULT_VERTEX_MODEL = "text-embedding-005"
 
 
 class EmbeddingModelTextType:
diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py
index 9215042f1f4..6b5044af6c2 100644
--- a/backend/model_server/encoders.py
+++ b/backend/model_server/encoders.py
@@ -5,6 +5,7 @@ from types import TracebackType
 from typing import cast
 from typing import Optional
 
+import aioboto3  # type: ignore
 import httpx
 import openai
 import vertexai  # type: ignore
@@ -28,11 +29,13 @@ from model_server.constants import DEFAULT_VERTEX_MODEL
 from model_server.constants import DEFAULT_VOYAGE_MODEL
 from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
+from model_server.utils import pass_aws_key
 from model_server.utils import simple_log_function_time
 from onyx.utils.logger import setup_logger
 from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT
 from shared_configs.configs import INDEXING_ONLY
 from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
+from shared_configs.configs import VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -182,17 +185,24 @@ class CloudEmbedding:
         vertexai.init(project=project_id, credentials=credentials)
         client = TextEmbeddingModel.from_pretrained(model)
 
-        embeddings = await client.get_embeddings_async(
-            [
-                TextEmbeddingInput(
-                    text,
-                    embedding_type,
-                )
-                for text in texts
-            ],
-            auto_truncate=True,  # This is the default
-        )
-        return [embedding.values for embedding in embeddings]
+        inputs = [TextEmbeddingInput(text, embedding_type) for text in texts]
+
+        # Split into batches of 25 texts
+        max_texts_per_batch = VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE
+        batches = [
+            inputs[i : i + max_texts_per_batch]
+            for i in range(0, len(inputs), max_texts_per_batch)
+        ]
+
+        # Dispatch all embedding calls asynchronously at once
+        tasks = [
+            client.get_embeddings_async(batch, auto_truncate=True) for batch in batches
+        ]
+
+        # Wait for all tasks to complete in parallel
+        results = await asyncio.gather(*tasks)
+
+        return [embedding.values for batch in results for embedding in batch]
 
     async def _embed_litellm_proxy(
         self, texts: list[str], model_name: str | None
@@ -447,7 +457,7 @@ async def local_rerank(query: str, docs: list[str], model_name: str) -> list[flo
     )
 
 
-async def cohere_rerank(
+async def cohere_rerank_api(
     query: str, docs: list[str], model_name: str, api_key: str
 ) -> list[float]:
     cohere_client = CohereAsyncClient(api_key=api_key)
@@ -457,6 +467,45 @@
     return [result.relevance_score for result in sorted_results]
 
 
+async def cohere_rerank_aws(
+    query: str,
+    docs: list[str],
+    model_name: str,
+    region_name: str,
+    aws_access_key_id: str,
+    aws_secret_access_key: str,
+) -> list[float]:
+    session = aioboto3.Session(
+        aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key
+    )
+    async with session.client(
+        "bedrock-runtime", region_name=region_name
+    ) as bedrock_client:
+        body = json.dumps(
+            {
+                "query": query,
+                "documents": docs,
+                "api_version": 2,
+            }
+        )
+        # Invoke the Bedrock model asynchronously
+        response = await bedrock_client.invoke_model(
+            modelId=model_name,
+            accept="application/json",
+            contentType="application/json",
+            body=body,
+        )
+
+        # Read the response asynchronously
+        response_body = json.loads(await response["body"].read())
+
+        # Extract and sort the results
+        results = response_body.get("results", [])
+        sorted_results = sorted(results, key=lambda item: item["index"])
+
+        return [result["relevance_score"] for result in sorted_results]
+
+
 async def litellm_rerank(
     query: str, docs: list[str], api_url: str, model_name: str, api_key: str | None
 ) -> list[float]:
@@ -572,15 +621,32 @@ async def process_rerank_request(rerank_request: RerankRequest) -> RerankRespons
         elif rerank_request.provider_type == RerankerProvider.COHERE:
             if rerank_request.api_key is None:
                 raise RuntimeError("Cohere Rerank Requires an API Key")
-            sim_scores = await cohere_rerank(
+            sim_scores = await cohere_rerank_api(
                 query=rerank_request.query,
                 docs=rerank_request.documents,
                 model_name=rerank_request.model_name,
                 api_key=rerank_request.api_key,
            )
             return RerankResponse(scores=sim_scores)
+
+        elif rerank_request.provider_type == RerankerProvider.BEDROCK:
+            if rerank_request.api_key is None:
+                raise RuntimeError("Bedrock Rerank Requires an API Key")
+            aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(
+                rerank_request.api_key
+            )
+            sim_scores = await cohere_rerank_aws(
+                query=rerank_request.query,
+                docs=rerank_request.documents,
+                model_name=rerank_request.model_name,
+                region_name=aws_region,
+                aws_access_key_id=aws_access_key_id,
+                aws_secret_access_key=aws_secret_access_key,
+            )
+            return RerankResponse(scores=sim_scores)
         else:
             raise ValueError(f"Unsupported provider: {rerank_request.provider_type}")
+
     except Exception as e:
         logger.exception(f"Error during reranking process:\n{str(e)}")
         raise HTTPException(
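
Note on the _embed_vertex hunk above: the old code sent every text in a single get_embeddings_async call, which could exceed Vertex AI's per-request limits and crash; the new code batches the inputs and awaits all calls concurrently. A minimal standalone sketch of the same pattern, assuming the default batch size of 25 and a hypothetical fake_embed_batch standing in for client.get_embeddings_async(batch, auto_truncate=True):

    import asyncio

    VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = 25  # default from shared_configs/configs.py

    async def fake_embed_batch(batch: list[str]) -> list[list[float]]:
        # Hypothetical stand-in for client.get_embeddings_async(batch, auto_truncate=True)
        return [[float(len(text))] for text in batch]

    async def embed_all(texts: list[str]) -> list[list[float]]:
        # Split inputs into fixed-size batches so no single request can exhaust
        # the per-call token limit
        batches = [
            texts[i : i + VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE]
            for i in range(0, len(texts), VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE)
        ]
        # Dispatch one coroutine per batch and wait for all of them concurrently
        results = await asyncio.gather(*(fake_embed_batch(batch) for batch in batches))
        # Flatten back to one embedding per input text, preserving input order
        return [embedding for batch in results for embedding in batch]

    if __name__ == "__main__":
        embeddings = asyncio.run(embed_all([f"text {i}" for i in range(60)]))
        print(len(embeddings))  # 60 embeddings, from 3 concurrent calls (25 + 25 + 10)

asyncio.gather preserves the order of its awaitables, so the flattened result lines up with the original texts even when the calls complete out of order.
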
diff --git a/backend/model_server/utils.py b/backend/model_server/utils.py
index b53431fda38..f04e189b127 100644
--- a/backend/model_server/utils.py
+++ b/backend/model_server/utils.py
@@ -70,3 +70,32 @@ def get_gpu_type() -> str:
         return GPUStatus.MAC_MPS
 
     return GPUStatus.NONE
+
+
+def pass_aws_key(api_key: str) -> tuple[str, str, str]:
+    """Parse AWS API key string into components.
+
+    Args:
+        api_key: String in format 'aws_ACCESSKEY_SECRETKEY_REGION'
+
+    Returns:
+        Tuple of (access_key, secret_key, region)
+
+    Raises:
+        ValueError: If key format is invalid
+    """
+    if not api_key.startswith("aws"):
+        raise ValueError("API key must start with 'aws' prefix")
+
+    parts = api_key.split("_")
+    if len(parts) != 4:
+        raise ValueError(
+            f"API key must be in format 'aws_ACCESSKEY_SECRETKEY_REGION', got {len(parts) - 1} parts; "
+            "this is an Onyx-specific format for packaging the AWS secrets used by Bedrock"
+        )
+
+    try:
+        _, aws_access_key_id, aws_secret_access_key, aws_region = parts
+        return aws_access_key_id, aws_secret_access_key, aws_region
+    except Exception as e:
+        raise ValueError(f"Failed to parse AWS key components: {str(e)}")
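
pass_aws_key above defines an Onyx-specific convention for packing AWS credentials into the single api_key string a rerank request already carries. A small usage sketch with placeholder values, not real credentials; note the split on "_" assumes neither key contains an underscore (AWS access key ids and secret keys do not, and regions use hyphens):

    from model_server.utils import pass_aws_key

    # 'aws' prefix, then access key id, secret key, and region, joined by underscores
    access_key, secret_key, region = pass_aws_key("aws_AKIAEXAMPLE_SECRETEXAMPLE_us-west-2")
    print(access_key, region)  # AKIAEXAMPLE us-west-2

    # Anything that does not match the four-part format raises a ValueError
    try:
        pass_aws_key("sk-not-an-aws-key")
    except ValueError as err:
        print(err)  # API key must start with 'aws' prefix
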
diff --git a/backend/onyx/connectors/web/connector.py b/backend/onyx/connectors/web/connector.py
index e72721ae69d..115ff6fdf4d 100644
--- a/backend/onyx/connectors/web/connector.py
+++ b/backend/onyx/connectors/web/connector.py
@@ -157,6 +157,7 @@ def get_internal_links(
 
 def start_playwright() -> Tuple[Playwright, BrowserContext]:
     playwright = sync_playwright().start()
+
     browser = playwright.chromium.launch(headless=True)
 
     context = browser.new_context()
diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt
index 339a214328b..016d14c2325 100644
--- a/backend/requirements/default.txt
+++ b/backend/requirements/default.txt
@@ -1,9 +1,10 @@
+aioboto3==14.0.0
 aiohttp==3.10.2
 alembic==1.10.4
 asyncpg==0.27.0
 atlassian-python-api==3.41.16
 beautifulsoup4==4.12.3
-boto3==1.34.84
+boto3==1.36.23
 celery==5.5.0b4
 chardet==5.2.0
 dask==2023.8.1
diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt
index 0afbc16eba4..b4d4a9f066a 100644
--- a/backend/requirements/model_server.txt
+++ b/backend/requirements/model_server.txt
@@ -13,4 +13,5 @@ transformers==4.39.2
 uvicorn==0.21.1
 voyageai==0.2.3
 litellm==1.61.16
-sentry-sdk[fastapi,celery,starlette]==2.14.0
\ No newline at end of file
+sentry-sdk[fastapi,celery,starlette]==2.14.0
+aioboto3==13.4.0
\ No newline at end of file
diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py
index ecf57358188..b21c53d6914 100644
--- a/backend/shared_configs/configs.py
+++ b/backend/shared_configs/configs.py
@@ -68,6 +68,12 @@ LOG_LEVEL = os.environ.get("LOG_LEVEL", "info")
 # allow us to specify a custom timeout
 API_BASED_EMBEDDING_TIMEOUT = int(os.environ.get("API_BASED_EMBEDDING_TIMEOUT", "600"))
 
+# Local batch size for VertexAI embedding models, currently calibrated for an item size of 512 tokens.
+# NOTE: increasing this value may lead to API errors due to token limit exhaustion per call.
+VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE = int(
+    os.environ.get("VERTEXAI_EMBEDDING_LOCAL_BATCH_SIZE", "25")
+)
+
 # Only used for OpenAI
 OPENAI_EMBEDDING_TIMEOUT = int(
     os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT)
@@ -200,12 +206,12 @@ SUPPORTED_EMBEDDING_MODELS = [
         index_name="danswer_chunk_text_embedding_3_small",
     ),
     SupportedEmbeddingModel(
-        name="google/text-embedding-004",
+        name="google/text-embedding-005",
         dim=768,
         index_name="danswer_chunk_google_text_embedding_004",
     ),
     SupportedEmbeddingModel(
-        name="google/text-embedding-004",
+        name="google/text-embedding-005",
         dim=768,
         index_name="danswer_chunk_text_embedding_004",
     ),
diff --git a/backend/shared_configs/enums.py b/backend/shared_configs/enums.py
index 3fe1cd0bd01..25e440cda43 100644
--- a/backend/shared_configs/enums.py
+++ b/backend/shared_configs/enums.py
@@ -13,6 +13,7 @@ class EmbeddingProvider(str, Enum):
 class RerankerProvider(str, Enum):
     COHERE = "cohere"
     LITELLM = "litellm"
+    BEDROCK = "bedrock"
 
 
 class EmbedTextType(str, Enum):
diff --git a/web/src/app/admin/embeddings/RerankingFormPage.tsx b/web/src/app/admin/embeddings/RerankingFormPage.tsx
index cf0d53933e7..4d026bf003f 100644
--- a/web/src/app/admin/embeddings/RerankingFormPage.tsx
+++ b/web/src/app/admin/embeddings/RerankingFormPage.tsx
@@ -15,6 +15,7 @@
 } from "./interfaces";
 import { FiExternalLink } from "react-icons/fi";
 import {
+  AmazonIcon,
   CohereIcon,
   LiteLLMIcon,
   MixedBreadIcon,
@@ -242,6 +243,11 @@ const RerankingDetailsForm = forwardRef<
                         card.rerank_provider_type == RerankerProvider.COHERE
                       ) {
                         setIsApiKeyModalOpen(true);
+                      } else if (
+                        card.rerank_provider_type ==
+                        RerankerProvider.BEDROCK
+                      ) {
+                        setIsApiKeyModalOpen(true);
                       } else if (
                         card.rerank_provider_type ==
                         RerankerProvider.LITELLM
@@ -278,6 +284,9 @@ const RerankingDetailsForm = forwardRef<
                       ) : card.rerank_provider_type ===
                         RerankerProvider.COHERE ? (
                         <CohereIcon />
+                      ) : card.rerank_provider_type ===
+                        RerankerProvider.BEDROCK ? (
+                        <AmazonIcon />
                       ) : (
                         <MixedBreadIcon />
                       )}
@@ -437,7 +446,10 @@ const RerankingDetailsForm = forwardRef<
                     placeholder={
                       values.rerank_api_key
                         ? "*".repeat(values.rerank_api_key.length)
-                        : undefined
+                        : values.rerank_provider_type ===
+                            RerankerProvider.BEDROCK
+                          ? "aws_ACCESSKEY_SECRETKEY_REGION"
+                          : "Enter your API key"
                     }
                     onChange={(e: React.ChangeEvent) => {
                       const value = e.target.value;
@@ -448,7 +460,12 @@ const RerankingDetailsForm = forwardRef<
                       setFieldValue("api_key", value);
                     }}
                     type="password"
-                    label="Cohere API Key"
+                    label={
+                      values.rerank_provider_type ===
+                      RerankerProvider.BEDROCK
+                        ? "AWS Credentials in format: aws_ACCESSKEY_SECRETKEY_REGION"
+                        : "Cohere API Key"
+                    }
                     name="rerank_api_key"
                   />
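
The form above collects the credentials as one opaque string, which reaches the model server unchanged as rerank_request.api_key. A hedged sketch of the path it then takes, reusing pass_aws_key and cohere_rerank_aws from the hunks above (the query, documents, and key are illustrative placeholders, and rerank_via_bedrock is a hypothetical wrapper, not a function in this patch):

    import asyncio

    from model_server.encoders import cohere_rerank_aws
    from model_server.utils import pass_aws_key

    async def rerank_via_bedrock(query: str, docs: list[str], api_key: str) -> list[float]:
        # Unpack the single UI string into the three values the Bedrock client needs
        aws_access_key_id, aws_secret_access_key, aws_region = pass_aws_key(api_key)
        return await cohere_rerank_aws(
            query=query,
            docs=docs,
            model_name="cohere.rerank-v3-5:0",  # Bedrock model id registered in interfaces.ts below
            region_name=aws_region,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )

    # Requires valid AWS credentials with Bedrock access:
    # scores = asyncio.run(rerank_via_bedrock(
    #     "what is onyx?", ["doc a", "doc b"], "aws_ACCESSKEY_SECRETKEY_us-east-1"
    # ))
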
diff --git a/web/src/app/admin/embeddings/interfaces.ts b/web/src/app/admin/embeddings/interfaces.ts
index cc629454855..f9e3c416174 100644
--- a/web/src/app/admin/embeddings/interfaces.ts
+++ b/web/src/app/admin/embeddings/interfaces.ts
@@ -18,6 +18,7 @@ export interface RerankingDetails {
 export enum RerankerProvider {
   COHERE = "cohere",
   LITELLM = "litellm",
+  BEDROCK = "bedrock",
 }
 
 export enum EmbeddingPrecision {
@@ -100,6 +101,15 @@ export const rerankingModels: RerankingModel[] = [
     description: "Powerful multilingual reranking model.",
     link: "https://docs.cohere.com/v2/reference/rerank",
   },
+  {
+    cloud: true,
+    rerank_provider_type: RerankerProvider.BEDROCK,
+    modelName: "cohere.rerank-v3-5:0",
+    displayName: "Cohere Rerank 3.5",
+    description:
+      "Powerful multilingual reranking model invoked through AWS Bedrock.",
+    link: "https://aws.amazon.com/blogs/machine-learning/cohere-rerank-3-5-is-now-available-in-amazon-bedrock-through-rerank-api",
+  },
 ];
 
 export const getCurrentModelCopy = (
diff --git a/web/src/components/embedding/interfaces.tsx b/web/src/components/embedding/interfaces.tsx
index 1ac3f5da6a0..16d6a189465 100644
--- a/web/src/components/embedding/interfaces.tsx
+++ b/web/src/components/embedding/interfaces.tsx
@@ -268,7 +268,7 @@ export const AVAILABLE_CLOUD_PROVIDERS: CloudEmbeddingProvider[] = [
     embedding_models: [
       {
         provider_type: EmbeddingProvider.GOOGLE,
-        model_name: "text-embedding-004",
+        model_name: "text-embedding-005",
         description: "Google's most recent text embedding model.",
         pricePerMillion: 0.025,
         model_dim: 768,
diff --git a/web/src/lib/hooks.ts b/web/src/lib/hooks.ts
index 2b7f974d0a9..984da34ec16 100644
--- a/web/src/lib/hooks.ts
+++ b/web/src/lib/hooks.ts
@@ -730,7 +730,10 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
   "gemini-1.5-flash-001": "Gemini 1.5 Flash",
   "gemini-1.5-pro-002": "Gemini 1.5 Pro (v2)",
   "gemini-1.5-flash-002": "Gemini 1.5 Flash (v2)",
-  "gemini-2.0-flash-exp": "Gemini 2.0 Flash (Experimental)",
+  "gemini-2.0-flash-001": "Gemini 2.0 Flash",
+  "gemini-2.0-flash": "Gemini 2.0 Flash",
+  "gemini-2.0-pro-exp-02-05": "Gemini 2.0 Pro",
+  "gemini-2.0-flash-thinking-exp-01-21": "Gemini 2.0 Flash Thinking",
 
   // Mistral Models
   "mistral-large-2411": "Mistral Large 24.11",
@@ -755,6 +758,8 @@ const MODEL_DISPLAY_NAMES: { [key: string]: string } = {
   "anthropic.claude-v2:1": "Claude v2.1",
   "anthropic.claude-v2": "Claude v2",
   "anthropic.claude-v1": "Claude v1",
+  "anthropic.claude-3-7-sonnet-20250219-v1:0": "Claude 3.7 Sonnet",
+  "us.anthropic.claude-3-7-sonnet-20250219-v1:0": "Claude 3.7 Sonnet",
   "anthropic.claude-3-opus-20240229-v1:0": "Claude 3 Opus",
   "anthropic.claude-3-haiku-20240307-v1:0": "Claude 3 Haiku",
   "anthropic.claude-3-5-sonnet-20240620-v1:0": "Claude 3.5 Sonnet",
@@ -788,6 +793,7 @@ export const defaultModelsByProvider: { [name: string]: string[] } = {
     "anthropic.claude-3-opus-20240229-v1:0",
     "mistral.mistral-large-2402-v1:0",
     "anthropic.claude-3-5-sonnet-20241022-v2:0",
+    "anthropic.claude-3-7-sonnet-20250219-v1:0",
   ],
   anthropic: ["claude-3-opus-20240229", "claude-3-5-sonnet-20241022"],
 };