From 60bd9271f7aaca0f3bb6ecc0d196e8ba72931b6d Mon Sep 17 00:00:00 2001
From: rkuo-danswer
Date: Mon, 24 Feb 2025 20:53:51 -0800
Subject: [PATCH] Bugfix/model tests (#4092)

* trying out a fix

* add ability to manually run model tests

* add log dump

* check status code, not text?

* just the model server

* add port mapping to host

* pass through more api keys

* add azure tests

* fix litellm env vars

* fix env vars in github workflow

* temp disable litellm test

---------

Co-authored-by: Richard Kuo (Danswer)
---
 .github/workflows/pr-python-model-tests.yml   | 24 ++++++++++--
 .../tests/daily/embedding/test_embeddings.py  | 10 ++++-
 .../docker-compose.model-server-test.yml      | 37 +++++++++++++++++++
 3 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 deployment/docker_compose/docker-compose.model-server-test.yml

diff --git a/.github/workflows/pr-python-model-tests.yml b/.github/workflows/pr-python-model-tests.yml
index 0421e1228eb..fd04a6bddf3 100644
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@@ -17,8 +17,13 @@ env:
   AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
   AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}

-  # OpenAI
+  # API keys for testing
+  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
+  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
   OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+  AZURE_API_URL: ${{ secrets.AZURE_API_URL }}

 jobs:
   model-check:
@@ -72,7 +77,7 @@ jobs:
           REQUIRE_EMAIL_VERIFICATION=false \
           DISABLE_TELEMETRY=true \
           IMAGE_TAG=test \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
         id: start_docker

       - name: Wait for service to be ready
@@ -123,9 +128,22 @@ jobs:
           --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
          $SLACK_WEBHOOK

+      - name: Dump all container logs
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          path: ${{ github.workspace }}/docker-compose.log
+
       - name: Stop Docker containers
         if: always()
         run: |
           cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
diff --git a/backend/tests/daily/embedding/test_embeddings.py b/backend/tests/daily/embedding/test_embeddings.py
index a87c41c33fe..907da5b3220 100644
--- a/backend/tests/daily/embedding/test_embeddings.py
+++ b/backend/tests/daily/embedding/test_embeddings.py
@@ -71,12 +71,13 @@ def litellm_embedding_model() -> EmbeddingModel:
         normalize=True,
         query_prefix=None,
         passage_prefix=None,
-        api_key=os.getenv("LITE_LLM_API_KEY"),
+        api_key=os.getenv("LITELLM_API_KEY"),
         provider_type=EmbeddingProvider.LITELLM,
-        api_url=os.getenv("LITE_LLM_API_URL"),
+        api_url=os.getenv("LITELLM_API_URL"),
     )


+@pytest.mark.skip(reason="re-enable when we can get the correct litellm key and url")
 def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
     _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
     _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
@@ -117,6 +118,11 @@ def azure_embedding_model() -> EmbeddingModel:
     )


+def test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:
+    _run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)
+    _run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)
+
+
 # NOTE (chris): this test doesn't work, and I do not know why
 # def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
 #     """NOTE: this test relies on a very low rate limit for the Azure API
diff --git a/deployment/docker_compose/docker-compose.model-server-test.yml b/deployment/docker_compose/docker-compose.model-server-test.yml
new file mode 100644
index 00000000000..6a33ac675a9
--- /dev/null
+++ b/deployment/docker_compose/docker-compose.model-server-test.yml
@@ -0,0 +1,37 @@
+services:
+  indexing_model_server:
+    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile.model_server
+    command: >
+      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
+        echo 'Skipping service...';
+        exit 0;
+      else
+        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
+      fi"
+    restart: on-failure
+    environment:
+      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
+      - INDEXING_ONLY=True
+      # Set to debug to get more fine-grained logs
+      - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
+
+      # Analytics Configs
+      - SENTRY_DSN=${SENTRY_DSN:-}
+    volumes:
+      # Not necessary, this is just to reduce download time during startup
+      - indexing_huggingface_model_cache:/root/.cache/huggingface/
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+    ports:
+      - "9000:9000"  # Expose the model server port to the host so tests can reach it
+
+volumes:
+  indexing_huggingface_model_cache:
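Two of the commit notes above ("check status code, not text?" and "add port mapping to host") describe how the workflow decides the model server is ready: poll it over the newly exposed 9000:9000 mapping and trust the HTTP status code rather than matching response text. The actual "Wait for service to be ready" step is not shown in these hunks, so the following is only a minimal Python sketch of that idea; the /api/health path, timeout, and polling interval are assumptions, not values taken from the repository.

```python
# Minimal readiness poll: judge health by HTTP status code instead of
# response body text. MODEL_SERVER_URL matches the "9000:9000" port mapping
# added in docker-compose.model-server-test.yml; HEALTH_PATH is hypothetical.
import time

import requests

MODEL_SERVER_URL = "http://localhost:9000"
HEALTH_PATH = "/api/health"  # assumption: not taken from this diff


def wait_for_model_server(timeout_s: float = 300.0, interval_s: float = 5.0) -> None:
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            resp = requests.get(MODEL_SERVER_URL + HEALTH_PATH, timeout=10)
            if resp.status_code == 200:  # status code, not text
                return
        except requests.RequestException:
            pass  # container still starting; keep polling
        time.sleep(interval_s)
    raise TimeoutError(f"model server not ready after {timeout_s}s")
```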
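The new test_azure_embedding and the litellm test both call _run_embeddings(sample, model, 1536), a helper defined elsewhere in test_embeddings.py and not included in this diff. As a hedged sketch of the shape such a helper could have, assuming EmbeddingModel.encode(texts, text_type) returns one vector per input text and an EmbedTextType enum distinguishes query and passage embeddings; the import paths and signatures are my best guess at the Onyx layout, not confirmed by the patch.

```python
# Hypothetical reconstruction of the _run_embeddings helper the tests invoke;
# the real definition lives elsewhere in test_embeddings.py. Assumes encode()
# returns a list of float vectors, one per input text (an assumption).
from onyx.natural_language_processing.search_nlp_models import EmbeddingModel
from shared_configs.enums import EmbedTextType


def _run_embeddings(
    texts: list[str], embedding_model: EmbeddingModel, expected_dim: int
) -> None:
    for text_type in [EmbedTextType.QUERY, EmbedTextType.PASSAGE]:
        embeddings = embedding_model.encode(texts, text_type)
        # one embedding per input, each of the provider's advertised dimension
        assert len(embeddings) == len(texts)
        for vector in embeddings:
            assert len(vector) == expected_dim
```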
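The litellm test is disabled with an unconditional @pytest.mark.skip until the correct key and URL are available. An alternative pattern (not what this patch does) is a conditional skip keyed on the same environment variables the workflow now passes through, so the test re-enables itself once the secrets are configured. The sketch below is a drop-in for test_embeddings.py, where the fixture, samples, and _run_embeddings are already defined.

```python
# Illustrative alternative to the hard skip: skip only when the renamed env
# vars are missing. The patch itself uses a plain @pytest.mark.skip.
import os

import pytest

requires_litellm = pytest.mark.skipif(
    not (os.getenv("LITELLM_API_KEY") and os.getenv("LITELLM_API_URL")),
    reason="LITELLM_API_KEY / LITELLM_API_URL not configured",
)


@requires_litellm
def test_litellm_embedding(litellm_embedding_model: "EmbeddingModel") -> None:
    # identical body to the skipped test; only the skip condition differs
    _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
    _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
```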