Bugfix/model tests (#4092)

* trying out a fix * add ability to manually run model tests * add log dump * check status code, not text? * just the model server * add port mapping to host * pass through more api keys * add azure tests * fix litellm env vars * fix env vars in github workflow * temp disable litellm test --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
2025-06-05 04:31:03 +02:00 · 2025-02-24 20:53:51 -08:00 · 2025-02-24 20:53:51 -08:00 · 60bd9271f7
commit 60bd9271f7
parent 5d58a5e3ea
3 changed files with 66 additions and 5 deletions
--- a/.github/workflows/pr-python-model-tests.yml
+++ b/.github/workflows/pr-python-model-tests.yml
@ -17,8 +17,13 @@ env:
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}

-  # OpenAI
+  # API keys for testing
+  COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
+  LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
+  LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+  AZURE_API_URL: ${{ secrets.AZURE_API_URL }}

 jobs:
  model-check:
@ -72,7 +77,7 @@ jobs:
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
          IMAGE_TAG=test \
-          docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
        id: start_docker

      - name: Wait for service to be ready
@ -123,9 +128,22 @@ jobs:
            --data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
            $SLACK_WEBHOOK
            
+      - name: Dump all-container logs (optional)
+        if: always()
+        run: |
+          cd deployment/docker_compose
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
+
+      - name: Upload logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: docker-all-logs
+          path: ${{ github.workspace }}/docker-compose.log
+          
      - name: Stop Docker containers
        if: always()
        run: |
          cd deployment/docker_compose
-          docker compose -f docker-compose.dev.yml -p onyx-stack down -v
+          docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
          
--- a/backend/tests/daily/embedding/test_embeddings.py
+++ b/backend/tests/daily/embedding/test_embeddings.py
@ -71,12 +71,13 @@ def litellm_embedding_model() -> EmbeddingModel:
        normalize=True,
        query_prefix=None,
        passage_prefix=None,
-        api_key=os.getenv("LITE_LLM_API_KEY"),
+        api_key=os.getenv("LITELLM_API_KEY"),
        provider_type=EmbeddingProvider.LITELLM,
-        api_url=os.getenv("LITE_LLM_API_URL"),
+        api_url=os.getenv("LITELLM_API_URL"),
    )


+@pytest.mark.skip(reason="re-enable when we can get the correct litellm key and url")
 def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
    _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
    _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
@ -117,6 +118,11 @@ def azure_embedding_model() -> EmbeddingModel:
    )


+def test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:
+    _run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)
+    _run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)
+
+
 # NOTE (chris): this test doesn't work, and I do not know why
 # def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
 #     """NOTE: this test relies on a very low rate limit for the Azure API +
--- a/deployment/docker_compose/docker-compose.model-server-test.yml
+++ b/deployment/docker_compose/docker-compose.model-server-test.yml
@ -0,0 +1,37 @@
+services:
+  indexing_model_server:
+    image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
+    build:
+      context: ../../backend
+      dockerfile: Dockerfile.model_server
+    command: >
+      /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
+        echo 'Skipping service...';
+        exit 0;
+      else
+        exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
+      fi"
+    restart: on-failure
+    environment:
+      - INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
+      - INDEXING_ONLY=True
+      # Set to debug to get more fine-grained logs
+      - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
+
+      # Analytics Configs
+      - SENTRY_DSN=${SENTRY_DSN:-}
+    volumes:
+      # Not necessary, this is just to reduce download time during startup
+      - indexing_huggingface_model_cache:/root/.cache/huggingface/
+    logging:
+      driver: json-file
+      options:
+        max-size: "50m"
+        max-file: "6"
+    ports:
+      - "9000:9000"  # <-- Add this line to expose the port to the host
+
+volumes:
+  indexing_huggingface_model_cache: