Bugfix/model tests (#4092)

* trying out a fix

* add ability to manually run model tests

* add log dump

* check status code, not text?

* just the model server

* add port mapping to host

* pass through more api keys

* add azure tests

* fix litellm env vars

* fix env vars in github workflow

* temp disable litellm test

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
rkuo-danswer 2025-02-24 20:53:51 -08:00 committed by GitHub
parent 5d58a5e3ea
commit 60bd9271f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 66 additions and 5 deletions

View File

@ -17,8 +17,13 @@ env:
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
# OpenAI
# API keys for testing
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_API_URL: ${{ secrets.AZURE_API_URL }}
jobs:
model-check:
@ -72,7 +77,7 @@ jobs:
REQUIRE_EMAIL_VERIFICATION=false \
DISABLE_TELEMETRY=true \
IMAGE_TAG=test \
docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
id: start_docker
- name: Wait for service to be ready
@ -123,9 +128,22 @@ jobs:
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
$SLACK_WEBHOOK
- name: Dump all-container logs (optional)
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
- name: Upload logs
if: always()
uses: actions/upload-artifact@v4
with:
name: docker-all-logs
path: ${{ github.workspace }}/docker-compose.log
- name: Stop Docker containers
if: always()
run: |
cd deployment/docker_compose
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v

View File

@ -71,12 +71,13 @@ def litellm_embedding_model() -> EmbeddingModel:
normalize=True,
query_prefix=None,
passage_prefix=None,
api_key=os.getenv("LITE_LLM_API_KEY"),
api_key=os.getenv("LITELLM_API_KEY"),
provider_type=EmbeddingProvider.LITELLM,
api_url=os.getenv("LITE_LLM_API_URL"),
api_url=os.getenv("LITELLM_API_URL"),
)
@pytest.mark.skip(reason="re-enable when we can get the correct litellm key and url")
def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
_run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
_run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
@ -117,6 +118,11 @@ def azure_embedding_model() -> EmbeddingModel:
)
def test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:
_run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)
_run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)
# NOTE (chris): this test doesn't work, and I do not know why
# def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
# """NOTE: this test relies on a very low rate limit for the Azure API +

View File

@ -0,0 +1,37 @@
services:
indexing_model_server:
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
build:
context: ../../backend
dockerfile: Dockerfile.model_server
command: >
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
echo 'Skipping service...';
exit 0;
else
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
fi"
restart: on-failure
environment:
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
- INDEXING_ONLY=True
# Set to debug to get more fine-grained logs
- LOG_LEVEL=${LOG_LEVEL:-info}
- CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
# Analytics Configs
- SENTRY_DSN=${SENTRY_DSN:-}
volumes:
# Not necessary, this is just to reduce download time during startup
- indexing_huggingface_model_cache:/root/.cache/huggingface/
logging:
driver: json-file
options:
max-size: "50m"
max-file: "6"
ports:
- "9000:9000" # <-- Add this line to expose the port to the host
volumes:
indexing_huggingface_model_cache: