mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-11 00:20:55 +02:00
Bugfix/model tests (#4092)
* trying out a fix * add ability to manually run model tests * add log dump * check status code, not text? * just the model server * add port mapping to host * pass through more api keys * add azure tests * fix litellm env vars * fix env vars in github workflow * temp disable litellm test --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
parent
5d58a5e3ea
commit
60bd9271f7
24
.github/workflows/pr-python-model-tests.yml
vendored
24
.github/workflows/pr-python-model-tests.yml
vendored
@ -17,8 +17,13 @@ env:
|
|||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
|
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
|
||||||
|
|
||||||
# OpenAI
|
# API keys for testing
|
||||||
|
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
|
||||||
|
LITELLM_API_KEY: ${{ secrets.LITELLM_API_KEY }}
|
||||||
|
LITELLM_API_URL: ${{ secrets.LITELLM_API_URL }}
|
||||||
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
||||||
|
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
|
||||||
|
AZURE_API_URL: ${{ secrets.AZURE_API_URL }}
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
model-check:
|
model-check:
|
||||||
@ -72,7 +77,7 @@ jobs:
|
|||||||
REQUIRE_EMAIL_VERIFICATION=false \
|
REQUIRE_EMAIL_VERIFICATION=false \
|
||||||
DISABLE_TELEMETRY=true \
|
DISABLE_TELEMETRY=true \
|
||||||
IMAGE_TAG=test \
|
IMAGE_TAG=test \
|
||||||
docker compose -f docker-compose.dev.yml -p onyx-stack up -d indexing_model_server
|
docker compose -f docker-compose.model-server-test.yml -p onyx-stack up -d indexing_model_server
|
||||||
id: start_docker
|
id: start_docker
|
||||||
|
|
||||||
- name: Wait for service to be ready
|
- name: Wait for service to be ready
|
||||||
@ -123,9 +128,22 @@ jobs:
|
|||||||
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
|
--data '{"text":"Scheduled Model Tests failed! Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \
|
||||||
$SLACK_WEBHOOK
|
$SLACK_WEBHOOK
|
||||||
|
|
||||||
|
- name: Dump all-container logs (optional)
|
||||||
|
if: always()
|
||||||
|
run: |
|
||||||
|
cd deployment/docker_compose
|
||||||
|
docker compose -f docker-compose.model-server-test.yml -p onyx-stack logs --no-color > $GITHUB_WORKSPACE/docker-compose.log || true
|
||||||
|
|
||||||
|
- name: Upload logs
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: docker-all-logs
|
||||||
|
path: ${{ github.workspace }}/docker-compose.log
|
||||||
|
|
||||||
- name: Stop Docker containers
|
- name: Stop Docker containers
|
||||||
if: always()
|
if: always()
|
||||||
run: |
|
run: |
|
||||||
cd deployment/docker_compose
|
cd deployment/docker_compose
|
||||||
docker compose -f docker-compose.dev.yml -p onyx-stack down -v
|
docker compose -f docker-compose.model-server-test.yml -p onyx-stack down -v
|
||||||
|
|
||||||
|
@ -71,12 +71,13 @@ def litellm_embedding_model() -> EmbeddingModel:
|
|||||||
normalize=True,
|
normalize=True,
|
||||||
query_prefix=None,
|
query_prefix=None,
|
||||||
passage_prefix=None,
|
passage_prefix=None,
|
||||||
api_key=os.getenv("LITE_LLM_API_KEY"),
|
api_key=os.getenv("LITELLM_API_KEY"),
|
||||||
provider_type=EmbeddingProvider.LITELLM,
|
provider_type=EmbeddingProvider.LITELLM,
|
||||||
api_url=os.getenv("LITE_LLM_API_URL"),
|
api_url=os.getenv("LITELLM_API_URL"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skip(reason="re-enable when we can get the correct litellm key and url")
|
||||||
def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
|
def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None:
|
||||||
_run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
|
_run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536)
|
||||||
_run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
|
_run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536)
|
||||||
@ -117,6 +118,11 @@ def azure_embedding_model() -> EmbeddingModel:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_azure_embedding(azure_embedding_model: EmbeddingModel) -> None:
|
||||||
|
_run_embeddings(VALID_SAMPLE, azure_embedding_model, 1536)
|
||||||
|
_run_embeddings(TOO_LONG_SAMPLE, azure_embedding_model, 1536)
|
||||||
|
|
||||||
|
|
||||||
# NOTE (chris): this test doesn't work, and I do not know why
|
# NOTE (chris): this test doesn't work, and I do not know why
|
||||||
# def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
|
# def test_azure_embedding_model_rate_limit(azure_embedding_model: EmbeddingModel):
|
||||||
# """NOTE: this test relies on a very low rate limit for the Azure API +
|
# """NOTE: this test relies on a very low rate limit for the Azure API +
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
services:
|
||||||
|
indexing_model_server:
|
||||||
|
image: onyxdotapp/onyx-model-server:${IMAGE_TAG:-latest}
|
||||||
|
build:
|
||||||
|
context: ../../backend
|
||||||
|
dockerfile: Dockerfile.model_server
|
||||||
|
command: >
|
||||||
|
/bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then
|
||||||
|
echo 'Skipping service...';
|
||||||
|
exit 0;
|
||||||
|
else
|
||||||
|
exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000;
|
||||||
|
fi"
|
||||||
|
restart: on-failure
|
||||||
|
environment:
|
||||||
|
- INDEX_BATCH_SIZE=${INDEX_BATCH_SIZE:-}
|
||||||
|
- MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
|
||||||
|
- INDEXING_ONLY=True
|
||||||
|
# Set to debug to get more fine-grained logs
|
||||||
|
- LOG_LEVEL=${LOG_LEVEL:-info}
|
||||||
|
- CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
|
||||||
|
|
||||||
|
# Analytics Configs
|
||||||
|
- SENTRY_DSN=${SENTRY_DSN:-}
|
||||||
|
volumes:
|
||||||
|
# Not necessary, this is just to reduce download time during startup
|
||||||
|
- indexing_huggingface_model_cache:/root/.cache/huggingface/
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "50m"
|
||||||
|
max-file: "6"
|
||||||
|
ports:
|
||||||
|
- "9000:9000" # <-- Add this line to expose the port to the host
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
indexing_huggingface_model_cache:
|
Loading…
x
Reference in New Issue
Block a user