don't push integration testing docker images (#2584)

* experiment with build and no push * use slightly more descriptive and consistent tags and names * name integration test workflow consistently with other workflows * put the tag back * try runs-on s3 backend * try adding runs-on cache * add with key * add a dummy path * forget about multiline * maybe we don't need runs-on cache immediately * lower ram slightly, name test with a version bump * don't need to explicitly include runs-on/cache for docker caching * comment out flaky portion of knowledge chat test --------- Co-authored-by: Richard Kuo <rkuo@rkuo.com>
2025-06-28 00:40:58 +02:00 · 2024-09-30 18:00:47 -07:00 · 2024-09-30 18:00:47 -07:00 · 140c5b3957
commit 140c5b3957
parent 3e511497d2
2 changed files with 45 additions and 35 deletions
--- a/.github/workflows/pr-Integration-tests.yml
+++ b/.github/workflows/pr-Integration-tests.yml
@ -1,4 +1,4 @@
-name: Run Integration Tests
+name: Run Integration Tests v2
 concurrency:
  group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }}
  cancel-in-progress: true
@ -14,7 +14,7 @@ env:
 jobs:
  integration-tests:
    # See https://runs-on.com/runners/linux/
-    runs-on: [runs-on,runner=8cpu-linux-x64,ram=32,"run-id=${{ github.run_id }}"]
+    runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@ -28,25 +28,35 @@ jobs:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

-      # NOTE: we don't need to build the Web Docker image since it's not used
-      # during the IT for now. We have a separate action to verify it builds 
-      # succesfully
+      # tag every docker image with "test" so that we can spin up the correct set
+      # of images during testing
+      
+      # We don't need to build the Web Docker image since it's not yet used
+      # in the integration tests. We have a separate action to verify that it builds 
+      # successfully.
      - name: Pull Web Docker image
        run: |
          docker pull danswer/danswer-web-server:latest
-          docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it
+          docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test

+      # we use the runs-on cache for docker builds
+      # in conjunction with runs-on runners, it has better speed and unlimited caching
+      # https://runs-on.com/caching/s3-cache-for-github-actions/
+      # https://runs-on.com/caching/docker/
+      # https://github.com/moby/buildkit#s3-cache-experimental
+      
+      # images are built and run locally for testing purposes. Not pushed.
      - name: Build Backend Docker image
        uses: ./.github/actions/custom-build-and-push
        with:
          context: ./backend
          file: ./backend/Dockerfile
          platforms: linux/amd64
-          tags: danswer/danswer-backend:it
-          cache-from: type=registry,ref=danswer/danswer-backend:it
-          cache-to: |
-            type=registry,ref=danswer/danswer-backend:it,mode=max
-            type=inline
+          tags: danswer/danswer-backend:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Build Model Server Docker image
        uses: ./.github/actions/custom-build-and-push
@ -54,11 +64,11 @@ jobs:
          context: ./backend
          file: ./backend/Dockerfile.model_server
          platforms: linux/amd64
-          tags: danswer/danswer-model-server:it
-          cache-from: type=registry,ref=danswer/danswer-model-server:it
-          cache-to: |
-            type=registry,ref=danswer/danswer-model-server:it,mode=max
-            type=inline
+          tags: danswer/danswer-model-server:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Build integration test Docker image
        uses: ./.github/actions/custom-build-and-push
@ -66,11 +76,11 @@ jobs:
          context: ./backend
          file: ./backend/tests/integration/Dockerfile
          platforms: linux/amd64
-          tags: danswer/integration-test-runner:it
-          cache-from: type=registry,ref=danswer/integration-test-runner:it
-          cache-to: |
-            type=registry,ref=danswer/integration-test-runner:it,mode=max
-            type=inline
+          tags: danswer/danswer-integration:test
+          push: false
+          load: true
+          cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }}
+          cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max

      - name: Start Docker containers
        run: |
@ -79,7 +89,7 @@ jobs:
          AUTH_TYPE=basic \
          REQUIRE_EMAIL_VERIFICATION=false \
          DISABLE_TELEMETRY=true \
-          IMAGE_TAG=it \
+          IMAGE_TAG=test \
          docker compose -f docker-compose.dev.yml -p danswer-stack up -d
        id: start_docker

@ -131,7 +141,7 @@ jobs:
            -e API_SERVER_HOST=api_server \
            -e OPENAI_API_KEY=${OPENAI_API_KEY} \
            -e TEST_WEB_HOSTNAME=test-runner \
-            danswer/integration-test-runner:it
+            danswer/danswer-integration:test
        continue-on-error: true
        id: run_tests

--- a/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py
+++ b/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py
@ -71,8 +71,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    answer_1 = response_json["answer"]
    assert "blue" in answer_1.lower()

-    # check that the llm selected a document
-    assert 0 in response_json["llm_selected_doc_indices"]
+    # FLAKY - check that the llm selected a document
+    # assert 0 in response_json["llm_selected_doc_indices"]

    # check that the final context documents are correct
    # (it should contain all documents because there arent enough to exclude any)
@ -80,8 +80,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    assert 1 in response_json["final_context_doc_indices"]
    assert 2 in response_json["final_context_doc_indices"]

-    # check that the cited documents are correct
-    assert cc_pair_1.documents[0].id in response_json["cited_documents"].values()
+    # FLAKY - check that the cited documents are correct
+    # assert cc_pair_1.documents[0].id in response_json["cited_documents"].values()

    # check that the top documents are correct
    assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id
@ -117,8 +117,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    answer_2 = response_json["answer"]
    assert "red" in answer_2.lower()

-    # check that the llm selected a document
-    assert 0 in response_json["llm_selected_doc_indices"]
+    # FLAKY - check that the llm selected a document
+    # assert 0 in response_json["llm_selected_doc_indices"]

    # check that the final context documents are correct
    # (it should contain all documents because there arent enough to exclude any)
@ -126,8 +126,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    assert 1 in response_json["final_context_doc_indices"]
    assert 2 in response_json["final_context_doc_indices"]

-    # check that the cited documents are correct
-    assert cc_pair_1.documents[1].id in response_json["cited_documents"].values()
+    # FLAKY - check that the cited documents are correct
+    # assert cc_pair_1.documents[1].id in response_json["cited_documents"].values()

    # check that the top documents are correct
    assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[1].id
@ -171,8 +171,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    answer_3 = response_json["answer"]
    assert "green" in answer_3.lower()

-    # check that the llm selected a document
-    assert 0 in response_json["llm_selected_doc_indices"]
+    # FLAKY - check that the llm selected a document
+    # assert 0 in response_json["llm_selected_doc_indices"]

    # check that the final context documents are correct
    # (it should contain all documents because there arent enough to exclude any)
@ -180,8 +180,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None:
    assert 1 in response_json["final_context_doc_indices"]
    assert 2 in response_json["final_context_doc_indices"]

-    # check that the cited documents are correct
-    assert cc_pair_1.documents[2].id in response_json["cited_documents"].values()
+    # FLAKY - check that the cited documents are correct
+    # assert cc_pair_1.documents[2].id in response_json["cited_documents"].values()

    # check that the top documents are correct
    assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id