diff --git a/.github/workflows/docker-build-push-backend-container-on-tag.yml b/.github/workflows/docker-build-push-backend-container-on-tag.yml index 9ee9d3152fca..92f3846b2a52 100644 --- a/.github/workflows/docker-build-push-backend-container-on-tag.yml +++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml @@ -8,17 +8,42 @@ on: env: REGISTRY_IMAGE: ${{ contains(github.ref_name, 'cloud') && 'onyxdotapp/onyx-backend-cloud' || 'onyxdotapp/onyx-backend' }} LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} + DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }} jobs: build-and-push: # TODO: investigate a matrix build like the web container # See https://runs-on.com/runners/linux/ - runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}"] - + runs-on: + - runs-on + - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }} + - run-id=${{ github.run_id }} + - tag=platform-${{ matrix.platform }} + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + - name: Checkout code uses: actions/checkout@v4 + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + tags: | + type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }} + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -34,19 +59,75 @@ jobs: sudo apt-get install -y build-essential - name: Backend Image Docker Build and Push + id: build uses: docker/build-push-action@v5 with: context: ./backend file: ./backend/Dockerfile - platforms: linux/amd64,linux/arm64 + platforms: ${{ matrix.platform }} push: true - tags: | - ${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} - ${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }} build-args: | ONYX_VERSION=${{ github.ref_name }} - no-cache: true + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: backend-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - build-and-push + steps: + # Needed for trivyignore + - name: Checkout + uses: actions/checkout@v4 + + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: backend-digests-*-${{ github.run_id }} + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} + # trivy has their own rate limiting issues causing this action to flake # we worked around it by hardcoding to different db repos in env # can re-enable when they figure it out diff --git a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml index 65e72c4aa7c1..05bece322fb3 100644 --- a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml +++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml @@ -9,7 +9,8 @@ on: env: REGISTRY_IMAGE: onyxdotapp/onyx-web-server-cloud LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} - + DEPLOYMENT: cloud + jobs: build: runs-on: @@ -72,8 +73,10 @@ jobs: NODE_OPTIONS=--max-old-space-size=8192 labels: ${{ steps.meta.outputs.labels }} outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/cloudweb-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max # no-cache needed due to weird interactions with the builds for different platforms - no-cache: true + # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off - name: Export digest run: | @@ -84,7 +87,7 @@ jobs: - name: Upload digest uses: actions/upload-artifact@v4 with: - name: digests-${{ env.PLATFORM_PAIR }} + name: cloudweb-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }} path: /tmp/digests/* if-no-files-found: error retention-days: 1 @@ -98,7 +101,7 @@ jobs: uses: actions/download-artifact@v4 with: path: /tmp/digests - pattern: digests-* + pattern: cloudweb-digests-*-${{ github.run_id }} merge-multiple: true - name: Set up Docker Buildx diff --git a/.github/workflows/docker-build-push-model-server-container-on-tag.yml b/.github/workflows/docker-build-push-model-server-container-on-tag.yml index 6f663192e388..8a930851edad 100644 --- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml +++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml @@ -10,7 +10,8 @@ env: LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} DOCKER_BUILDKIT: 1 BUILDKIT_PROGRESS: plain - + DEPLOYMENT: ${{ contains(github.ref_name, 'cloud') && 'cloud' || 'standalone' }} + jobs: # Bypassing this for now as the idea of not building is glitching @@ -51,6 +52,8 @@ jobs: if: needs.check_model_server_changes.outputs.changed == 'true' runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-amd64"] + env: + PLATFORM_PAIR: linux-amd64 steps: - name: Checkout code uses: actions/checkout@v4 @@ -86,13 +89,17 @@ jobs: DANSWER_VERSION=${{ github.ref_name }} outputs: type=registry provenance: false - no-cache: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max +# no-cache: true build-arm64: needs: [check_model_server_changes] if: needs.check_model_server_changes.outputs.changed == 'true' runs-on: [runs-on, runner=8cpu-linux-x64, "run-id=${{ github.run_id }}-arm64"] + env: + PLATFORM_PAIR: linux-arm64 steps: - name: Checkout code uses: actions/checkout@v4 @@ -128,7 +135,8 @@ jobs: DANSWER_VERSION=${{ github.ref_name }} outputs: type=registry provenance: false - no-cache: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max merge-and-scan: needs: [build-amd64, build-arm64, check_model_server_changes] diff --git a/.github/workflows/docker-build-push-web-container-on-tag.yml b/.github/workflows/docker-build-push-web-container-on-tag.yml index 20f28ff7c3f5..3700f91d014f 100644 --- a/.github/workflows/docker-build-push-web-container-on-tag.yml +++ b/.github/workflows/docker-build-push-web-container-on-tag.yml @@ -8,7 +8,8 @@ on: env: REGISTRY_IMAGE: onyxdotapp/onyx-web-server LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} - + DEPLOYMENT: standalone + jobs: build: runs-on: @@ -64,9 +65,11 @@ jobs: labels: ${{ steps.meta.outputs.labels }} outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true - # needed due to weird interactions with the builds for different platforms - no-cache: true - + cache-from: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/${{ env.DEPLOYMENT }}/web-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + # no-cache needed due to weird interactions with the builds for different platforms + # NOTE(rkuo): this may not be true any more with the proper cache prefixing by architecture - currently testing with it off + - name: Export digest run: | mkdir -p /tmp/digests @@ -76,7 +79,7 @@ jobs: - name: Upload digest uses: actions/upload-artifact@v4 with: - name: digests-${{ env.PLATFORM_PAIR }} + name: web-digests-${{ env.PLATFORM_PAIR }}-${{ github.run_id }} path: /tmp/digests/* if-no-files-found: error retention-days: 1 @@ -90,7 +93,7 @@ jobs: uses: actions/download-artifact@v4 with: path: /tmp/digests - pattern: digests-* + pattern: web-digests-*-${{ github.run_id }} merge-multiple: true - name: Set up Docker Buildx diff --git a/.github/workflows/pr-integration-tests.yml b/.github/workflows/pr-integration-tests.yml index d95c4dd81845..62535fd13ec2 100644 --- a/.github/workflows/pr-integration-tests.yml +++ b/.github/workflows/pr-integration-tests.yml @@ -16,6 +16,7 @@ env: CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }} CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }} CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }} + PLATFORM_PAIR: linux-amd64 jobs: integration-tests: @@ -61,9 +62,8 @@ jobs: tags: onyxdotapp/onyx-backend:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max - name: Build Model Server Docker image uses: ./.github/actions/custom-build-and-push @@ -74,9 +74,8 @@ jobs: tags: onyxdotapp/onyx-model-server:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max - name: Build integration test Docker image uses: ./.github/actions/custom-build-and-push @@ -87,9 +86,8 @@ jobs: tags: onyxdotapp/onyx-integration:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max # Start containers for multi-tenant tests - name: Start Docker containers for multi-tenant tests diff --git a/.github/workflows/pr-mit-integration-tests.yml b/.github/workflows/pr-mit-integration-tests.yml index 2f4ede4a697f..b80bb6102b79 100644 --- a/.github/workflows/pr-mit-integration-tests.yml +++ b/.github/workflows/pr-mit-integration-tests.yml @@ -16,7 +16,7 @@ env: CONFLUENCE_TEST_SPACE_URL: ${{ secrets.CONFLUENCE_TEST_SPACE_URL }} CONFLUENCE_USER_NAME: ${{ secrets.CONFLUENCE_USER_NAME }} CONFLUENCE_ACCESS_TOKEN: ${{ secrets.CONFLUENCE_ACCESS_TOKEN }} - + PLATFORM_PAIR: linux-amd64 jobs: integration-tests-mit: # See https://runs-on.com/runners/linux/ @@ -61,9 +61,8 @@ jobs: tags: onyxdotapp/onyx-backend:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/backend-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max - name: Build Model Server Docker image uses: ./.github/actions/custom-build-and-push @@ -74,9 +73,8 @@ jobs: tags: onyxdotapp/onyx-model-server:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/model-server-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max - name: Build integration test Docker image uses: ./.github/actions/custom-build-and-push @@ -87,9 +85,8 @@ jobs: tags: onyxdotapp/onyx-integration:test push: false load: true - no-cache: true -# cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} - cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + cache-from: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/mit-integration-tests/integration-${{ env.PLATFORM_PAIR }}/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max # NOTE: Use pre-ping/null pool to reduce flakiness due to dropped connections - name: Start Docker containers diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index d8465f026a39..eb7324455e6f 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -507,7 +507,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: search_settings_instance.id ) if redis_connector_index.fenced: - task_logger.info( + task_logger.debug( f"check_for_indexing - Skipping fenced connector: " f"cc_pair={cc_pair_id} search_settings={search_settings_instance.id}" ) @@ -529,14 +529,14 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: secondary_index_building=len(search_settings_list) > 1, db_session=db_session, ): - task_logger.info( + task_logger.debug( f"check_for_indexing - Not indexing cc_pair_id: {cc_pair_id} " f"search_settings={search_settings_instance.id}, " f"secondary_index_building={len(search_settings_list) > 1}" ) continue else: - task_logger.info( + task_logger.debug( f"check_for_indexing - Will index cc_pair_id: {cc_pair_id} " f"search_settings={search_settings_instance.id}, " f"secondary_index_building={len(search_settings_list) > 1}"