diff --git a/.github/workflows/docker-build-push-backend-container-on-tag.yml b/.github/workflows/docker-build-push-backend-container-on-tag.yml index e95c143fb..c8d5112e2 100644 --- a/.github/workflows/docker-build-push-backend-container-on-tag.yml +++ b/.github/workflows/docker-build-push-backend-container-on-tag.yml @@ -14,16 +14,16 @@ jobs: uses: actions/checkout@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_TOKEN }} - name: Backend Image Docker Build and Push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v5 with: context: ./backend file: ./backend/Dockerfile @@ -38,5 +38,7 @@ jobs: - name: Run Trivy vulnerability scanner uses: aquasecurity/trivy-action@master with: + # To run locally: trivy image --severity HIGH,CRITICAL danswer/danswer-backend image-ref: docker.io/danswer/danswer-backend:${{ github.ref_name }} severity: 'CRITICAL,HIGH' + trivyignores: ./backend/.trivyignore diff --git a/.github/workflows/docker-build-push-model-server-container-on-tag.yml b/.github/workflows/docker-build-push-model-server-container-on-tag.yml index ddc5f5a28..104b891d7 100644 --- a/.github/workflows/docker-build-push-model-server-container-on-tag.yml +++ b/.github/workflows/docker-build-push-model-server-container-on-tag.yml @@ -14,16 +14,16 @@ jobs: uses: actions/checkout@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to Docker Hub - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_TOKEN }} - name: Model Server Image Docker Build and Push - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v5 with: context: ./backend file: 
./backend/Dockerfile.model_server diff --git a/.github/workflows/docker-build-push-web-container-on-tag.yml b/.github/workflows/docker-build-push-web-container-on-tag.yml index d848fc189..8f0783f13 100644 --- a/.github/workflows/docker-build-push-web-container-on-tag.yml +++ b/.github/workflows/docker-build-push-web-container-on-tag.yml @@ -5,38 +5,114 @@ on: tags: - '*' +env: + REGISTRY_IMAGE: danswer/danswer-web-server + jobs: - build-and-push: + build: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 steps: - - name: Checkout code - uses: actions/checkout@v2 + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout + uses: actions/checkout@v4 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + tags: | + type=raw,value=danswer/danswer-web-server:${{ github.ref_name }} + type=raw,value=danswer/danswer-web-server:latest + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./web + file: ./web/Dockerfile + platforms: ${{ matrix.platform }} + push: true + build-args: | + DANSWER_VERSION=${{ github.ref_name }} + # needed due to weird interactions with the builds for different platforms + no-cache: true + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + 
path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + merge: + runs-on: ubuntu-latest + needs: + - build + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} - - name: Login to Docker Hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_TOKEN }} - - - name: Web Image Docker Build and Push - uses: docker/build-push-action@v2 - with: - context: ./web - file: ./web/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: | - danswer/danswer-web-server:${{ github.ref_name }} - danswer/danswer-web-server:latest - build-args: | - DANSWER_VERSION=${{ github.ref_name }} - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@master - with: - image-ref: docker.io/danswer/danswer-web-server:${{ github.ref_name }} - severity: 'CRITICAL,HIGH' + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + with: + image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + severity: 'CRITICAL,HIGH' diff --git a/.github/workflows/pr-python-checks.yml 
b/.github/workflows/pr-python-checks.yml index 792fe4d46..6c604e93d 100644 --- a/.github/workflows/pr-python-checks.yml +++ b/.github/workflows/pr-python-checks.yml @@ -20,10 +20,12 @@ jobs: cache-dependency-path: | backend/requirements/default.txt backend/requirements/dev.txt + backend/requirements/model_server.txt - run: | python -m pip install --upgrade pip pip install -r backend/requirements/default.txt pip install -r backend/requirements/dev.txt + pip install -r backend/requirements/model_server.txt - name: Run MyPy run: | diff --git a/.vscode/launch.template.jsonc b/.vscode/launch.template.jsonc index 067a420dd..c5780a65a 100644 --- a/.vscode/launch.template.jsonc +++ b/.vscode/launch.template.jsonc @@ -11,62 +11,6 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ - { - "name": "API Server", - "type": "python", - "request": "launch", - "module": "uvicorn", - "cwd": "${workspaceFolder}/backend", - "env": { - "LOG_LEVEL": "DEBUG", - "DISABLE_AUTH": "True", - "TYPESENSE_API_KEY": "typesense_api_key", - "DYNAMIC_CONFIG_DIR_PATH": "./dynamic_config_storage" - }, - "args": [ - "danswer.main:app", - "--reload", - "--port", - "8080" - ] - }, - { - "name": "Indexer", - "type": "python", - "request": "launch", - "program": "danswer/background/update.py", - "cwd": "${workspaceFolder}/backend", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONPATH": ".", - "TYPESENSE_API_KEY": "typesense_api_key", - "DYNAMIC_CONFIG_DIR_PATH": "./dynamic_config_storage" - } - }, - { - "name": "Temp File Deletion", - "type": "python", - "request": "launch", - "program": "danswer/background/file_deletion.py", - "cwd": "${workspaceFolder}/backend", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONPATH": "${workspaceFolder}/backend" - } - }, - // For the listner to access the Slack API, - // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project - { - "name": "Slack 
Bot Listener", - "type": "python", - "request": "launch", - "program": "danswer/listeners/slack_listener.py", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.env", - "env": { - "LOG_LEVEL": "DEBUG" - } - }, { "name": "Web Server", "type": "node", @@ -77,6 +21,85 @@ "run", "dev" ], "console": "integratedTerminal" + }, + { + "name": "Model Server", + "type": "python", + "request": "launch", + "module": "uvicorn", + "cwd": "${workspaceFolder}/backend", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1" + }, + "args": [ + "model_server.main:app", + "--reload", + "--port", + "9000" + ] + }, + { + "name": "API Server", + "type": "python", + "request": "launch", + "module": "uvicorn", + "cwd": "${workspaceFolder}/backend", + "env": { + "LOG_ALL_MODEL_INTERACTIONS": "True", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1" + }, + "args": [ + "danswer.main:app", + "--reload", + "--port", + "8080" + ] + }, + { + "name": "Indexing", + "type": "python", + "request": "launch", + "program": "danswer/background/update.py", + "cwd": "${workspaceFolder}/backend", + "env": { + "ENABLE_MINI_CHUNK": "false", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + } + }, + // Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev + { + "name": "Background Jobs", + "type": "python", + "request": "launch", + "program": "scripts/dev_run_background_jobs.py", + "cwd": "${workspaceFolder}/backend", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." 
+ }, + "args": [ + "--no-indexing" + ] + }, + // For the listener to access the Slack API, + // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project + { + "name": "Slack Bot", + "type": "python", + "request": "launch", + "program": "danswer/danswerbot/slack/listener.py", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + } } ] } \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b891c11f2..7e80baeb2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -72,15 +72,20 @@ For convenience here's a command for it: python -m venv .venv source .venv/bin/activate ``` -_For Windows activate via:_ +_For Windows, activate the virtual environment using Command Prompt:_ ```bash .venv\Scripts\activate ``` +If using PowerShell, the command slightly differs: +```powershell +.venv\Scripts\Activate.ps1 +``` Install the required python dependencies: ```bash pip install -r danswer/backend/requirements/default.txt pip install -r danswer/backend/requirements/dev.txt +pip install -r danswer/backend/requirements/model_server.txt ``` Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend. @@ -108,26 +113,24 @@ docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational (index refers to Vespa and relational_db refers to Postgres) #### Running Danswer - -Setup a folder to store config. Navigate to `danswer/backend` and run: -```bash -mkdir dynamic_config_storage -``` - To start the frontend, navigate to `danswer/web` and run: ```bash npm run dev ``` -Package the Vespa schema. This will only need to be done when the Vespa schema is updated locally. - -Navigate to `danswer/backend/danswer/document_index/vespa/app_config` and run: +Next, start the model server which runs the local NLP models. 
+Navigate to `danswer/backend` and run: ```bash -zip -r ../vespa-app.zip . +uvicorn model_server.main:app --reload --port 9000 +``` +_For Windows (for compatibility with both PowerShell and Command Prompt):_ +```bash +powershell -Command " + uvicorn model_server.main:app --reload --port 9000 +" ``` -- Note: If you don't have the `zip` utility, you will need to install it prior to running the above -The first time running Danswer, you will also need to run the DB migrations for Postgres. +The first time running Danswer, you will need to run the DB migrations for Postgres. After the first time, this is no longer required unless the DB models change. Navigate to `danswer/backend` and with the venv active, run: @@ -145,17 +148,12 @@ python ./scripts/dev_run_background_jobs.py To run the backend API server, navigate back to `danswer/backend` and run: ```bash -AUTH_TYPE=disabled \ -DYNAMIC_CONFIG_DIR_PATH=./dynamic_config_storage \ -VESPA_DEPLOYMENT_ZIP=./danswer/document_index/vespa/vespa-app.zip \ -uvicorn danswer.main:app --reload --port 8080 +AUTH_TYPE=disabled uvicorn danswer.main:app --reload --port 8080 ``` _For Windows (for compatibility with both PowerShell and Command Prompt):_ ```bash powershell -Command " $env:AUTH_TYPE='disabled' - $env:DYNAMIC_CONFIG_DIR_PATH='./dynamic_config_storage' - $env:VESPA_DEPLOYMENT_ZIP='./danswer/document_index/vespa/vespa-app.zip' uvicorn danswer.main:app --reload --port 8080 " ``` @@ -174,20 +172,16 @@ pre-commit install Additionally, we use `mypy` for static type checking. Danswer is fully type-annotated, and we would like to keep it that way! -Right now, there is no automated type checking at the moment (coming soon), but we ask you to manually run it before -creating a pull requests with `python -m mypy .` from the `danswer/backend` directory. +To run the mypy checks manually, run `python -m mypy .` from the `danswer/backend` directory. #### Web We use `prettier` for formatting. 
The desired version (2.8.8) will be installed via a `npm i` from the `danswer/web` directory. To run the formatter, use `npx prettier --write .` from the `danswer/web` directory. -Like `mypy`, we have no automated formatting yet (coming soon), but we request that, for now, -you run this manually before creating a pull request. +Please double check that prettier passes before creating a pull request. ### Release Process Danswer follows the semver versioning standard. A set of Docker containers will be pushed automatically to DockerHub with every tag. You can see the containers [here](https://hub.docker.com/search?q=danswer%2F). - -As pre-1.0 software, even patch releases may contain breaking or non-backwards-compatible changes. diff --git a/README.md b/README.md index 34945c569..edd8328c3 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@

-

Open Source Unified Search and Gen-AI Chat with your Docs.

+

Open Source Gen-AI Chat + Unified Search.

@@ -22,16 +22,17 @@

-[Danswer](https://www.danswer.ai/) lets you ask questions in natural language questions and get back -answers based on your team specific documents. Think ChatGPT if it had access to your team's unique -knowledge. Connects to all common workplace tools such as Slack, Google Drive, Confluence, etc. +[Danswer](https://www.danswer.ai/) is the AI Assistant connected to your company's docs, apps, and people. +Danswer provides a Chat interface and plugs into any LLM of your choice. Danswer can be deployed anywhere and for any +scale - on a laptop, on-premise, or to cloud. Since you own the deployment, your user data and chats are fully in your +own control. Danswer is MIT licensed and designed to be modular and easily extensible. The system also comes fully ready +for production usage with user authentication, role management (admin/basic users), chat persistence, and a UI for +configuring Personas (AI Assistants) and their Prompts. -Teams have used Danswer to: -- Speedup customer support and escalation turnaround time. -- Improve Engineering efficiency by making documentation and code changelogs easy to find. -- Let sales team get fuller context and faster in preparation for calls. -- Track customer requests and priorities for Product teams. -- Help teams self-serve IT, Onboarding, HR, etc. +Danswer also serves as a Unified Search across all common workplace tools such as Slack, Google Drive, Confluence, etc. +By combining LLMs and team specific knowledge, Danswer becomes a subject matter expert for the team. Imagine ChatGPT if +it had access to your team's unique knowledge! It enables questions such as "A customer wants feature X, is this already +supported?" or "Where's the pull request for feature Y?"

Usage

@@ -57,19 +58,27 @@ We also have built-in support for deployment on Kubernetes. Files for that can b ## 💃 Main Features +* Chat UI with the ability to select documents to chat with. +* Create custom AI Assistants with different prompts and backing knowledge sets. +* Connect Danswer with LLM of your choice (self-host for a fully airgapped solution). * Document Search + AI Answers for natural language queries. * Connectors to all common workplace tools like Google Drive, Confluence, Slack, etc. -* Chat support (think ChatGPT but it has access to your private knowledge sources). -* Create custom AI Assistants with different prompts and backing knowledge sets. * Slack integration to get answers and search results directly in Slack. +## 🚧 Roadmap +* Chat/Prompt sharing with specific teammates and user groups. +* Multi-Model model support, chat with images, video etc. +* Choosing between LLMs and parameters during chat session. +* Tool calling and agent configurations options. +* Organizational understanding and ability to locate and suggest experts from your team. + + ## Other Noteable Benefits of Danswer -* Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models). * User Authentication with document level access management. +* Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models). * Admin Dashboard to configure connectors, document-sets, access, etc. * Custom deep learning models + learn from user feedback. -* Connect Danswer with LLM of your choice for a fully airgapped solution. * Easy deployment and ability to host Danswer anywhere of your choosing. @@ -96,11 +105,5 @@ Efficiently pulls the latest changes from: * Websites * And more ... -## 🚧 Roadmap -* Organizational understanding. -* Ability to locate and suggest experts from your team. -* Code Search -* Structured Query Languages (SQL, Excel formulas, etc.) - ## 💡 Contributing Looking to contribute? 
Please check out the [Contribution Guide](CONTRIBUTING.md) for more details. diff --git a/backend/.gitignore b/backend/.gitignore index 5017ee720..6b3219cc3 100644 --- a/backend/.gitignore +++ b/backend/.gitignore @@ -8,4 +8,4 @@ api_keys.py .env vespa-app.zip dynamic_config_storage/ -celerybeat-schedule +celerybeat-schedule* diff --git a/backend/.trivyignore b/backend/.trivyignore new file mode 100644 index 000000000..e8351b407 --- /dev/null +++ b/backend/.trivyignore @@ -0,0 +1,46 @@ +# https://github.com/madler/zlib/issues/868 +# Pulled in with base Debian image, it's part of the contrib folder but unused +# zlib1g is fine +# Will be gone with Debian image upgrade +# No impact in our settings +CVE-2023-45853 + +# krb5 related, worst case is denial of service by resource exhaustion +# Accept the risk +CVE-2024-26458 +CVE-2024-26461 +CVE-2024-26462 +CVE-2024-26458 +CVE-2024-26461 +CVE-2024-26462 +CVE-2024-26458 +CVE-2024-26461 +CVE-2024-26462 +CVE-2024-26458 +CVE-2024-26461 +CVE-2024-26462 + +# Specific to Firefox which we do not use +# No impact in our settings +CVE-2024-0743 + +# bind9 related, worst case is denial of service by CPU resource exhaustion +# Accept the risk +CVE-2023-50387 +CVE-2023-50868 +CVE-2023-50387 +CVE-2023-50868 + +# libexpat1, XML parsing resource exhaustion +# We don't parse any user provided XMLs +# No impact in our settings +CVE-2023-52425 +CVE-2024-28757 + +# sqlite, only used by NLTK library to grab word lemmatizer and stopwords +# No impact in our settings +CVE-2023-7104 + +# libharfbuzz0b, O(n^2) growth, worst case is denial of service +# Accept the risk +CVE-2023-25193 diff --git a/backend/Dockerfile b/backend/Dockerfile index 2d931083b..533a94b24 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,5 +1,10 @@ FROM python:3.11.7-slim-bookworm +LABEL com.danswer.maintainer="founders@danswer.ai" +LABEL com.danswer.description="This image is for the backend of Danswer. It is MIT Licensed and \ +free for all to use. 
You can find it at https://hub.docker.com/r/danswer/danswer-backend. For \ +more details, visit https://github.com/danswer-ai/danswer." + # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. ARG DANSWER_VERSION=0.3-dev ENV DANSWER_VERSION=${DANSWER_VERSION} @@ -12,7 +17,9 @@ RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" # zip for Vespa step futher down # ca-certificates for HTTPS RUN apt-get update && \ - apt-get install -y cmake curl zip ca-certificates && \ + apt-get install -y cmake curl zip ca-certificates libgnutls30=3.7.9-2+deb12u2 \ + libblkid1=2.38.1-5+deb12u1 libmount1=2.38.1-5+deb12u1 libsmartcols1=2.38.1-5+deb12u1 \ + libuuid1=2.38.1-5+deb12u1 && \ rm -rf /var/lib/apt/lists/* && \ apt-get clean @@ -29,15 +36,25 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \ # xserver-common and xvfb included by playwright installation but not needed after # perl-base is part of the base Python Debian image but not needed for Danswer functionality # perl-base could only be removed with --allow-remove-essential -RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cmake libldap-2.5-0 libldap-2.5-0 && \ +RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cmake \ + libldap-2.5-0 libldap-2.5-0 && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* && \ rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key +# Pre-downloading models for setups with limited egress +RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')" + +# Pre-downloading NLTK for setups with limited egress +RUN python -c "import nltk; \ +nltk.download('stopwords', quiet=True); \ +nltk.download('wordnet', quiet=True); \ +nltk.download('punkt', quiet=True);" + # Set up application files WORKDIR /app COPY ./danswer /app/danswer -COPY ./shared_models /app/shared_models +COPY ./shared_configs /app/shared_configs COPY ./alembic /app/alembic 
COPY ./alembic.ini /app/alembic.ini COPY supervisord.conf /usr/etc/supervisord.conf diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server index 624bdd37f..89f24e2ac 100644 --- a/backend/Dockerfile.model_server +++ b/backend/Dockerfile.model_server @@ -1,5 +1,11 @@ FROM python:3.11.7-slim-bookworm +LABEL com.danswer.maintainer="founders@danswer.ai" +LABEL com.danswer.description="This image is for the Danswer model server which runs all of the \ +AI models for Danswer. This container and all the code is MIT Licensed and free for all to use. \ +You can find it at https://hub.docker.com/r/danswer/danswer-model-server. For more details, \ +visit https://github.com/danswer-ai/danswer." + # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. ARG DANSWER_VERSION=0.3-dev ENV DANSWER_VERSION=${DANSWER_VERSION} @@ -11,25 +17,26 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt RUN apt-get remove -y --allow-remove-essential perl-base && \ apt-get autoremove -y -WORKDIR /app +# Pre-downloading models for setups with limited egress +RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \ +from huggingface_hub import snapshot_download; \ +AutoTokenizer.from_pretrained('danswer/intent-model'); \ +AutoTokenizer.from_pretrained('intfloat/e5-base-v2'); \ +AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \ +snapshot_download('danswer/intent-model'); \ +snapshot_download('intfloat/e5-base-v2'); \ +snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')" -# Needed for model configs and defaults -COPY ./danswer/configs /app/danswer/configs -COPY ./danswer/dynamic_configs /app/danswer/dynamic_configs +WORKDIR /app # Utils used by model server COPY ./danswer/utils/logger.py /app/danswer/utils/logger.py -COPY ./danswer/utils/timing.py /app/danswer/utils/timing.py -COPY ./danswer/utils/telemetry.py /app/danswer/utils/telemetry.py # Place to fetch 
version information COPY ./danswer/__init__.py /app/danswer/__init__.py -# Shared implementations for running NLP models locally -COPY ./danswer/search/search_nlp_models.py /app/danswer/search/search_nlp_models.py - -# Request/Response models -COPY ./shared_models /app/shared_models +# Shared between Danswer Backend and Model Server +COPY ./shared_configs /app/shared_configs # Model Server main code COPY ./model_server /app/model_server diff --git a/backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py b/backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py new file mode 100644 index 000000000..caade4441 --- /dev/null +++ b/backend/alembic/versions/0a2b51deb0b8_add_starter_prompts.py @@ -0,0 +1,31 @@ +"""Add starter prompts + +Revision ID: 0a2b51deb0b8 +Revises: 5f4b8568a221 +Create Date: 2024-03-02 23:23:49.960309 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "0a2b51deb0b8" +down_revision = "5f4b8568a221" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "persona", + sa.Column( + "starter_messages", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + + +def downgrade() -> None: + op.drop_column("persona", "starter_messages") diff --git a/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py b/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py new file mode 100644 index 000000000..29993d1e2 --- /dev/null +++ b/backend/alembic/versions/0a98909f2757_enable_encrypted_fields.py @@ -0,0 +1,113 @@ +"""Enable Encrypted Fields + +Revision ID: 0a98909f2757 +Revises: 570282d33c49 +Create Date: 2024-05-05 19:30:34.317972 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.sql import table +from sqlalchemy.dialects import postgresql +import json + +from danswer.utils.encryption import encrypt_string_to_bytes + +# revision identifiers, used by 
Alembic. +revision = "0a98909f2757" +down_revision = "570282d33c49" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + connection = op.get_bind() + + op.alter_column("key_value_store", "value", nullable=True) + op.add_column( + "key_value_store", + sa.Column( + "encrypted_value", + sa.LargeBinary, + nullable=True, + ), + ) + + # Need a temporary column to translate the JSONB to binary + op.add_column("credential", sa.Column("temp_column", sa.LargeBinary())) + + creds_table = table( + "credential", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "credential_json", + postgresql.JSONB(astext_type=sa.Text()), + nullable=False, + ), + sa.Column( + "temp_column", + sa.LargeBinary(), + nullable=False, + ), + ) + + results = connection.execute(sa.select(creds_table)) + + # This uses the MIT encrypt which does not actually encrypt the credentials + # In other words, this upgrade does not apply the encryption. Porting existing sensitive data + # and key rotation currently is not supported and will come out in the future + for row_id, creds, _ in results: + creds_binary = encrypt_string_to_bytes(json.dumps(creds)) + connection.execute( + creds_table.update() + .where(creds_table.c.id == row_id) + .values(temp_column=creds_binary) + ) + + op.drop_column("credential", "credential_json") + op.alter_column("credential", "temp_column", new_column_name="credential_json") + + op.add_column("llm_provider", sa.Column("temp_column", sa.LargeBinary())) + + llm_table = table( + "llm_provider", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "api_key", + sa.String(), + nullable=False, + ), + sa.Column( + "temp_column", + sa.LargeBinary(), + nullable=False, + ), + ) + results = connection.execute(sa.select(llm_table)) + + for row_id, api_key, _ in results: + llm_key = encrypt_string_to_bytes(api_key) + connection.execute( + llm_table.update() + .where(llm_table.c.id == row_id) + .values(temp_column=llm_key) + ) + + 
op.drop_column("llm_provider", "api_key") + op.alter_column("llm_provider", "temp_column", new_column_name="api_key") + + +def downgrade() -> None: + # Some information loss but this is ok. Should not allow decryption via downgrade. + op.drop_column("credential", "credential_json") + op.drop_column("llm_provider", "api_key") + + op.add_column("llm_provider", sa.Column("api_key", sa.String())) + op.add_column( + "credential", + sa.Column("credential_json", postgresql.JSONB(astext_type=sa.Text())), + ) + + op.execute("DELETE FROM key_value_store WHERE value IS NULL") + op.alter_column("key_value_store", "value", nullable=False) + op.drop_column("key_value_store", "encrypted_value") diff --git a/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py b/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py index fff7411dc..aecb60c21 100644 --- a/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py +++ b/backend/alembic/versions/15326fcec57e_introduce_danswer_apis.py @@ -13,8 +13,8 @@ from danswer.configs.constants import DocumentSource # revision identifiers, used by Alembic. revision = "15326fcec57e" down_revision = "77d07dffae64" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/173cae5bba26_port_config_store.py b/backend/alembic/versions/173cae5bba26_port_config_store.py new file mode 100644 index 000000000..a879d4d9b --- /dev/null +++ b/backend/alembic/versions/173cae5bba26_port_config_store.py @@ -0,0 +1,29 @@ +"""Port Config Store + +Revision ID: 173cae5bba26 +Revises: e50154680a5c +Create Date: 2024-03-19 15:30:44.425436 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "173cae5bba26" +down_revision = "e50154680a5c" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "key_value_store", + sa.Column("key", sa.String(), nullable=False), + sa.Column("value", postgresql.JSONB(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint("key"), + ) + + +def downgrade() -> None: + op.drop_table("key_value_store") diff --git a/backend/alembic/versions/2666d766cb9b_google_oauth2.py b/backend/alembic/versions/2666d766cb9b_google_oauth2.py index b163fe38b..bcdbd531b 100644 --- a/backend/alembic/versions/2666d766cb9b_google_oauth2.py +++ b/backend/alembic/versions/2666d766cb9b_google_oauth2.py @@ -13,8 +13,8 @@ from alembic import op # revision identifiers, used by Alembic. revision = "2666d766cb9b" down_revision = "6d387b3196c2" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/27c6ecc08586_permission_framework.py b/backend/alembic/versions/27c6ecc08586_permission_framework.py index 2f9d45edb..cd869e2ba 100644 --- a/backend/alembic/versions/27c6ecc08586_permission_framework.py +++ b/backend/alembic/versions/27c6ecc08586_permission_framework.py @@ -13,8 +13,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "27c6ecc08586" down_revision = "2666d766cb9b" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py b/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py index e0bb440e4..7bced1522 100644 --- a/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py +++ b/backend/alembic/versions/30c1d5744104_persona_datetime_aware.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "30c1d5744104" down_revision = "7f99be1cb9f5" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/3879338f8ba1_add_tool_table.py b/backend/alembic/versions/3879338f8ba1_add_tool_table.py new file mode 100644 index 000000000..f4d5cb78e --- /dev/null +++ b/backend/alembic/versions/3879338f8ba1_add_tool_table.py @@ -0,0 +1,45 @@ +"""Add tool table + +Revision ID: 3879338f8ba1 +Revises: f1c6478c3fd8 +Create Date: 2024-05-11 16:11:23.718084 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "3879338f8ba1" +down_revision = "f1c6478c3fd8" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "tool", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("description", sa.Text(), nullable=True), + sa.Column("in_code_tool_id", sa.String(), nullable=True), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "persona__tool", + sa.Column("persona_id", sa.Integer(), nullable=False), + sa.Column("tool_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["persona_id"], + ["persona.id"], + ), + sa.ForeignKeyConstraint( + ["tool_id"], + ["tool.id"], + ), + sa.PrimaryKeyConstraint("persona_id", "tool_id"), + ) + + +def downgrade() -> None: + op.drop_table("persona__tool") + op.drop_table("tool") diff --git a/backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py b/backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py new file mode 100644 index 000000000..efa824612 --- /dev/null +++ b/backend/alembic/versions/38eda64af7fe_add_chat_session_sharing.py @@ -0,0 +1,41 @@ +"""Add chat session sharing + +Revision ID: 38eda64af7fe +Revises: 776b3bbe9092 +Create Date: 2024-03-27 19:41:29.073594 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, 
used by Alembic. +revision = "38eda64af7fe" +down_revision = "776b3bbe9092" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_session", + sa.Column( + "shared_status", + sa.Enum( + "PUBLIC", + "PRIVATE", + name="chatsessionsharedstatus", + native_enum=False, + ), + nullable=True, + ), + ) + op.execute("UPDATE chat_session SET shared_status='PRIVATE'") + op.alter_column( + "chat_session", + "shared_status", + nullable=False, + ) + + +def downgrade() -> None: + op.drop_column("chat_session", "shared_status") diff --git a/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py b/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py index 937d926e4..2f02f646b 100644 --- a/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py +++ b/backend/alembic/versions/3b25685ff73c_move_is_public_to_cc_pair.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "3b25685ff73c" down_revision = "e0a68a81d434" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py b/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py index 311031f81..1569e639d 100644 --- a/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py +++ b/backend/alembic/versions/3c5e35aa9af0_polling_document_count.py @@ -12,8 +12,8 @@ from alembic import op # revision identifiers, used by Alembic. 
revision = "3c5e35aa9af0" down_revision = "27c6ecc08586" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py b/backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py new file mode 100644 index 000000000..dcc766fe2 --- /dev/null +++ b/backend/alembic/versions/401c1ac29467_add_tables_for_ui_based_llm_.py @@ -0,0 +1,49 @@ +"""Add tables for UI-based LLM configuration + +Revision ID: 401c1ac29467 +Revises: 703313b75876 +Create Date: 2024-04-13 18:07:29.153817 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "401c1ac29467" +down_revision = "703313b75876" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "llm_provider", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("name", sa.String(), nullable=False), + sa.Column("api_key", sa.String(), nullable=True), + sa.Column("api_base", sa.String(), nullable=True), + sa.Column("api_version", sa.String(), nullable=True), + sa.Column( + "custom_config", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + sa.Column("default_model_name", sa.String(), nullable=False), + sa.Column("fast_default_model_name", sa.String(), nullable=True), + sa.Column("is_default_provider", sa.Boolean(), unique=True, nullable=True), + sa.Column("model_names", postgresql.ARRAY(sa.String()), nullable=True), + sa.PrimaryKeyConstraint("id"), + sa.UniqueConstraint("name"), + ) + + op.add_column( + "persona", + sa.Column("llm_model_provider_override", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("persona", "llm_model_provider_override") + + op.drop_table("llm_provider") diff --git a/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py 
b/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py index 7d6f7f873..f7a996c83 100644 --- a/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py +++ b/backend/alembic/versions/465f78d9b7f9_larger_access_tokens_for_oauth.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "465f78d9b7f9" down_revision = "3c5e35aa9af0" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/46625e4745d4_remove_native_enum.py b/backend/alembic/versions/46625e4745d4_remove_native_enum.py index 3f065f5cf..53c0ffdd0 100644 --- a/backend/alembic/versions/46625e4745d4_remove_native_enum.py +++ b/backend/alembic/versions/46625e4745d4_remove_native_enum.py @@ -11,8 +11,8 @@ from sqlalchemy import String # revision identifiers, used by Alembic. revision = "46625e4745d4" down_revision = "9d97fecfab7f" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/4738e4b3bae1_pg_file_store.py b/backend/alembic/versions/4738e4b3bae1_pg_file_store.py new file mode 100644 index 000000000..819d94ddb --- /dev/null +++ b/backend/alembic/versions/4738e4b3bae1_pg_file_store.py @@ -0,0 +1,28 @@ +"""PG File Store + +Revision ID: 4738e4b3bae1 +Revises: e91df4e935ef +Create Date: 2024-03-20 18:53:32.461518 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "4738e4b3bae1" +down_revision = "e91df4e935ef" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "file_store", + sa.Column("file_name", sa.String(), nullable=False), + sa.Column("lobj_oid", sa.Integer(), nullable=False), + sa.PrimaryKeyConstraint("file_name"), + ) + + +def downgrade() -> None: + op.drop_table("file_store") diff --git a/backend/alembic/versions/47433d30de82_create_indexattempt_table.py b/backend/alembic/versions/47433d30de82_create_indexattempt_table.py index 597c698ab..a82dfabe9 100644 --- a/backend/alembic/versions/47433d30de82_create_indexattempt_table.py +++ b/backend/alembic/versions/47433d30de82_create_indexattempt_table.py @@ -11,9 +11,9 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "47433d30de82" -down_revision = None -branch_labels = None -depends_on = None +down_revision: None = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/475fcefe8826_add_name_to_api_key.py b/backend/alembic/versions/475fcefe8826_add_name_to_api_key.py new file mode 100644 index 000000000..e8912e19a --- /dev/null +++ b/backend/alembic/versions/475fcefe8826_add_name_to_api_key.py @@ -0,0 +1,23 @@ +"""Add name to api_key + +Revision ID: 475fcefe8826 +Revises: ecab2b3f1a3b +Create Date: 2024-04-11 11:05:18.414438 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "475fcefe8826" +down_revision = "ecab2b3f1a3b" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("api_key", sa.Column("name", sa.String(), nullable=True)) + + +def downgrade() -> None: + op.drop_column("api_key", "name") diff --git a/backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py b/backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py index f2b644ed7..7ad038750 100644 --- a/backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py +++ b/backend/alembic/versions/50b683a8295c_add_additional_retrieval_controls_to_.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "50b683a8295c" down_revision = "7da0ae5ad583" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/570282d33c49_track_danswerbot_explicitly.py b/backend/alembic/versions/570282d33c49_track_danswerbot_explicitly.py new file mode 100644 index 000000000..f8c0b6472 --- /dev/null +++ b/backend/alembic/versions/570282d33c49_track_danswerbot_explicitly.py @@ -0,0 +1,27 @@ +"""Track Danswerbot Explicitly + +Revision ID: 570282d33c49 +Revises: 7547d982db8f +Create Date: 2024-05-04 17:49:28.568109 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "570282d33c49" +down_revision = "7547d982db8f" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_session", sa.Column("danswerbot_flow", sa.Boolean(), nullable=True) + ) + op.execute("UPDATE chat_session SET danswerbot_flow = one_shot") + op.alter_column("chat_session", "danswerbot_flow", nullable=False) + + +def downgrade() -> None: + op.drop_column("chat_session", "danswerbot_flow") diff --git a/backend/alembic/versions/57b53544726e_add_document_set_tables.py b/backend/alembic/versions/57b53544726e_add_document_set_tables.py index 719f43f23..b8d37fac8 100644 --- a/backend/alembic/versions/57b53544726e_add_document_set_tables.py +++ b/backend/alembic/versions/57b53544726e_add_document_set_tables.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "57b53544726e" down_revision = "800f48024ae9" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/5809c0787398_add_chat_sessions.py b/backend/alembic/versions/5809c0787398_add_chat_sessions.py index 1c5d9e540..0f00ad3b2 100644 --- a/backend/alembic/versions/5809c0787398_add_chat_sessions.py +++ b/backend/alembic/versions/5809c0787398_add_chat_sessions.py @@ -13,8 +13,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "5809c0787398" down_revision = "d929f0c1c6af" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py b/backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py index c8ef3f2d9..08285c6cb 100644 --- a/backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py +++ b/backend/alembic/versions/5e84129c8be3_add_docs_indexed_column_to_index_.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "5e84129c8be3" down_revision = "e6a4bbc13fe4" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/5f4b8568a221_add_removed_documents_to_index_attempt.py b/backend/alembic/versions/5f4b8568a221_add_removed_documents_to_index_attempt.py new file mode 100644 index 000000000..0721072ad --- /dev/null +++ b/backend/alembic/versions/5f4b8568a221_add_removed_documents_to_index_attempt.py @@ -0,0 +1,27 @@ +"""add removed documents to index_attempt + +Revision ID: 5f4b8568a221 +Revises: dbaa756c2ccf +Create Date: 2024-02-16 15:02:03.319907 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "5f4b8568a221" +down_revision = "8987770549c0" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "index_attempt", + sa.Column("docs_removed_from_index", sa.Integer()), + ) + op.execute("UPDATE index_attempt SET docs_removed_from_index = 0") + + +def downgrade() -> None: + op.drop_column("index_attempt", "docs_removed_from_index") diff --git a/backend/alembic/versions/643a84a42a33_add_user_configured_names_to_llmprovider.py b/backend/alembic/versions/643a84a42a33_add_user_configured_names_to_llmprovider.py new file mode 100644 index 000000000..5ccb6d853 --- /dev/null +++ b/backend/alembic/versions/643a84a42a33_add_user_configured_names_to_llmprovider.py @@ -0,0 +1,45 @@ +"""Add user-configured names to LLMProvider + +Revision ID: 643a84a42a33 +Revises: 0a98909f2757 +Create Date: 2024-05-07 14:54:55.493100 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "643a84a42a33" +down_revision = "0a98909f2757" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("llm_provider", sa.Column("provider", sa.String(), nullable=True)) + # move "name" -> "provider" to match the new schema + op.execute("UPDATE llm_provider SET provider = name") + # pretty up display name + op.execute("UPDATE llm_provider SET name = 'OpenAI' WHERE name = 'openai'") + op.execute("UPDATE llm_provider SET name = 'Anthropic' WHERE name = 'anthropic'") + op.execute("UPDATE llm_provider SET name = 'Azure OpenAI' WHERE name = 'azure'") + op.execute("UPDATE llm_provider SET name = 'AWS Bedrock' WHERE name = 'bedrock'") + + # update personas to use the new provider names + op.execute( + "UPDATE persona SET llm_model_provider_override = 'OpenAI' WHERE llm_model_provider_override = 'openai'" + ) + op.execute( + "UPDATE persona SET llm_model_provider_override = 'Anthropic' WHERE llm_model_provider_override = 'anthropic'" + ) + op.execute( + "UPDATE 
persona SET llm_model_provider_override = 'Azure OpenAI' WHERE llm_model_provider_override = 'azure'" + ) + op.execute( + "UPDATE persona SET llm_model_provider_override = 'AWS Bedrock' WHERE llm_model_provider_override = 'bedrock'" + ) + + +def downgrade() -> None: + op.execute("UPDATE llm_provider SET name = provider") + op.drop_column("llm_provider", "provider") diff --git a/backend/alembic/versions/6d387b3196c2_basic_auth.py b/backend/alembic/versions/6d387b3196c2_basic_auth.py index 38bdf5603..8e2ad195b 100644 --- a/backend/alembic/versions/6d387b3196c2_basic_auth.py +++ b/backend/alembic/versions/6d387b3196c2_basic_auth.py @@ -13,8 +13,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "6d387b3196c2" down_revision = "47433d30de82" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py new file mode 100644 index 000000000..ed1993efe --- /dev/null +++ b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py @@ -0,0 +1,83 @@ +"""Add TokenRateLimit Tables + +Revision ID: 703313b75876 +Revises: fad14119fb92 +Create Date: 2024-04-15 01:36:02.952809 + +""" +import json +from typing import cast +from alembic import op +import sqlalchemy as sa +from danswer.dynamic_configs.factory import get_dynamic_config_store + +# revision identifiers, used by Alembic. 
+revision = "703313b75876" +down_revision = "fad14119fb92" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "token_rate_limit", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("enabled", sa.Boolean(), nullable=False), + sa.Column("token_budget", sa.Integer(), nullable=False), + sa.Column("period_hours", sa.Integer(), nullable=False), + sa.Column( + "scope", + sa.String(length=10), + nullable=False, + ), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "token_rate_limit__user_group", + sa.Column("rate_limit_id", sa.Integer(), nullable=False), + sa.Column("user_group_id", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["rate_limit_id"], + ["token_rate_limit.id"], + ), + sa.ForeignKeyConstraint( + ["user_group_id"], + ["user_group.id"], + ), + sa.PrimaryKeyConstraint("rate_limit_id", "user_group_id"), + ) + + try: + settings_json = cast( + str, get_dynamic_config_store().load("token_budget_settings") + ) + settings = json.loads(settings_json) + + is_enabled = settings.get("enable_token_budget", False) + token_budget = settings.get("token_budget", -1) + period_hours = settings.get("period_hours", -1) + + if is_enabled and token_budget > 0 and period_hours > 0: + op.execute( + f"INSERT INTO token_rate_limit \ + (enabled, token_budget, period_hours, scope) VALUES \ + ({is_enabled}, {token_budget}, {period_hours}, 'GLOBAL')" + ) + + # Delete the dynamic config + get_dynamic_config_store().delete("token_budget_settings") + + except Exception: + # Ignore if the dynamic config is not found + pass + + +def downgrade() -> None: + op.drop_table("token_rate_limit__user_group") + op.drop_table("token_rate_limit") diff --git a/backend/alembic/versions/70f00c45c0f2_more_descriptive_filestore.py b/backend/alembic/versions/70f00c45c0f2_more_descriptive_filestore.py new file mode 
100644 index 000000000..3748553c3 --- /dev/null +++ b/backend/alembic/versions/70f00c45c0f2_more_descriptive_filestore.py @@ -0,0 +1,68 @@ +"""More Descriptive Filestore + +Revision ID: 70f00c45c0f2 +Revises: 3879338f8ba1 +Create Date: 2024-05-17 17:51:41.926893 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "70f00c45c0f2" +down_revision = "3879338f8ba1" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column("file_store", sa.Column("display_name", sa.String(), nullable=True)) + op.add_column( + "file_store", + sa.Column( + "file_origin", + sa.String(), + nullable=False, + server_default="connector", # Default to connector + ), + ) + op.add_column( + "file_store", + sa.Column( + "file_type", sa.String(), nullable=False, server_default="text/plain" + ), + ) + op.add_column( + "file_store", + sa.Column( + "file_metadata", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + + op.execute( + """ + UPDATE file_store + SET file_origin = CASE + WHEN file_name LIKE 'chat__%' THEN 'chat_upload' + ELSE 'connector' + END, + file_name = CASE + WHEN file_name LIKE 'chat__%' THEN SUBSTR(file_name, 7) + ELSE file_name + END, + file_type = CASE + WHEN file_name LIKE 'chat__%' THEN 'image/png' + ELSE 'text/plain' + END + """ + ) + + +def downgrade() -> None: + op.drop_column("file_store", "file_metadata") + op.drop_column("file_store", "file_type") + op.drop_column("file_store", "file_origin") + op.drop_column("file_store", "display_name") diff --git a/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py b/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py new file mode 100644 index 000000000..0774651cc --- /dev/null +++ b/backend/alembic/versions/72bdc9929a46_permission_auto_sync_framework.py @@ -0,0 +1,81 @@ +"""Permission Auto Sync Framework + +Revision ID: 72bdc9929a46 
+Revises: 475fcefe8826 +Create Date: 2024-04-14 21:15:28.659634 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "72bdc9929a46" +down_revision = "475fcefe8826" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "email_to_external_user_cache", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("external_user_id", sa.String(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "external_permission", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column("user_id", sa.UUID(), nullable=True), + sa.Column("user_email", sa.String(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + sa.Column("external_permission_group", sa.String(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.create_table( + "permission_sync_run", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "source_type", + sa.String(), + nullable=False, + ), + sa.Column("update_type", sa.String(), nullable=False), + sa.Column("cc_pair_id", sa.Integer(), nullable=True), + sa.Column( + "status", + sa.String(), + nullable=False, + ), + sa.Column("error_msg", sa.Text(), nullable=True), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + server_default=sa.text("now()"), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["cc_pair_id"], + ["connector_credential_pair.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + + +def downgrade() -> None: + op.drop_table("permission_sync_run") + op.drop_table("external_permission") + op.drop_table("email_to_external_user_cache") diff --git a/backend/alembic/versions/7547d982db8f_chat_folders.py 
b/backend/alembic/versions/7547d982db8f_chat_folders.py new file mode 100644 index 000000000..fc70090fe --- /dev/null +++ b/backend/alembic/versions/7547d982db8f_chat_folders.py @@ -0,0 +1,51 @@ +"""Chat Folders + +Revision ID: 7547d982db8f +Revises: ef7da92f7213 +Create Date: 2024-05-02 15:18:56.573347 + +""" +from alembic import op +import sqlalchemy as sa +import fastapi_users_db_sqlalchemy + +# revision identifiers, used by Alembic. +revision = "7547d982db8f" +down_revision = "ef7da92f7213" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "chat_folder", + sa.Column("id", sa.Integer(), nullable=False), + sa.Column( + "user_id", + fastapi_users_db_sqlalchemy.generics.GUID(), + nullable=True, + ), + sa.Column("name", sa.String(), nullable=True), + sa.Column("display_priority", sa.Integer(), nullable=False), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("id"), + ) + op.add_column("chat_session", sa.Column("folder_id", sa.Integer(), nullable=True)) + op.create_foreign_key( + "chat_session_chat_folder_fk", + "chat_session", + "chat_folder", + ["folder_id"], + ["id"], + ) + + +def downgrade() -> None: + op.drop_constraint( + "chat_session_chat_folder_fk", "chat_session", type_="foreignkey" + ) + op.drop_column("chat_session", "folder_id") + op.drop_table("chat_folder") diff --git a/backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py b/backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py index fba6a88ce..7f587bd95 100644 --- a/backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py +++ b/backend/alembic/versions/767f1c2a00eb_count_chat_tokens.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "767f1c2a00eb" down_revision = "dba7f71618f5" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py b/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py index 66cedad4a..c609ca4ae 100644 --- a/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py +++ b/backend/alembic/versions/76b60d407dfb_cc_pair_name_not_unique.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "76b60d407dfb" down_revision = "b156fa702355" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py new file mode 100644 index 000000000..6fb8c5ac0 --- /dev/null +++ b/backend/alembic/versions/776b3bbe9092_remove_remaining_enums.py @@ -0,0 +1,71 @@ +"""Remove Remaining Enums + +Revision ID: 776b3bbe9092 +Revises: 4738e4b3bae1 +Create Date: 2024-03-22 21:34:27.629444 + +""" +from alembic import op +import sqlalchemy as sa + +from danswer.db.models import IndexModelStatus +from danswer.search.enums import RecencyBiasSetting +from danswer.search.models import SearchType + +# revision identifiers, used by Alembic. 
+revision = "776b3bbe9092" +down_revision = "4738e4b3bae1" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.alter_column( + "persona", + "search_type", + type_=sa.String, + existing_type=sa.Enum(SearchType, native_enum=False), + existing_nullable=False, + ) + op.alter_column( + "persona", + "recency_bias", + type_=sa.String, + existing_type=sa.Enum(RecencyBiasSetting, native_enum=False), + existing_nullable=False, + ) + + # Because the indexmodelstatus enum does not have a mapping to a string type + # we need this workaround instead of directly changing the type + op.add_column("embedding_model", sa.Column("temp_status", sa.String)) + op.execute("UPDATE embedding_model SET temp_status = status::text") + op.drop_column("embedding_model", "status") + op.alter_column("embedding_model", "temp_status", new_column_name="status") + + op.execute("DROP TYPE IF EXISTS searchtype") + op.execute("DROP TYPE IF EXISTS recencybiassetting") + op.execute("DROP TYPE IF EXISTS indexmodelstatus") + + +def downgrade() -> None: + op.alter_column( + "persona", + "search_type", + type_=sa.Enum(SearchType, native_enum=False), + existing_type=sa.String(length=50), + existing_nullable=False, + ) + op.alter_column( + "persona", + "recency_bias", + type_=sa.Enum(RecencyBiasSetting, native_enum=False), + existing_type=sa.String(length=50), + existing_nullable=False, + ) + op.alter_column( + "embedding_model", + "status", + type_=sa.Enum(IndexModelStatus, native_enum=False), + existing_type=sa.String(length=50), + existing_nullable=False, + ) diff --git a/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py b/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py index 7193e9d4a..c953feb31 100644 --- a/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py +++ b/backend/alembic/versions/77d07dffae64_forcibly_remove_more_enum_types_from_.py @@ -12,8 +12,8 @@ from sqlalchemy import String 
# revision identifiers, used by Alembic. revision = "77d07dffae64" down_revision = "d61e513bef0a" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/78dbe7e38469_task_tracking.py b/backend/alembic/versions/78dbe7e38469_task_tracking.py index 33eac0c39..d50aaac4c 100644 --- a/backend/alembic/versions/78dbe7e38469_task_tracking.py +++ b/backend/alembic/versions/78dbe7e38469_task_tracking.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "78dbe7e38469" down_revision = "7ccea01261f6" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/79acd316403a_add_api_key_table.py b/backend/alembic/versions/79acd316403a_add_api_key_table.py index b3617883f..3c220e041 100644 --- a/backend/alembic/versions/79acd316403a_add_api_key_table.py +++ b/backend/alembic/versions/79acd316403a_add_api_key_table.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "79acd316403a" down_revision = "904e5138fffb" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py b/backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py index 24d8ff717..5cd8916d4 100644 --- a/backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py +++ b/backend/alembic/versions/7ccea01261f6_store_chat_retrieval_docs.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "7ccea01261f6" down_revision = "a570b80a5f20" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py b/backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py index 3a4d7e0a9..92715acc1 100644 --- a/backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py +++ b/backend/alembic/versions/7da0ae5ad583_add_description_to_persona.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "7da0ae5ad583" down_revision = "e86866a9c78a" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py b/backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py index 7766f4cf5..372fe5ebb 100644 --- a/backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py +++ b/backend/alembic/versions/7da543f5672f_add_slackbotconfig_table.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "7da543f5672f" down_revision = "febe9eaa0644" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/7f726bad5367_slack_followup.py b/backend/alembic/versions/7f726bad5367_slack_followup.py index 0eb197840..a060458a3 100644 --- a/backend/alembic/versions/7f726bad5367_slack_followup.py +++ b/backend/alembic/versions/7f726bad5367_slack_followup.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "7f726bad5367" down_revision = "79acd316403a" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py b/backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py index b8ac75418..26d19383f 100644 --- a/backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py +++ b/backend/alembic/versions/7f99be1cb9f5_add_index_for_getting_documents_just_by_.py @@ -11,8 +11,8 @@ from alembic import op # revision identifiers, used by Alembic. revision = "7f99be1cb9f5" down_revision = "78dbe7e38469" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py b/backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py index 3074a8af0..c5e8536e0 100644 --- a/backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py +++ b/backend/alembic/versions/800f48024ae9_add_id_to_connectorcredentialpair.py @@ -12,8 +12,8 @@ from sqlalchemy.schema import Sequence, CreateSequence # revision identifiers, used by Alembic. revision = "800f48024ae9" down_revision = "767f1c2a00eb" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py b/backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py index 5864dc7c6..2a1b8e978 100644 --- a/backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py +++ b/backend/alembic/versions/80696cf850ae_add_chat_session_to_query_event.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "80696cf850ae" down_revision = "15326fcec57e" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py b/backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py index 4711cb091..74ff50d4b 100644 --- a/backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py +++ b/backend/alembic/versions/891cd83c87a8_add_is_visible_to_persona.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "891cd83c87a8" down_revision = "76b60d407dfb" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py b/backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py index 5509c6620..ffb7ba9d8 100644 --- a/backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py +++ b/backend/alembic/versions/8987770549c0_add_full_exception_stack_trace.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "8987770549c0" down_revision = "ec3ec2eabf7b" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py b/backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py index 3ff454ba7..9026b3f97 100644 --- a/backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py +++ b/backend/alembic/versions/8aabb57f3b49_restructure_document_indices.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "8aabb57f3b49" down_revision = "5e84129c8be3" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/8e26726b7683_chat_context_addition.py b/backend/alembic/versions/8e26726b7683_chat_context_addition.py index b70fd1d4b..d4d764304 100644 --- a/backend/alembic/versions/8e26726b7683_chat_context_addition.py +++ b/backend/alembic/versions/8e26726b7683_chat_context_addition.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "8e26726b7683" down_revision = "5809c0787398" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/904451035c9b_store_tool_details.py b/backend/alembic/versions/904451035c9b_store_tool_details.py index 9fadae659..46ee24472 100644 --- a/backend/alembic/versions/904451035c9b_store_tool_details.py +++ b/backend/alembic/versions/904451035c9b_store_tool_details.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "904451035c9b" down_revision = "3b25685ff73c" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/904e5138fffb_tags.py b/backend/alembic/versions/904e5138fffb_tags.py index aaf4bd51f..24588eef6 100644 --- a/backend/alembic/versions/904e5138fffb_tags.py +++ b/backend/alembic/versions/904e5138fffb_tags.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "904e5138fffb" down_revision = "891cd83c87a8" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py b/backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py new file mode 100644 index 000000000..dc8749b9a --- /dev/null +++ b/backend/alembic/versions/91fd3b470d1a_remove_documentsource_from_tag.py @@ -0,0 +1,36 @@ +"""Remove DocumentSource from Tag + +Revision ID: 91fd3b470d1a +Revises: 173cae5bba26 +Create Date: 2024-03-21 12:05:23.956734 + +""" +from alembic import op +import sqlalchemy as sa +from danswer.configs.constants import DocumentSource + +# revision identifiers, used by Alembic. +revision = "91fd3b470d1a" +down_revision = "173cae5bba26" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.alter_column( + "tag", + "source", + type_=sa.String(length=50), + existing_type=sa.Enum(DocumentSource, native_enum=False), + existing_nullable=False, + ) + + +def downgrade() -> None: + op.alter_column( + "tag", + "source", + type_=sa.Enum(DocumentSource, native_enum=False), + existing_type=sa.String(length=50), + existing_nullable=False, + ) diff --git a/backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py b/backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py index 088b50731..e91ff3bd1 100644 --- a/backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py +++ b/backend/alembic/versions/9d97fecfab7f_added_retrieved_docs_to_query_event.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "9d97fecfab7f" down_revision = "ffc707a226b4" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/a3bfd0d64902_add_chosen_assistants_to_user_table.py b/backend/alembic/versions/a3bfd0d64902_add_chosen_assistants_to_user_table.py new file mode 100644 index 000000000..89439adb6 --- /dev/null +++ b/backend/alembic/versions/a3bfd0d64902_add_chosen_assistants_to_user_table.py @@ -0,0 +1,27 @@ +"""Add chosen_assistants to User table + +Revision ID: a3bfd0d64902 +Revises: ec85f2b3c544 +Create Date: 2024-05-26 17:22:24.834741 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "a3bfd0d64902" +down_revision = "ec85f2b3c544" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "user", + sa.Column("chosen_assistants", postgresql.ARRAY(sa.Integer()), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("user", "chosen_assistants") diff --git a/backend/alembic/versions/a570b80a5f20_usergroup_tables.py b/backend/alembic/versions/a570b80a5f20_usergroup_tables.py index 6cbb67569..57827b316 100644 --- a/backend/alembic/versions/a570b80a5f20_usergroup_tables.py +++ b/backend/alembic/versions/a570b80a5f20_usergroup_tables.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "a570b80a5f20" down_revision = "904451035c9b" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/ae62505e3acc_add_saml_accounts.py b/backend/alembic/versions/ae62505e3acc_add_saml_accounts.py index db67d0274..e8bc81625 100644 --- a/backend/alembic/versions/ae62505e3acc_add_saml_accounts.py +++ b/backend/alembic/versions/ae62505e3acc_add_saml_accounts.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "ae62505e3acc" down_revision = "7da543f5672f" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py index ec180ccc7..cdf1722b3 100644 --- a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py +++ b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py @@ -11,8 +11,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "b082fec533f0" down_revision = "df0c7ad8a076" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/b156fa702355_chat_reworked.py b/backend/alembic/versions/b156fa702355_chat_reworked.py index 05e5fe56a..c80ab6a0f 100644 --- a/backend/alembic/versions/b156fa702355_chat_reworked.py +++ b/backend/alembic/versions/b156fa702355_chat_reworked.py @@ -15,8 +15,8 @@ from danswer.configs.constants import DocumentSource # revision identifiers, used by Alembic. 
revision = "b156fa702355" down_revision = "baf71f781b9e" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None searchtype_enum = ENUM( diff --git a/backend/alembic/versions/b85f02ec1308_fix_file_type_migration.py b/backend/alembic/versions/b85f02ec1308_fix_file_type_migration.py new file mode 100644 index 000000000..ac17670b7 --- /dev/null +++ b/backend/alembic/versions/b85f02ec1308_fix_file_type_migration.py @@ -0,0 +1,28 @@ +"""fix-file-type-migration + +Revision ID: b85f02ec1308 +Revises: a3bfd0d64902 +Create Date: 2024-05-31 18:09:26.658164 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +revision = "b85f02ec1308" +down_revision = "a3bfd0d64902" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.execute( + """ + UPDATE file_store + SET file_origin = UPPER(file_origin) + """ + ) + + +def downgrade() -> None: + # Let's not break anything on purpose :) + pass diff --git a/backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py b/backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py index 1939ae78f..6a1b6adcc 100644 --- a/backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py +++ b/backend/alembic/versions/baf71f781b9e_add_llm_model_version_override_to_.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "baf71f781b9e" down_revision = "50b683a8295c" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py b/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py index 120d63e16..aa4e7c71c 100644 --- a/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py +++ b/backend/alembic/versions/d5645c915d0e_remove_deletion_attempt_table.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. revision = "d5645c915d0e" down_revision = "8e26726b7683" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py b/backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py index 7cc4bb639..2870b8696 100644 --- a/backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py +++ b/backend/alembic/versions/d61e513bef0a_add_total_docs_for_index_attempt.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "d61e513bef0a" down_revision = "46625e4745d4" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/d7111c1238cd_remove_document_ids.py b/backend/alembic/versions/d7111c1238cd_remove_document_ids.py index 6bbb5fa2d..4b40755f1 100644 --- a/backend/alembic/versions/d7111c1238cd_remove_document_ids.py +++ b/backend/alembic/versions/d7111c1238cd_remove_document_ids.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "d7111c1238cd" down_revision = "465f78d9b7f9" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/d929f0c1c6af_feedback_feature.py b/backend/alembic/versions/d929f0c1c6af_feedback_feature.py index e2f4e6ff5..247bce1bc 100644 --- a/backend/alembic/versions/d929f0c1c6af_feedback_feature.py +++ b/backend/alembic/versions/d929f0c1c6af_feedback_feature.py @@ -13,8 +13,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "d929f0c1c6af" down_revision = "8aabb57f3b49" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py b/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py index bef3657de..7512038cd 100644 --- a/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py +++ b/backend/alembic/versions/dba7f71618f5_danswer_custom_tool_flow.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "dba7f71618f5" down_revision = "d5645c915d0e" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/dbaa756c2ccf_embedding_models.py b/backend/alembic/versions/dbaa756c2ccf_embedding_models.py index c7b6fd58d..a7c9b8f5a 100644 --- a/backend/alembic/versions/dbaa756c2ccf_embedding_models.py +++ b/backend/alembic/versions/dbaa756c2ccf_embedding_models.py @@ -9,18 +9,18 @@ from alembic import op import sqlalchemy as sa from sqlalchemy import table, column, String, Integer, Boolean -from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL -from danswer.configs.model_configs import DOC_EMBEDDING_DIM -from danswer.configs.model_configs import NORMALIZE_EMBEDDINGS -from danswer.configs.model_configs import ASYM_QUERY_PREFIX -from danswer.configs.model_configs import ASYM_PASSAGE_PREFIX +from danswer.db.embedding_model import ( + get_new_default_embedding_model, + get_old_default_embedding_model, + user_has_overridden_embedding_model, +) from danswer.db.models import IndexModelStatus # revision identifiers, used by Alembic. revision = "dbaa756c2ccf" down_revision = "7f726bad5367" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: @@ -40,6 +40,9 @@ def upgrade() -> None: ), sa.PrimaryKeyConstraint("id"), ) + # since all index attempts must be associated with an embedding model, + # need to put something in here to avoid nulls. On server startup, + # this value will be overriden EmbeddingModel = table( "embedding_model", column("id", Integer), @@ -53,20 +56,44 @@ def upgrade() -> None: "status", sa.Enum(IndexModelStatus, name="indexmodelstatus", native=False) ), ) + # insert an embedding model row that corresponds to the embedding model + # the user selected via env variables before this change. 
This is needed since + # all index_attempts must be associated with an embedding model, so without this + # we will run into violations of non-null contraints + old_embedding_model = get_old_default_embedding_model() op.bulk_insert( EmbeddingModel, [ { - "model_name": DOCUMENT_ENCODER_MODEL, - "model_dim": DOC_EMBEDDING_DIM, - "normalize": NORMALIZE_EMBEDDINGS, - "query_prefix": ASYM_QUERY_PREFIX, - "passage_prefix": ASYM_PASSAGE_PREFIX, - "index_name": "danswer_chunk", - "status": IndexModelStatus.PRESENT, + "model_name": old_embedding_model.model_name, + "model_dim": old_embedding_model.model_dim, + "normalize": old_embedding_model.normalize, + "query_prefix": old_embedding_model.query_prefix, + "passage_prefix": old_embedding_model.passage_prefix, + "index_name": old_embedding_model.index_name, + "status": old_embedding_model.status, } ], ) + # if the user has not overridden the default embedding model via env variables, + # insert the new default model into the database to auto-upgrade them + if not user_has_overridden_embedding_model(): + new_embedding_model = get_new_default_embedding_model(is_present=False) + op.bulk_insert( + EmbeddingModel, + [ + { + "model_name": new_embedding_model.model_name, + "model_dim": new_embedding_model.model_dim, + "normalize": new_embedding_model.normalize, + "query_prefix": new_embedding_model.query_prefix, + "passage_prefix": new_embedding_model.passage_prefix, + "index_name": new_embedding_model.index_name, + "status": IndexModelStatus.FUTURE, + } + ], + ) + op.add_column( "index_attempt", sa.Column("embedding_model_id", sa.Integer(), nullable=True), diff --git a/backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py b/backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py index 4ccad3ff0..4e3d8ce50 100644 --- a/backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py +++ b/backend/alembic/versions/df0c7ad8a076_added_deletion_attempt_table.py @@ -12,8 +12,8 @@ import sqlalchemy as 
sa # revision identifiers, used by Alembic. revision = "df0c7ad8a076" down_revision = "d7111c1238cd" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/e0a68a81d434_add_chat_feedback.py b/backend/alembic/versions/e0a68a81d434_add_chat_feedback.py index 528711bce..d36bb3f34 100644 --- a/backend/alembic/versions/e0a68a81d434_add_chat_feedback.py +++ b/backend/alembic/versions/e0a68a81d434_add_chat_feedback.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "e0a68a81d434" down_revision = "ae62505e3acc" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/e50154680a5c_no_source_enum.py b/backend/alembic/versions/e50154680a5c_no_source_enum.py new file mode 100644 index 000000000..8a7ccc751 --- /dev/null +++ b/backend/alembic/versions/e50154680a5c_no_source_enum.py @@ -0,0 +1,38 @@ +"""No Source Enum + +Revision ID: e50154680a5c +Revises: fcd135795f21 +Create Date: 2024-03-14 18:06:08.523106 + +""" +from alembic import op +import sqlalchemy as sa + +from danswer.configs.constants import DocumentSource + +# revision identifiers, used by Alembic. 
+revision = "e50154680a5c" +down_revision = "fcd135795f21" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.alter_column( + "search_doc", + "source_type", + type_=sa.String(length=50), + existing_type=sa.Enum(DocumentSource, native_enum=False), + existing_nullable=False, + ) + op.execute("DROP TYPE IF EXISTS documentsource") + + +def downgrade() -> None: + op.alter_column( + "search_doc", + "source_type", + type_=sa.Enum(DocumentSource, native_enum=False), + existing_type=sa.String(length=50), + existing_nullable=False, + ) diff --git a/backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py b/backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py index 3bdb7632b..a95a10897 100644 --- a/backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py +++ b/backend/alembic/versions/e6a4bbc13fe4_add_index_for_retrieving_latest_index_.py @@ -11,8 +11,8 @@ from alembic import op # revision identifiers, used by Alembic. revision = "e6a4bbc13fe4" down_revision = "b082fec533f0" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py b/backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py index 4a362bc74..97b0d7510 100644 --- a/backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py +++ b/backend/alembic/versions/e86866a9c78a_add_persona_to_chat_session.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "e86866a9c78a" down_revision = "80696cf850ae" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/e91df4e935ef_private_personas_documentsets.py b/backend/alembic/versions/e91df4e935ef_private_personas_documentsets.py new file mode 100644 index 000000000..a7eb75a1e --- /dev/null +++ b/backend/alembic/versions/e91df4e935ef_private_personas_documentsets.py @@ -0,0 +1,118 @@ +"""Private Personas DocumentSets + +Revision ID: e91df4e935ef +Revises: 91fd3b470d1a +Create Date: 2024-03-17 11:47:24.675881 + +""" +import fastapi_users_db_sqlalchemy +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "e91df4e935ef" +down_revision = "91fd3b470d1a" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.create_table( + "document_set__user", + sa.Column("document_set_id", sa.Integer(), nullable=False), + sa.Column( + "user_id", + fastapi_users_db_sqlalchemy.generics.GUID(), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["document_set_id"], + ["document_set.id"], + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("document_set_id", "user_id"), + ) + op.create_table( + "persona__user", + sa.Column("persona_id", sa.Integer(), nullable=False), + sa.Column( + "user_id", + fastapi_users_db_sqlalchemy.generics.GUID(), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["persona_id"], + ["persona.id"], + ), + sa.ForeignKeyConstraint( + ["user_id"], + ["user.id"], + ), + sa.PrimaryKeyConstraint("persona_id", "user_id"), + ) + op.create_table( + "document_set__user_group", + sa.Column("document_set_id", sa.Integer(), nullable=False), + sa.Column( + "user_group_id", + sa.Integer(), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["document_set_id"], + ["document_set.id"], + ), + sa.ForeignKeyConstraint( + ["user_group_id"], + ["user_group.id"], 
+ ), + sa.PrimaryKeyConstraint("document_set_id", "user_group_id"), + ) + op.create_table( + "persona__user_group", + sa.Column("persona_id", sa.Integer(), nullable=False), + sa.Column( + "user_group_id", + sa.Integer(), + nullable=False, + ), + sa.ForeignKeyConstraint( + ["persona_id"], + ["persona.id"], + ), + sa.ForeignKeyConstraint( + ["user_group_id"], + ["user_group.id"], + ), + sa.PrimaryKeyConstraint("persona_id", "user_group_id"), + ) + + op.add_column( + "document_set", + sa.Column("is_public", sa.Boolean(), nullable=True), + ) + # fill in is_public for existing rows + op.execute("UPDATE document_set SET is_public = true WHERE is_public IS NULL") + op.alter_column("document_set", "is_public", nullable=False) + + op.add_column( + "persona", + sa.Column("is_public", sa.Boolean(), nullable=True), + ) + # fill in is_public for existing rows + op.execute("UPDATE persona SET is_public = true WHERE is_public IS NULL") + op.alter_column("persona", "is_public", nullable=False) + + +def downgrade() -> None: + op.drop_column("persona", "is_public") + + op.drop_column("document_set", "is_public") + + op.drop_table("persona__user") + op.drop_table("document_set__user") + op.drop_table("persona__user_group") + op.drop_table("document_set__user_group") diff --git a/backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py b/backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py index 9bb4cd53a..623c14060 100644 --- a/backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py +++ b/backend/alembic/versions/ec3ec2eabf7b_index_from_beginning.py @@ -11,8 +11,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. 
revision = "ec3ec2eabf7b" down_revision = "dbaa756c2ccf" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py b/backend/alembic/versions/ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py new file mode 100644 index 000000000..fe073ce49 --- /dev/null +++ b/backend/alembic/versions/ec85f2b3c544_remove_last_attempt_status_from_cc_pair.py @@ -0,0 +1,31 @@ +"""Remove Last Attempt Status from CC Pair + +Revision ID: ec85f2b3c544 +Revises: 3879338f8ba1 +Create Date: 2024-05-23 21:39:46.126010 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "ec85f2b3c544" +down_revision = "70f00c45c0f2" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.drop_column("connector_credential_pair", "last_attempt_status") + + +def downgrade() -> None: + op.add_column( + "connector_credential_pair", + sa.Column( + "last_attempt_status", + sa.VARCHAR(), + autoincrement=False, + nullable=True, + ), + ) diff --git a/backend/alembic/versions/ecab2b3f1a3b_add_overrides_to_the_chat_session.py b/backend/alembic/versions/ecab2b3f1a3b_add_overrides_to_the_chat_session.py new file mode 100644 index 000000000..ca2b57adb --- /dev/null +++ b/backend/alembic/versions/ecab2b3f1a3b_add_overrides_to_the_chat_session.py @@ -0,0 +1,40 @@ +"""Add overrides to the chat session + +Revision ID: ecab2b3f1a3b +Revises: 38eda64af7fe +Create Date: 2024-04-01 19:08:21.359102 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "ecab2b3f1a3b" +down_revision = "38eda64af7fe" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_session", + sa.Column( + "llm_override", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + op.add_column( + "chat_session", + sa.Column( + "prompt_override", + postgresql.JSONB(astext_type=sa.Text()), + nullable=True, + ), + ) + + +def downgrade() -> None: + op.drop_column("chat_session", "prompt_override") + op.drop_column("chat_session", "llm_override") diff --git a/backend/alembic/versions/ef7da92f7213_add_files_to_chatmessage.py b/backend/alembic/versions/ef7da92f7213_add_files_to_chatmessage.py new file mode 100644 index 000000000..eb04a1b82 --- /dev/null +++ b/backend/alembic/versions/ef7da92f7213_add_files_to_chatmessage.py @@ -0,0 +1,27 @@ +"""Add files to ChatMessage + +Revision ID: ef7da92f7213 +Revises: 401c1ac29467 +Create Date: 2024-04-28 16:59:33.199153 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = "ef7da92f7213" +down_revision = "401c1ac29467" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_message", + sa.Column("files", postgresql.JSONB(astext_type=sa.Text()), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("chat_message", "files") diff --git a/backend/alembic/versions/f1c6478c3fd8_add_pre_defined_feedback.py b/backend/alembic/versions/f1c6478c3fd8_add_pre_defined_feedback.py new file mode 100644 index 000000000..f6ba0d7dd --- /dev/null +++ b/backend/alembic/versions/f1c6478c3fd8_add_pre_defined_feedback.py @@ -0,0 +1,25 @@ +"""Add pre-defined feedback + +Revision ID: f1c6478c3fd8 +Revises: 643a84a42a33 +Create Date: 2024-05-09 18:11:49.210667 + +""" +from alembic import op +import sqlalchemy as sa + +revision = "f1c6478c3fd8" +down_revision = "643a84a42a33" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "chat_feedback", + sa.Column("predefined_feedback", sa.String(), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("chat_feedback", "predefined_feedback") diff --git a/backend/alembic/versions/fad14119fb92_delete_tags_with_wrong_enum.py b/backend/alembic/versions/fad14119fb92_delete_tags_with_wrong_enum.py new file mode 100644 index 000000000..b9c428640 --- /dev/null +++ b/backend/alembic/versions/fad14119fb92_delete_tags_with_wrong_enum.py @@ -0,0 +1,39 @@ +"""Delete Tags with wrong Enum + +Revision ID: fad14119fb92 +Revises: 72bdc9929a46 +Create Date: 2024-04-25 17:05:09.695703 + +""" +from alembic import op + +# revision identifiers, used by Alembic. 
+revision = "fad14119fb92" +down_revision = "72bdc9929a46" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + # Some documents may lose their tags but this is the only way as the enum + # mapping may have changed since tag switched to string (it will be reindexed anyway) + op.execute( + """ + DELETE FROM document__tag + WHERE tag_id IN ( + SELECT id FROM tag + WHERE source ~ '^[0-9]+$' + ) + """ + ) + + op.execute( + """ + DELETE FROM tag + WHERE source ~ '^[0-9]+$' + """ + ) + + +def downgrade() -> None: + pass diff --git a/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py new file mode 100644 index 000000000..fc7a6f502 --- /dev/null +++ b/backend/alembic/versions/fcd135795f21_add_slack_bot_display_type.py @@ -0,0 +1,39 @@ +"""Add slack bot display type + +Revision ID: fcd135795f21 +Revises: 0a2b51deb0b8 +Create Date: 2024-03-04 17:03:27.116284 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "fcd135795f21" +down_revision = "0a2b51deb0b8" +branch_labels: None = None +depends_on: None = None + + +def upgrade() -> None: + op.add_column( + "slack_bot_config", + sa.Column( + "response_type", + sa.Enum( + "QUOTES", + "CITATIONS", + name="slackbotresponsetype", + native_enum=False, + ), + nullable=True, + ), + ) + op.execute( + "UPDATE slack_bot_config SET response_type = 'QUOTES' WHERE response_type IS NULL" + ) + op.alter_column("slack_bot_config", "response_type", nullable=False) + + +def downgrade() -> None: + op.drop_column("slack_bot_config", "response_type") diff --git a/backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py b/backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py index 6486fcd11..77ca1d14c 100644 --- a/backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py +++ b/backend/alembic/versions/febe9eaa0644_add_document_set_persona_relationship_.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "febe9eaa0644" down_revision = "57b53544726e" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/alembic/versions/ffc707a226b4_basic_document_metadata.py b/backend/alembic/versions/ffc707a226b4_basic_document_metadata.py index 5817a4faf..3a2b3b557 100644 --- a/backend/alembic/versions/ffc707a226b4_basic_document_metadata.py +++ b/backend/alembic/versions/ffc707a226b4_basic_document_metadata.py @@ -12,8 +12,8 @@ from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
revision = "ffc707a226b4" down_revision = "30c1d5744104" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/danswer/auth/noauth_user.py b/backend/danswer/auth/noauth_user.py new file mode 100644 index 000000000..4744c4a64 --- /dev/null +++ b/backend/danswer/auth/noauth_user.py @@ -0,0 +1,40 @@ +from collections.abc import Mapping +from typing import Any +from typing import cast + +from danswer.auth.schemas import UserRole +from danswer.dynamic_configs.store import ConfigNotFoundError +from danswer.dynamic_configs.store import DynamicConfigStore +from danswer.server.manage.models import UserInfo +from danswer.server.manage.models import UserPreferences + + +NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" + + +def set_no_auth_user_preferences( + store: DynamicConfigStore, preferences: UserPreferences +) -> None: + store.store(NO_AUTH_USER_PREFERENCES_KEY, preferences.dict()) + + +def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences: + try: + preferences_data = cast( + Mapping[str, Any], store.load(NO_AUTH_USER_PREFERENCES_KEY) + ) + return UserPreferences(**preferences_data) + except ConfigNotFoundError: + return UserPreferences(chosen_assistants=None) + + +def fetch_no_auth_user(store: DynamicConfigStore) -> UserInfo: + return UserInfo( + id="__no_auth_user__", + email="anonymous@danswer.ai", + is_active=True, + is_superuser=False, + is_verified=True, + role=UserRole.ADMIN, + preferences=load_no_auth_user_preferences(store), + ) diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index 02234c302..03e770bd5 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -23,8 +23,8 @@ from fastapi_users.authentication import CookieTransport from fastapi_users.authentication import Strategy from fastapi_users.authentication.strategy.db import AccessTokenDatabase from 
fastapi_users.authentication.strategy.db import DatabaseStrategy -from fastapi_users.db import SQLAlchemyUserDatabase from fastapi_users.openapi import OpenAPIResponseType +from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase from sqlalchemy.orm import Session from danswer.auth.schemas import UserCreate @@ -33,15 +33,18 @@ from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_AUTH from danswer.configs.app_configs import EMAIL_FROM from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION -from danswer.configs.app_configs import SECRET from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS from danswer.configs.app_configs import SMTP_PASS from danswer.configs.app_configs import SMTP_PORT from danswer.configs.app_configs import SMTP_SERVER from danswer.configs.app_configs import SMTP_USER +from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import VALID_EMAIL_DOMAINS from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import AuthType +from danswer.configs.constants import DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN +from danswer.configs.constants import DANSWER_API_KEY_PREFIX +from danswer.configs.constants import UNNAMED_KEY_PLACEHOLDER from danswer.db.auth import get_access_token_db from danswer.db.auth import get_user_count from danswer.db.auth import get_user_db @@ -69,6 +72,20 @@ def verify_auth_setting() -> None: logger.info(f"Using Auth Type: {AUTH_TYPE.value}") +def get_display_email(email: str | None, space_less: bool = False) -> str: + if email and email.endswith(DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN): + name = email.split("@")[0] + if name == DANSWER_API_KEY_PREFIX + UNNAMED_KEY_PLACEHOLDER: + return "Unnamed API Key" + + if space_less: + return name + + return name.replace("API_KEY__", "API Key: ") + + return email or "" + + def user_needs_to_be_verified() -> bool: # all other auth types besides basic should require users to 
be # verified @@ -133,8 +150,8 @@ def send_user_verification_email( class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): - reset_password_token_secret = SECRET - verification_token_secret = SECRET + reset_password_token_secret = USER_AUTH_SECRET + verification_token_secret = USER_AUTH_SECRET async def create( self, @@ -213,7 +230,10 @@ async def get_user_manager( yield UserManager(user_db) -cookie_transport = CookieTransport(cookie_max_age=SESSION_EXPIRE_TIME_SECONDS) +cookie_transport = CookieTransport( + cookie_max_age=SESSION_EXPIRE_TIME_SECONDS, + cookie_secure=WEB_DOMAIN.startswith("https"), +) def get_database_strategy( @@ -276,13 +296,32 @@ fastapi_users = FastAPIUserWithLogoutRouter[User, uuid.UUID]( # take care of that in `double_check_user` ourself. This is needed, since # we want the /me endpoint to still return a user even if they are not # yet verified, so that the frontend knows they exist -optional_valid_user = fastapi_users.current_user(active=True, optional=True) +optional_fastapi_current_user = fastapi_users.current_user(active=True, optional=True) -async def double_check_user( +async def optional_user_( request: Request, user: User | None, db_session: Session, +) -> User | None: + """NOTE: `request` and `db_session` are not used here, but are included + for the EE version of this function.""" + return user + + +async def optional_user( + request: Request, + user: User | None = Depends(optional_fastapi_current_user), + db_session: Session = Depends(get_session), +) -> User | None: + versioned_fetch_user = fetch_versioned_implementation( + "danswer.auth.users", "optional_user_" + ) + return await versioned_fetch_user(request, user, db_session) + + +async def double_check_user( + user: User | None, optional: bool = DISABLE_AUTH, ) -> User | None: if optional: @@ -304,15 +343,9 @@ async def double_check_user( async def current_user( - request: Request, - user: User | None = Depends(optional_valid_user), - db_session: Session = 
Depends(get_session), + user: User | None = Depends(optional_user), ) -> User | None: - double_check_user = fetch_versioned_implementation( - "danswer.auth.users", "double_check_user" - ) - user = await double_check_user(request, user, db_session) - return user + return await double_check_user(user) async def current_admin_user(user: User | None = Depends(current_user)) -> User | None: diff --git a/backend/danswer/background/celery/celery.py b/backend/danswer/background/celery/celery.py index 80a8a2a13..91fef292d 100644 --- a/backend/danswer/background/celery/celery.py +++ b/backend/danswer/background/celery/celery.py @@ -1,6 +1,4 @@ -import os from datetime import timedelta -from pathlib import Path from typing import cast from celery import Celery # type: ignore @@ -10,16 +8,14 @@ from danswer.background.connector_deletion import delete_connector_credential_pa from danswer.background.task_utils import build_celery_task_wrapper from danswer.background.task_utils import name_cc_cleanup_task from danswer.background.task_utils import name_document_set_sync_task -from danswer.configs.app_configs import FILE_CONNECTOR_TMP_STORAGE_PATH from danswer.configs.app_configs import JOB_TIMEOUT -from danswer.connectors.file.utils import file_age_in_hours from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.document import prepare_to_modify_documents from danswer.db.document_set import delete_document_set from danswer.db.document_set import fetch_document_sets from danswer.db.document_set import fetch_document_sets_for_documents -from danswer.db.document_set import fetch_documents_for_document_set +from danswer.db.document_set import fetch_documents_for_document_set_paginated from danswer.db.document_set import get_document_set_by_id from danswer.db.document_set import mark_document_set_as_synced from danswer.db.engine import build_connection_string @@ -31,7 +27,6 
@@ from danswer.db.tasks import get_latest_task from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import UpdateRequest -from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger logger = setup_logger() @@ -42,7 +37,7 @@ celery_backend_url = f"db+{connection_string}" celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url) -_SYNC_BATCH_SIZE = 1000 +_SYNC_BATCH_SIZE = 100 ##### @@ -67,15 +62,18 @@ def cleanup_connector_credential_pair_task( connector_id=connector_id, credential_id=credential_id, ) - if not cc_pair or not check_deletion_attempt_is_allowed( - connector_credential_pair=cc_pair - ): + if not cc_pair: raise ValueError( - "Cannot run deletion attempt - connector_credential_pair is not deletable. " - "This is likely because there is an ongoing / planned indexing attempt OR the " - "connector is not disabled." + f"Cannot run deletion attempt - connector_credential_pair with Connector ID: " + f"{connector_id} and Credential ID: {credential_id} does not exist." ) + deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed( + connector_credential_pair=cc_pair, db_session=db_session + ) + if deletion_attempt_disallowed_reason: + raise ValueError(deletion_attempt_disallowed_reason) + try: # The bulk of the work is in here, updates Postgres and Vespa curr_ind_name, sec_ind_name = get_both_index_names(db_session) @@ -98,15 +96,13 @@ def sync_document_set_task(document_set_id: int) -> None: """For document sets marked as not up to date, sync the state from postgres into the datastore. 
Also handles deletions.""" - def _sync_document_batch(document_ids: list[str]) -> None: + def _sync_document_batch(document_ids: list[str], db_session: Session) -> None: logger.debug(f"Syncing document sets for: {document_ids}") - # begin a transaction, release lock at the end - with Session(get_sqlalchemy_engine()) as db_session: - # acquires a lock on the documents so that no other process can modify them - prepare_to_modify_documents( - db_session=db_session, document_ids=document_ids - ) + # Acquires a lock on the documents so that no other process can modify them + with prepare_to_modify_documents( + db_session=db_session, document_ids=document_ids + ): # get current state of document sets for these documents document_set_map = { document_id: document_sets @@ -131,17 +127,21 @@ def sync_document_set_task(document_set_id: int) -> None: with Session(get_sqlalchemy_engine()) as db_session: try: - documents_to_update = fetch_documents_for_document_set( - document_set_id=document_set_id, - db_session=db_session, - current_only=False, - ) - for document_batch in batch_generator( - documents_to_update, _SYNC_BATCH_SIZE - ): - _sync_document_batch( - document_ids=[document.id for document in document_batch] + cursor = None + while True: + document_batch, cursor = fetch_documents_for_document_set_paginated( + document_set_id=document_set_id, + db_session=db_session, + current_only=False, + last_document_id=cursor, + limit=_SYNC_BATCH_SIZE, ) + _sync_document_batch( + document_ids=[document.id for document in document_batch], + db_session=db_session, + ) + if cursor is None: + break # if there are no connectors, then delete the document set. Otherwise, just # mark it as successfully synced. 
@@ -182,7 +182,7 @@ def check_for_document_sets_sync_task() -> None: with Session(get_sqlalchemy_engine()) as db_session: # check if any document sets are not synced document_set_info = fetch_document_sets( - db_session=db_session, include_outdated=True + user_id=None, db_session=db_session, include_outdated=True ) for document_set, _ in document_set_info: if not document_set.is_up_to_date: @@ -203,21 +203,6 @@ def check_for_document_sets_sync_task() -> None: ) -@celery_app.task(name="clean_old_temp_files_task", soft_time_limit=JOB_TIMEOUT) -def clean_old_temp_files_task( - age_threshold_in_hours: float | int = 24 * 7, # 1 week, - base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH, -) -> None: - """Files added via the File connector need to be deleted after ingestion - Currently handled async of the indexing job""" - os.makedirs(base_path, exist_ok=True) - for file in os.listdir(base_path): - full_file_path = Path(base_path) / file - if file_age_in_hours(full_file_path) > age_threshold_in_hours: - logger.info(f"Cleaning up uploaded file: {full_file_path}") - os.remove(full_file_path) - - ##### # Celery Beat (Periodic Tasks) Settings ##### @@ -226,8 +211,4 @@ celery_app.conf.beat_schedule = { "task": "check_for_document_sets_sync_task", "schedule": timedelta(seconds=5), }, - "clean-old-temp-files": { - "task": "clean_old_temp_files_task", - "schedule": timedelta(minutes=30), - }, } diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py index 845850144..d9701e577 100644 --- a/backend/danswer/background/connector_deletion.py +++ b/backend/danswer/background/connector_deletion.py @@ -19,8 +19,8 @@ from danswer.db.connector import fetch_connector_by_id from danswer.db.connector_credential_pair import ( delete_connector_credential_pair__no_commit, ) -from danswer.db.document import delete_document_by_connector_credential_pair -from danswer.db.document import delete_documents_complete +from danswer.db.document 
import delete_document_by_connector_credential_pair__no_commit +from danswer.db.document import delete_documents_complete__no_commit from danswer.db.document import get_document_connector_cnts from danswer.db.document import get_documents_for_connector_credential_pair from danswer.db.document import prepare_to_modify_documents @@ -47,60 +47,65 @@ def _delete_connector_credential_pair_batch( credential_id: int, document_index: DocumentIndex, ) -> None: + """ + Removes a batch of documents ids from a cc-pair. If no other cc-pair uses a document anymore + it gets permanently deleted. + """ with Session(get_sqlalchemy_engine()) as db_session: # acquire lock for all documents in this batch so that indexing can't # override the deletion - prepare_to_modify_documents(db_session=db_session, document_ids=document_ids) - - document_connector_cnts = get_document_connector_cnts( + with prepare_to_modify_documents( db_session=db_session, document_ids=document_ids - ) - - # figure out which docs need to be completely deleted - document_ids_to_delete = [ - document_id for document_id, cnt in document_connector_cnts if cnt == 1 - ] - logger.debug(f"Deleting documents: {document_ids_to_delete}") - - document_index.delete(doc_ids=document_ids_to_delete) - - delete_documents_complete( - db_session=db_session, - document_ids=document_ids_to_delete, - ) - - # figure out which docs need to be updated - document_ids_to_update = [ - document_id for document_id, cnt in document_connector_cnts if cnt > 1 - ] - access_for_documents = get_access_for_documents( - document_ids=document_ids_to_update, - db_session=db_session, - cc_pair_to_delete=ConnectorCredentialPairIdentifier( - connector_id=connector_id, - credential_id=credential_id, - ), - ) - update_requests = [ - UpdateRequest( - document_ids=[document_id], - access=access, + ): + document_connector_cnts = get_document_connector_cnts( + db_session=db_session, document_ids=document_ids ) - for document_id, access in 
access_for_documents.items() - ] - logger.debug(f"Updating documents: {document_ids_to_update}") - document_index.update(update_requests=update_requests) + # figure out which docs need to be completely deleted + document_ids_to_delete = [ + document_id for document_id, cnt in document_connector_cnts if cnt == 1 + ] + logger.debug(f"Deleting documents: {document_ids_to_delete}") - delete_document_by_connector_credential_pair( - db_session=db_session, - document_ids=document_ids_to_update, - connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( - connector_id=connector_id, - credential_id=credential_id, - ), - ) - db_session.commit() + document_index.delete(doc_ids=document_ids_to_delete) + + delete_documents_complete__no_commit( + db_session=db_session, + document_ids=document_ids_to_delete, + ) + + # figure out which docs need to be updated + document_ids_to_update = [ + document_id for document_id, cnt in document_connector_cnts if cnt > 1 + ] + access_for_documents = get_access_for_documents( + document_ids=document_ids_to_update, + db_session=db_session, + cc_pair_to_delete=ConnectorCredentialPairIdentifier( + connector_id=connector_id, + credential_id=credential_id, + ), + ) + update_requests = [ + UpdateRequest( + document_ids=[document_id], + access=access, + ) + for document_id, access in access_for_documents.items() + ] + logger.debug(f"Updating documents: {document_ids_to_update}") + + document_index.update(update_requests=update_requests) + + delete_document_by_connector_credential_pair__no_commit( + db_session=db_session, + document_ids=document_ids_to_update, + connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( + connector_id=connector_id, + credential_id=credential_id, + ), + ) + db_session.commit() def cleanup_synced_entities( diff --git a/backend/danswer/background/indexing/job_client.py b/backend/danswer/background/indexing/job_client.py index 8e22f8e45..e9ddad58e 100644 --- 
a/backend/danswer/background/indexing/job_client.py +++ b/backend/danswer/background/indexing/job_client.py @@ -6,16 +6,16 @@ NOTE: cannot use Celery directly due to https://github.com/celery/celery/issues/7007#issuecomment-1740139367""" from collections.abc import Callable from dataclasses import dataclass +from multiprocessing import Process from typing import Any from typing import Literal +from typing import Optional -from torch import multiprocessing - +from danswer.db.engine import get_sqlalchemy_engine from danswer.utils.logger import setup_logger logger = setup_logger() - JobStatusType = ( Literal["error"] | Literal["finished"] @@ -25,12 +25,28 @@ JobStatusType = ( ) +def _initializer( + func: Callable, args: list | tuple, kwargs: dict[str, Any] | None = None +) -> Any: + """Ensure the parent proc's database connections are not touched + in the new connection pool + + Based on the recommended approach in the SQLAlchemy docs found: + https://docs.sqlalchemy.org/en/20/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork + """ + if kwargs is None: + kwargs = {} + + get_sqlalchemy_engine().dispose(close=False) + return func(*args, **kwargs) + + @dataclass class SimpleJob: """Drop in replacement for `dask.distributed.Future`""" id: int - process: multiprocessing.Process | None = None + process: Optional["Process"] = None def cancel(self) -> bool: return self.release() @@ -95,7 +111,7 @@ class SimpleJobClient: job_id = self.job_id_counter self.job_id_counter += 1 - process = multiprocessing.Process(target=func, args=args, daemon=True) + process = Process(target=_initializer(func=func, args=args), daemon=True) job = SimpleJob(id=job_id, process=process) process.start() diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py index 79c5f7903..18b30113c 100644 --- a/backend/danswer/background/indexing/run_indexing.py +++ b/backend/danswer/background/indexing/run_indexing.py @@ -4,10 
+4,13 @@ from datetime import datetime from datetime import timedelta from datetime import timezone -import torch from sqlalchemy.orm import Session +from danswer.background.connector_deletion import ( + _delete_connector_credential_pair_batch, +) from danswer.background.indexing.checkpointing import get_time_windows_for_index_attempt +from danswer.configs.app_configs import DISABLE_DOCUMENT_CLEANUP from danswer.configs.app_configs import POLL_CONNECTOR_OFFSET from danswer.connectors.factory import instantiate_connector from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -19,10 +22,11 @@ from danswer.db.connector import disable_connector from danswer.db.connector_credential_pair import get_last_successful_attempt_time from danswer.db.connector_credential_pair import update_connector_credential_pair from danswer.db.credentials import backend_update_credential_json +from danswer.db.document import get_documents_for_connector_credential_pair from danswer.db.engine import get_sqlalchemy_engine from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import mark_attempt_failed -from danswer.db.index_attempt import mark_attempt_in_progress +from danswer.db.index_attempt import mark_attempt_in_progress__no_commit from danswer.db.index_attempt import mark_attempt_succeeded from danswer.db.index_attempt import update_docs_indexed from danswer.db.models import IndexAttempt @@ -42,8 +46,14 @@ def _get_document_generator( attempt: IndexAttempt, start_time: datetime, end_time: datetime, -) -> GenerateDocumentsOutput: - """NOTE: `start_time` and `end_time` are only used for poll connectors""" +) -> tuple[GenerateDocumentsOutput, bool]: + """ + NOTE: `start_time` and `end_time` are only used for poll connectors + + Returns an interator of document batches and whether the returned documents + are the complete list of existing documents of the connector. 
If the task + of type LOAD_STATE, the list will be considered complete and otherwise incomplete. + """ task = attempt.connector.input_type try: @@ -65,7 +75,7 @@ def _get_document_generator( if task == InputType.LOAD_STATE: assert isinstance(runnable_connector, LoadConnector) doc_batch_generator = runnable_connector.load_from_state() - + is_listing_complete = True elif task == InputType.POLL: assert isinstance(runnable_connector, PollConnector) if attempt.connector_id is None or attempt.credential_id is None: @@ -78,12 +88,13 @@ def _get_document_generator( doc_batch_generator = runnable_connector.poll_source( start=start_time.timestamp(), end=end_time.timestamp() ) + is_listing_complete = False else: # Event types cannot be handled by a background type raise RuntimeError(f"Invalid task type: {task}") - return doc_batch_generator + return doc_batch_generator, is_listing_complete def _run_indexing( @@ -104,16 +115,6 @@ def _run_indexing( # Secondary index syncs at the end when swapping is_primary = index_attempt.embedding_model.status == IndexModelStatus.PRESENT - # Mark as started - mark_attempt_in_progress(index_attempt, db_session) - if is_primary: - update_connector_credential_pair( - db_session=db_session, - connector_id=index_attempt.connector.id, - credential_id=index_attempt.credential.id, - attempt_status=IndexingStatus.IN_PROGRESS, - ) - # Indexing is only done into one index at a time document_index = get_default_document_index( primary_index_name=index_name, secondary_index_name=None @@ -131,6 +132,7 @@ def _run_indexing( document_index=document_index, ignore_time_skip=index_attempt.from_beginning or (db_embedding_model.status == IndexModelStatus.FUTURE), + db_session=db_session, ) db_connector = index_attempt.connector @@ -158,19 +160,20 @@ def _run_indexing( source_type=db_connector.source, ) ): - window_start = max( - window_start - timedelta(minutes=POLL_CONNECTOR_OFFSET), - datetime(1970, 1, 1, tzinfo=timezone.utc), - ) - - doc_batch_generator = 
_get_document_generator( - db_session=db_session, - attempt=index_attempt, - start_time=window_start, - end_time=window_end, - ) - try: + window_start = max( + window_start - timedelta(minutes=POLL_CONNECTOR_OFFSET), + datetime(1970, 1, 1, tzinfo=timezone.utc), + ) + + doc_batch_generator, is_listing_complete = _get_document_generator( + db_session=db_session, + attempt=index_attempt, + start_time=window_start, + end_time=window_end, + ) + + all_connector_doc_ids: set[str] = set() for doc_batch in doc_batch_generator: # Check if connector is disabled mid run and stop if so unless it's the secondary # index being built. We want to populate it even for paused connectors @@ -186,6 +189,7 @@ def _run_indexing( db_session.refresh(index_attempt) if index_attempt.status != IndexingStatus.IN_PROGRESS: + # Likely due to user manually disabling it or model swap raise RuntimeError("Index Attempt was canceled") logger.debug( @@ -202,6 +206,7 @@ def _run_indexing( net_doc_change += new_docs chunk_count += total_batch_chunks document_count += len(doc_batch) + all_connector_doc_ids.update(doc.id for doc in doc_batch) # commit transaction so that the `update` below begins # with a brand new transaction. 
Postgres uses the start @@ -216,6 +221,40 @@ def _run_indexing( index_attempt=index_attempt, total_docs_indexed=document_count, new_docs_indexed=net_doc_change, + docs_removed_from_index=0, + ) + + if is_listing_complete and not DISABLE_DOCUMENT_CLEANUP: + # clean up all documents from the index that have not been returned from the connector + all_indexed_document_ids = { + d.id + for d in get_documents_for_connector_credential_pair( + db_session=db_session, + connector_id=db_connector.id, + credential_id=db_credential.id, + ) + } + doc_ids_to_remove = list( + all_indexed_document_ids - all_connector_doc_ids + ) + logger.debug( + f"Cleaning up {len(doc_ids_to_remove)} documents that are not contained in the newest connector state" + ) + + # delete docs from cc-pair and receive the number of completely deleted docs in return + _delete_connector_credential_pair_batch( + document_ids=doc_ids_to_remove, + connector_id=db_connector.id, + credential_id=db_credential.id, + document_index=document_index, + ) + + update_docs_indexed( + db_session=db_session, + index_attempt=index_attempt, + total_docs_indexed=document_count, + new_docs_indexed=net_doc_change, + docs_removed_from_index=len(doc_ids_to_remove), ) run_end_dt = window_end @@ -224,7 +263,6 @@ def _run_indexing( db_session=db_session, connector_id=db_connector.id, credential_id=db_credential.id, - attempt_status=IndexingStatus.IN_PROGRESS, net_docs=net_doc_change, run_dt=run_end_dt, ) @@ -255,7 +293,6 @@ def _run_indexing( db_session=db_session, connector_id=index_attempt.connector.id, credential_id=index_attempt.credential.id, - attempt_status=IndexingStatus.FAILED, net_docs=net_doc_change, ) raise e @@ -270,7 +307,6 @@ def _run_indexing( db_session=db_session, connector_id=db_connector.id, credential_id=db_credential.id, - attempt_status=IndexingStatus.SUCCESS, run_dt=run_end_dt, ) @@ -282,7 +318,35 @@ def _run_indexing( ) -def run_indexing_entrypoint(index_attempt_id: int, num_threads: int) -> None: +def 
_prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexAttempt: + # make sure that the index attempt can't change in between checking the + # status and marking it as in_progress. This setting will be discarded + # after the next commit: + # https://docs.sqlalchemy.org/en/20/orm/session_transaction.html#setting-isolation-for-individual-transactions + db_session.connection(execution_options={"isolation_level": "SERIALIZABLE"}) # type: ignore + + attempt = get_index_attempt( + db_session=db_session, + index_attempt_id=index_attempt_id, + ) + if attempt is None: + raise RuntimeError(f"Unable to find IndexAttempt for ID '{index_attempt_id}'") + + if attempt.status != IndexingStatus.NOT_STARTED: + raise RuntimeError( + f"Indexing attempt with ID '{index_attempt_id}' is not in NOT_STARTED status. " + f"Current status is '{attempt.status}'." + ) + + # only commit once, to make sure this all happens in a single transaction + mark_attempt_in_progress__no_commit(attempt) + if attempt.embedding_model.status != IndexModelStatus.PRESENT: + db_session.commit() + + return attempt + + +def run_indexing_entrypoint(index_attempt_id: int) -> None: """Entrypoint for indexing run when using dask distributed. 
Wraps the actual logic in a `try` block so that we can catch any exceptions and mark the attempt as failed.""" @@ -291,17 +355,10 @@ def run_indexing_entrypoint(index_attempt_id: int, num_threads: int) -> None: # will have it added as a prefix IndexAttemptSingleton.set_index_attempt_id(index_attempt_id) - logger.info(f"Setting task to use {num_threads} threads") - torch.set_num_threads(num_threads) - with Session(get_sqlalchemy_engine()) as db_session: - attempt = get_index_attempt( - db_session=db_session, index_attempt_id=index_attempt_id - ) - if attempt is None: - raise RuntimeError( - f"Unable to find IndexAttempt for ID '{index_attempt_id}'" - ) + # make sure that it is valid to run this indexing attempt + mark it + # as in progress + attempt = _prepare_index_attempt(db_session, index_attempt_id) logger.info( f"Running indexing attempt for connector: '{attempt.connector.name}', " @@ -309,10 +366,7 @@ def run_indexing_entrypoint(index_attempt_id: int, num_threads: int) -> None: f"with credentials: '{attempt.credential_id}'" ) - _run_indexing( - db_session=db_session, - index_attempt=attempt, - ) + _run_indexing(db_session, attempt) logger.info( f"Completed indexing attempt for connector: '{attempt.connector.name}', " diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 851ada5d0..8b115e448 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -3,7 +3,6 @@ import time from datetime import datetime import dask -import torch from dask.distributed import Client from dask.distributed import Future from distributed import LocalCluster @@ -15,21 +14,13 @@ from danswer.background.indexing.job_client import SimpleJobClient from danswer.background.indexing.run_indexing import run_indexing_entrypoint from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED -from danswer.configs.app_configs import LOG_LEVEL +from 
danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP from danswer.configs.app_configs import NUM_INDEXING_WORKERS -from danswer.configs.model_configs import MIN_THREADS_ML_MODELS from danswer.db.connector import fetch_connectors -from danswer.db.connector_credential_pair import get_connector_credential_pairs -from danswer.db.connector_credential_pair import mark_all_in_progress_cc_pairs_failed -from danswer.db.connector_credential_pair import resync_cc_pair -from danswer.db.connector_credential_pair import update_connector_credential_pair from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_secondary_db_embedding_model -from danswer.db.embedding_model import update_embedding_model_status from danswer.db.engine import get_db_current_time from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.index_attempt import cancel_indexing_attempts_past_model -from danswer.db.index_attempt import count_unique_cc_pairs_with_index_attempts from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import get_inprogress_index_attempts @@ -41,7 +32,12 @@ from danswer.db.models import EmbeddingModel from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus from danswer.db.models import IndexModelStatus +from danswer.db.swap_index import check_index_swap +from danswer.search.search_nlp_models import warm_up_encoders from danswer.utils.logger import setup_logger +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import LOG_LEVEL +from shared_configs.configs import MODEL_SERVER_PORT logger = setup_logger() @@ -54,22 +50,19 @@ _UNEXPECTED_STATE_FAILURE_REASON = ( ) -"""Util funcs""" - - -def _get_num_threads() -> int: - """Get # of "threads" to use for ML models in an indexing job. 
By default uses - the torch implementation, which returns the # of physical cores on the machine. - """ - return max(MIN_THREADS_ML_MODELS, torch.get_num_threads()) - - def _should_create_new_indexing( connector: Connector, last_index: IndexAttempt | None, model: EmbeddingModel, + secondary_index_building: bool, db_session: Session, ) -> bool: + # User can still manually create single indexing attempts via the UI for the + # currently in use index + if DISABLE_INDEX_UPDATE_ON_SWAP: + if model.status == IndexModelStatus.PRESENT and secondary_index_building: + return False + # When switching over models, always index at least once if model.status == IndexModelStatus.FUTURE and not last_index: if connector.id == 0: # Ingestion API @@ -124,17 +117,6 @@ def _mark_run_failed( db_session=db_session, failure_reason=failure_reason, ) - if ( - index_attempt.connector_id is not None - and index_attempt.credential_id is not None - and index_attempt.embedding_model.status == IndexModelStatus.PRESENT - ): - update_connector_credential_pair( - db_session=db_session, - connector_id=index_attempt.connector_id, - credential_id=index_attempt.credential_id, - attempt_status=IndexingStatus.FAILED, - ) """Main funcs""" @@ -185,7 +167,11 @@ def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None: connector.id, credential.id, model.id, db_session ) if not _should_create_new_indexing( - connector, last_attempt, model, db_session + connector=connector, + last_index=last_attempt, + model=model, + secondary_index_building=len(embedding_models) > 1, + db_session=db_session, ): continue @@ -193,16 +179,6 @@ def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None: connector.id, credential.id, model.id, db_session ) - # CC-Pair will have the status that it should for the primary index - # Will be re-sync-ed once the indices are swapped - if model.status == IndexModelStatus.PRESENT: - update_connector_credential_pair( - db_session=db_session, - 
connector_id=connector.id, - credential_id=credential.id, - attempt_status=IndexingStatus.NOT_STARTED, - ) - def cleanup_indexing_jobs( existing_jobs: dict[int, Future | SimpleJob], @@ -254,6 +230,9 @@ def cleanup_indexing_jobs( ) for index_attempt in in_progress_indexing_attempts: if index_attempt.id in existing_jobs: + # If index attempt is canceled, stop the run + if index_attempt.status == IndexingStatus.FAILED: + existing_jobs[index_attempt.id].cancel() # check to see if the job has been updated in last `timeout_hours` hours, if not # assume it to frozen in some bad state and just mark it as failed. Note: this relies # on the fact that the `time_updated` field is constantly updated every @@ -328,12 +307,10 @@ def kickoff_indexing_jobs( if use_secondary_index: run = secondary_client.submit( - run_indexing_entrypoint, attempt.id, _get_num_threads(), pure=False + run_indexing_entrypoint, attempt.id, pure=False ) else: - run = client.submit( - run_indexing_entrypoint, attempt.id, _get_num_threads(), pure=False - ) + run = client.submit(run_indexing_entrypoint, attempt.id, pure=False) if run: secondary_str = "(secondary index) " if use_secondary_index else "" @@ -348,49 +325,22 @@ def kickoff_indexing_jobs( return existing_jobs_copy -def check_index_swap(db_session: Session) -> None: - """Get count of cc-pairs and count of index_attempts for the new model grouped by - connector + credential, if it's the same, then assume new index is done building. 
- This does not take into consideration if the attempt failed or not""" - # Default CC-pair created for Ingestion API unused here - all_cc_pairs = get_connector_credential_pairs(db_session) - cc_pair_count = len(all_cc_pairs) - 1 - embedding_model = get_secondary_db_embedding_model(db_session) +def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> None: + engine = get_sqlalchemy_engine() + with Session(engine) as db_session: + check_index_swap(db_session=db_session) + db_embedding_model = get_current_db_embedding_model(db_session) - if not embedding_model: - return - - unique_cc_indexings = count_unique_cc_pairs_with_index_attempts( - embedding_model_id=embedding_model.id, db_session=db_session + # So that the first time users aren't surprised by really slow speed of first + # batch of documents indexed + logger.info("Running a first inference to warm up embedding model") + warm_up_encoders( + model_name=db_embedding_model.model_name, + normalize=db_embedding_model.normalize, + model_server_host=INDEXING_MODEL_SERVER_HOST, + model_server_port=MODEL_SERVER_PORT, ) - if unique_cc_indexings > cc_pair_count: - raise RuntimeError("More unique indexings than cc pairs, should not occur") - - if cc_pair_count == unique_cc_indexings: - # Swap indices - now_old_embedding_model = get_current_db_embedding_model(db_session) - update_embedding_model_status( - embedding_model=now_old_embedding_model, - new_status=IndexModelStatus.PAST, - db_session=db_session, - ) - - update_embedding_model_status( - embedding_model=embedding_model, - new_status=IndexModelStatus.PRESENT, - db_session=db_session, - ) - - # Expire jobs for the now past index/embedding model - cancel_indexing_attempts_past_model(db_session) - - # Recount aggregates - for cc_pair in all_cc_pairs: - resync_cc_pair(cc_pair, db_session=db_session) - - -def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> None: client_primary: Client | SimpleJobClient client_secondary: 
Client | SimpleJobClient if DASK_JOB_CLIENT_ENABLED: @@ -417,12 +367,6 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non client_secondary = SimpleJobClient(n_workers=num_workers) existing_jobs: dict[int, Future | SimpleJob] = {} - engine = get_sqlalchemy_engine() - - with Session(engine) as db_session: - # Previous version did not always clean up cc-pairs well leaving some connectors undeleteable - # This ensures that bad states get cleaned up - mark_all_in_progress_cc_pairs_failed(db_session) while True: start = time.time() @@ -454,12 +398,6 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non def update__main() -> None: - # needed for CUDA to work with multiprocessing - # NOTE: needs to be done on application startup - # before any other torch code has been run - if not DASK_JOB_CLIENT_ENABLED: - torch.multiprocessing.set_start_method("spawn") - logger.info("Starting Indexing Loop") update_loop() diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py index ed45c9562..f4b0b2e02 100644 --- a/backend/danswer/chat/chat_utils.py +++ b/backend/danswer/chat/chat_utils.py @@ -1,168 +1,39 @@ import re -from collections.abc import Callable -from collections.abc import Iterator -from datetime import datetime -from functools import lru_cache +from collections.abc import Sequence from typing import cast -from langchain.schema.messages import BaseMessage -from langchain.schema.messages import HumanMessage -from langchain.schema.messages import SystemMessage from sqlalchemy.orm import Session from danswer.chat.models import CitationInfo -from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import LlmDoc -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION -from danswer.configs.chat_configs import STOP_STREAM_PAT -from danswer.configs.constants import DocumentSource -from danswer.configs.constants import IGNORE_FOR_QA -from 
danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS from danswer.db.chat import get_chat_messages_by_session from danswer.db.models import ChatMessage -from danswer.db.models import Persona -from danswer.db.models import Prompt -from danswer.indexing.models import InferenceChunk -from danswer.llm.utils import check_number_of_tokens -from danswer.llm.utils import get_max_input_tokens -from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT -from danswer.prompts.chat_prompts import CHAT_USER_PROMPT -from danswer.prompts.chat_prompts import CITATION_REMINDER -from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT -from danswer.prompts.chat_prompts import NO_CITATION_STATEMENT -from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT -from danswer.prompts.constants import CODE_BLOCK_PAT -from danswer.prompts.constants import TRIPLE_BACKTICK -from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT -from danswer.prompts.prompt_utils import get_current_llm_day_time -from danswer.prompts.token_counts import ( - CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT, -) -from danswer.prompts.token_counts import CITATION_REMINDER_TOKEN_CNT -from danswer.prompts.token_counts import CITATION_STATEMENT_TOKEN_CNT -from danswer.prompts.token_counts import LANGUAGE_HINT_TOKEN_CNT +from danswer.llm.answering.models import PreviousMessage +from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceSection +from danswer.utils.logger import setup_logger -# Maps connector enum string to a more natural language representation for the LLM -# If not on the list, uses the original but slightly cleaned up, see below -CONNECTOR_NAME_MAP = { - "web": "Website", - "requesttracker": "Request Tracker", - "github": "GitHub", - "file": "File Upload", -} +logger = setup_logger() -def clean_up_source(source_str: str) -> str: - if 
source_str in CONNECTOR_NAME_MAP: - return CONNECTOR_NAME_MAP[source_str] - return source_str.replace("_", " ").title() - - -def build_doc_context_str( - semantic_identifier: str, - source_type: DocumentSource, - content: str, - metadata_dict: dict[str, str | list[str]], - updated_at: datetime | None, - ind: int, - include_metadata: bool = True, -) -> str: - context_str = "" - if include_metadata: - context_str += f"DOCUMENT {ind}: {semantic_identifier}\n" - context_str += f"Source: {clean_up_source(source_type)}\n" - - for k, v in metadata_dict.items(): - if isinstance(v, list): - v_str = ", ".join(v) - context_str += f"{k.capitalize()}: {v_str}\n" - else: - context_str += f"{k.capitalize()}: {v}\n" - - if updated_at: - update_str = updated_at.strftime("%B %d, %Y %H:%M") - context_str += f"Updated: {update_str}\n" - context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n" - return context_str - - -def build_complete_context_str( - context_docs: list[LlmDoc | InferenceChunk], - include_metadata: bool = True, -) -> str: - context_str = "" - for ind, doc in enumerate(context_docs, start=1): - context_str += build_doc_context_str( - semantic_identifier=doc.semantic_identifier, - source_type=doc.source_type, - content=doc.content, - metadata_dict=doc.metadata, - updated_at=doc.updated_at, - ind=ind, - include_metadata=include_metadata, - ) - - return context_str.strip() - - -@lru_cache() -def build_chat_system_message( - prompt: Prompt, - context_exists: bool, - llm_tokenizer_encode_func: Callable, - citation_line: str = REQUIRE_CITATION_STATEMENT, - no_citation_line: str = NO_CITATION_STATEMENT, -) -> tuple[SystemMessage | None, int]: - system_prompt = prompt.system_prompt.strip() - if prompt.include_citations: - if context_exists: - system_prompt += citation_line - else: - system_prompt += no_citation_line - if prompt.datetime_aware: - if system_prompt: - system_prompt += ( - f"\n\nAdditional Information:\n\t- {get_current_llm_day_time()}." 
- ) - else: - system_prompt = get_current_llm_day_time() - - if not system_prompt: - return None, 0 - - token_count = len(llm_tokenizer_encode_func(system_prompt)) - system_msg = SystemMessage(content=system_prompt) - - return system_msg, token_count - - -def build_task_prompt_reminders( - prompt: Prompt, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), - citation_str: str = CITATION_REMINDER, - language_hint_str: str = LANGUAGE_HINT, -) -> str: - base_task = prompt.task_prompt - citation_or_nothing = citation_str if prompt.include_citations else "" - language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else "" - return base_task + citation_or_nothing + language_hint_or_nothing - - -def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc: +def llm_doc_from_inference_section(inf_chunk: InferenceSection) -> LlmDoc: return LlmDoc( document_id=inf_chunk.document_id, - content=inf_chunk.content, + # This one is using the combined content of all the chunks of the section + # In default settings, this is the same as just the content of base chunk + content=inf_chunk.combined_content, + blurb=inf_chunk.blurb, semantic_identifier=inf_chunk.semantic_identifier, source_type=inf_chunk.source_type, metadata=inf_chunk.metadata, updated_at=inf_chunk.updated_at, link=inf_chunk.source_links[0] if inf_chunk.source_links else None, + source_links=inf_chunk.source_links, ) def map_document_id_order( - chunks: list[InferenceChunk | LlmDoc], one_indexed: bool = True + chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True ) -> dict[str, int]: order_mapping = {} current = 1 if one_indexed else 0 @@ -174,157 +45,6 @@ def map_document_id_order( return order_mapping -def build_chat_user_message( - chat_message: ChatMessage, - prompt: Prompt, - context_docs: list[LlmDoc], - llm_tokenizer_encode_func: Callable, - all_doc_useful: bool, - user_prompt_template: str = CHAT_USER_PROMPT, - context_free_template: str = 
CHAT_USER_CONTEXT_FREE_PROMPT, - ignore_str: str = DEFAULT_IGNORE_STATEMENT, -) -> tuple[HumanMessage, int]: - user_query = chat_message.message - - if not context_docs: - # Simpler prompt for cases where there is no context - user_prompt = ( - context_free_template.format( - task_prompt=prompt.task_prompt, user_query=user_query - ) - if prompt.task_prompt - else user_query - ) - user_prompt = user_prompt.strip() - token_count = len(llm_tokenizer_encode_func(user_prompt)) - user_msg = HumanMessage(content=user_prompt) - return user_msg, token_count - - context_docs_str = build_complete_context_str( - cast(list[LlmDoc | InferenceChunk], context_docs) - ) - optional_ignore = "" if all_doc_useful else ignore_str - - task_prompt_with_reminder = build_task_prompt_reminders(prompt) - - user_prompt = user_prompt_template.format( - optional_ignore_statement=optional_ignore, - context_docs_str=context_docs_str, - task_prompt=task_prompt_with_reminder, - user_query=user_query, - ) - - user_prompt = user_prompt.strip() - token_count = len(llm_tokenizer_encode_func(user_prompt)) - user_msg = HumanMessage(content=user_prompt) - - return user_msg, token_count - - -def _get_usable_chunks( - chunks: list[InferenceChunk], token_limit: int -) -> list[InferenceChunk]: - total_token_count = 0 - usable_chunks = [] - for chunk in chunks: - chunk_token_count = check_number_of_tokens(chunk.content) - if total_token_count + chunk_token_count > token_limit: - break - - total_token_count += chunk_token_count - usable_chunks.append(chunk) - - # try and return at least one chunk if possible. This chunk will - # get truncated later on in the pipeline. 
This would only occur if - # the first chunk is larger than the token limit (usually due to character - # count -> token count mismatches caused by special characters / non-ascii - # languages) - if not usable_chunks and chunks: - usable_chunks = [chunks[0]] - - return usable_chunks - - -def get_usable_chunks( - chunks: list[InferenceChunk], - token_limit: int, - offset: int = 0, -) -> list[InferenceChunk]: - offset_into_chunks = 0 - usable_chunks: list[InferenceChunk] = [] - for _ in range(min(offset + 1, 1)): # go through this process at least once - if offset_into_chunks >= len(chunks) and offset_into_chunks > 0: - raise ValueError( - "Chunks offset too large, should not retry this many times" - ) - - usable_chunks = _get_usable_chunks( - chunks=chunks[offset_into_chunks:], token_limit=token_limit - ) - offset_into_chunks += len(usable_chunks) - - return usable_chunks - - -def get_chunks_for_qa( - chunks: list[InferenceChunk], - llm_chunk_selection: list[bool], - token_limit: int | None, - batch_offset: int = 0, -) -> list[int]: - """ - Gives back indices of chunks to pass into the LLM for Q&A. 
- - Only selects chunks viable for Q&A, within the token limit, and prioritize those selected - by the LLM in a separate flow (this can be turned off) - - Note, the batch_offset calculation has to count the batches from the beginning each time as - there's no way to know which chunks were included in the prior batches without recounting atm, - this is somewhat slow as it requires tokenizing all the chunks again - """ - batch_index = 0 - latest_batch_indices: list[int] = [] - token_count = 0 - - # First iterate the LLM selected chunks, then iterate the rest if tokens remaining - for selection_target in [True, False]: - for ind, chunk in enumerate(chunks): - if llm_chunk_selection[ind] is not selection_target or chunk.metadata.get( - IGNORE_FOR_QA - ): - continue - - # We calculate it live in case the user uses a different LLM + tokenizer - chunk_token = check_number_of_tokens(chunk.content) - # 50 for an approximate/slight overestimate for # tokens for metadata for the chunk - token_count += chunk_token + 50 - - # Always use at least 1 chunk - if ( - token_limit is None - or token_count <= token_limit - or not latest_batch_indices - ): - latest_batch_indices.append(ind) - current_chunk_unused = False - else: - current_chunk_unused = True - - if token_limit is not None and token_count >= token_limit: - if batch_index < batch_offset: - batch_index += 1 - if current_chunk_unused: - latest_batch_indices = [ind] - token_count = chunk_token - else: - latest_batch_indices = [] - token_count = 0 - else: - return latest_batch_indices - - return latest_batch_indices - - def create_chat_chain( chat_session_id: int, db_session: Session, @@ -340,7 +60,7 @@ def create_chat_chain( id_to_msg = {msg.id: msg for msg in all_chat_messages} if not all_chat_messages: - raise ValueError("No messages in Chat Session") + raise RuntimeError("No messages in Chat Session") root_message = all_chat_messages[0] if root_message.parent_message is not None: @@ -370,7 +90,7 @@ def create_chat_chain( 
def combine_message_chain( - messages: list[ChatMessage], + messages: list[ChatMessage] | list[PreviousMessage], token_limit: int, msg_limit: int | None = None, ) -> str: @@ -381,7 +101,7 @@ def combine_message_chain( if msg_limit is not None: messages = messages[-msg_limit:] - for message in reversed(messages): + for message in cast(list[ChatMessage] | list[PreviousMessage], reversed(messages)): message_token_count = message.token_count if total_token_count + message_token_count > token_limit: @@ -394,218 +114,58 @@ def combine_message_chain( return "\n\n".join(message_strs) -_PER_MESSAGE_TOKEN_BUFFER = 7 +def reorganize_citations( + answer: str, citations: list[CitationInfo] +) -> tuple[str, list[CitationInfo]]: + """For a complete, citation-aware response, we want to reorganize the citations so that + they are in the order of the documents that were used in the response. This just looks nicer / avoids + confusion ("Why is there [7] when only 2 documents are cited?").""" + # Regular expression to find all instances of [[x]](LINK) + pattern = r"\[\[(.*?)\]\]\((.*?)\)" -def find_last_index(lst: list[int], max_prompt_tokens: int) -> int: - """From the back, find the index of the last element to include - before the list exceeds the maximum""" - running_sum = 0 + all_citation_matches = re.findall(pattern, answer) - last_ind = 0 - for i in range(len(lst) - 1, -1, -1): - running_sum += lst[i] + _PER_MESSAGE_TOKEN_BUFFER - if running_sum > max_prompt_tokens: - last_ind = i + 1 - break - if last_ind >= len(lst): - raise ValueError("Last message alone is too large!") - return last_ind - - -def drop_messages_history_overflow( - system_msg: BaseMessage | None, - system_token_count: int, - history_msgs: list[BaseMessage], - history_token_counts: list[int], - final_msg: BaseMessage, - final_msg_token_count: int, - max_allowed_tokens: int, -) -> list[BaseMessage]: - """As message history grows, messages need to be dropped starting from the furthest in the past. 
- The System message should be kept if at all possible and the latest user input which is inserted in the - prompt template must be included""" - if len(history_msgs) != len(history_token_counts): - # This should never happen - raise ValueError("Need exactly 1 token count per message for tracking overflow") - - prompt: list[BaseMessage] = [] - - # Start dropping from the history if necessary - all_tokens = history_token_counts + [system_token_count, final_msg_token_count] - ind_prev_msg_start = find_last_index( - all_tokens, max_prompt_tokens=max_allowed_tokens - ) - - if system_msg and ind_prev_msg_start <= len(history_msgs): - prompt.append(system_msg) - - prompt.extend(history_msgs[ind_prev_msg_start:]) - - prompt.append(final_msg) - - return prompt - - -def in_code_block(llm_text: str) -> bool: - count = llm_text.count(TRIPLE_BACKTICK) - return count % 2 != 0 - - -def extract_citations_from_stream( - tokens: Iterator[str], - context_docs: list[LlmDoc], - doc_id_to_rank_map: dict[str, int], - stop_stream: str | None = STOP_STREAM_PAT, -) -> Iterator[DanswerAnswerPiece | CitationInfo]: - llm_out = "" - max_citation_num = len(context_docs) - curr_segment = "" - prepend_bracket = False - cited_inds = set() - hold = "" - for raw_token in tokens: - if stop_stream: - next_hold = hold + raw_token - - if stop_stream in next_hold: - break - - if next_hold == stop_stream[: len(next_hold)]: - hold = next_hold + new_citation_info: dict[int, CitationInfo] = {} + for citation_match in all_citation_matches: + try: + citation_num = int(citation_match[0]) + if citation_num in new_citation_info: continue - token = next_hold - hold = "" - else: - token = raw_token + matching_citation = next( + iter([c for c in citations if c.citation_num == int(citation_num)]), + None, + ) + if matching_citation is None: + continue - # Special case of [1][ where ][ is a single token - # This is where the model attempts to do consecutive citations like [1][2] - if prepend_bracket: - curr_segment += 
"[" + curr_segment - prepend_bracket = False + new_citation_info[citation_num] = CitationInfo( + citation_num=len(new_citation_info) + 1, + document_id=matching_citation.document_id, + ) + except Exception: + pass - curr_segment += token - llm_out += token + # Function to replace citations with their new number + def slack_link_format(match: re.Match) -> str: + link_text = match.group(1) + try: + citation_num = int(link_text) + if citation_num in new_citation_info: + link_text = new_citation_info[citation_num].citation_num + except Exception: + pass - possible_citation_pattern = r"(\[\d*$)" # [1, [, etc - possible_citation_found = re.search(possible_citation_pattern, curr_segment) + link_url = match.group(2) + return f"[[{link_text}]]({link_url})" - citation_pattern = r"\[(\d+)\]" # [1], [2] etc - citation_found = re.search(citation_pattern, curr_segment) + # Substitute all matches in the input text + new_answer = re.sub(pattern, slack_link_format, answer) - if citation_found and not in_code_block(llm_out): - numerical_value = int(citation_found.group(1)) - if 1 <= numerical_value <= max_citation_num: - context_llm_doc = context_docs[ - numerical_value - 1 - ] # remove 1 index offset + # if any citations weren't parsable, just add them back to be safe + for citation in citations: + if citation.citation_num not in new_citation_info: + new_citation_info[citation.citation_num] = citation - link = context_llm_doc.link - target_citation_num = doc_id_to_rank_map[context_llm_doc.document_id] - - # Use the citation number for the document's rank in - # the search (or selected docs) results - curr_segment = re.sub( - rf"\[{numerical_value}\]", f"[{target_citation_num}]", curr_segment - ) - - if target_citation_num not in cited_inds: - cited_inds.add(target_citation_num) - yield CitationInfo( - citation_num=target_citation_num, - document_id=context_llm_doc.document_id, - ) - - if link: - curr_segment = re.sub(r"\[", "[[", curr_segment, count=1) - curr_segment = re.sub("]", 
f"]]({link})", curr_segment, count=1) - - # In case there's another open bracket like [1][, don't want to match this - possible_citation_found = None - - # if we see "[", but haven't seen the right side, hold back - this may be a - # citation that needs to be replaced with a link - if possible_citation_found: - continue - - # Special case with back to back citations [1][2] - if curr_segment and curr_segment[-1] == "[": - curr_segment = curr_segment[:-1] - prepend_bracket = True - - yield DanswerAnswerPiece(answer_piece=curr_segment) - curr_segment = "" - - if curr_segment: - if prepend_bracket: - yield DanswerAnswerPiece(answer_piece="[" + curr_segment) - else: - yield DanswerAnswerPiece(answer_piece=curr_segment) - - -def get_prompt_tokens(prompt: Prompt) -> int: - return ( - check_number_of_tokens(prompt.system_prompt) - + check_number_of_tokens(prompt.task_prompt) - + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT - + CITATION_STATEMENT_TOKEN_CNT - + CITATION_REMINDER_TOKEN_CNT - + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0) - ) - - -# buffer just to be safe so that we don't overflow the token limit due to -# a small miscalculation -_MISC_BUFFER = 40 - - -def compute_max_document_tokens( - persona: Persona, - actual_user_input: str | None = None, - max_llm_token_override: int | None = None, -) -> int: - """Estimates the number of tokens available for context documents. Formula is roughly: - - ( - model_context_window - reserved_output_tokens - prompt_tokens - - (actual_user_input OR reserved_user_message_tokens) - buffer (just to be safe) - ) - - The actual_user_input is used at query time. If we are calculating this before knowing the exact input (e.g. - if we're trying to determine if the user should be able to select another document) then we just set an - arbitrary "upper bound". 
- """ - llm_name = GEN_AI_MODEL_VERSION - if persona.llm_model_version_override: - llm_name = persona.llm_model_version_override - - # if we can't find a number of tokens, just assume some common default - max_input_tokens = ( - max_llm_token_override - if max_llm_token_override - else get_max_input_tokens(model_name=llm_name) - ) - if persona.prompts: - # TODO this may not always be the first prompt - prompt_tokens = get_prompt_tokens(persona.prompts[0]) - else: - raise RuntimeError("Persona has no prompts - this should never happen") - user_input_tokens = ( - check_number_of_tokens(actual_user_input) - if actual_user_input is not None - else GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS - ) - - return max_input_tokens - prompt_tokens - user_input_tokens - _MISC_BUFFER - - -def compute_max_llm_input_tokens(persona: Persona) -> int: - """Maximum tokens allows in the input to the LLM (of any type).""" - llm_name = GEN_AI_MODEL_VERSION - if persona.llm_model_version_override: - llm_name = persona.llm_model_version_override - - input_tokens = get_max_input_tokens(model_name=llm_name) - return input_tokens - _MISC_BUFFER + return new_answer, list(new_citation_info.values()) diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py index d2e93c474..abb10461a 100644 --- a/backend/danswer/chat/load_yamls.py +++ b/backend/danswer/chat/load_yamls.py @@ -13,7 +13,7 @@ from danswer.db.document_set import get_or_create_document_set_by_name from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import DocumentSet as DocumentSetDBModel from danswer.db.models import Prompt as PromptDBModel -from danswer.search.models import RecencyBiasSetting +from danswer.search.enums import RecencyBiasSetting def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None: @@ -24,7 +24,7 @@ def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None: with Session(get_sqlalchemy_engine()) as db_session: for prompt in all_prompts: 
upsert_prompt( - user_id=None, + user=None, prompt_id=prompt.get("id"), name=prompt["name"], description=prompt["description"].strip(), @@ -34,7 +34,6 @@ def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None: datetime_aware=prompt.get("datetime_aware", True), default_prompt=True, personas=None, - shared=True, db_session=db_session, commit=True, ) @@ -67,9 +66,7 @@ def load_personas_from_yaml( prompts: list[PromptDBModel | None] | None = None else: prompts = [ - get_prompt_by_name( - prompt_name, user_id=None, shared=True, db_session=db_session - ) + get_prompt_by_name(prompt_name, user=None, db_session=db_session) for prompt_name in prompt_set_names ] if any([prompt is None for prompt in prompts]): @@ -78,22 +75,26 @@ def load_personas_from_yaml( if not prompts: prompts = None + p_id = persona.get("id") upsert_persona( - user_id=None, - persona_id=persona.get("id"), + user=None, + # Negative to not conflict with existing personas + persona_id=(-1 * p_id) if p_id is not None else None, name=persona["name"], description=persona["description"], num_chunks=persona.get("num_chunks") if persona.get("num_chunks") is not None else default_chunks, llm_relevance_filter=persona.get("llm_relevance_filter"), + starter_messages=persona.get("starter_messages"), llm_filter_extraction=persona.get("llm_filter_extraction"), + llm_model_provider_override=None, llm_model_version_override=None, recency_bias=RecencyBiasSetting(persona["recency_bias"]), prompts=cast(list[PromptDBModel] | None, prompts), document_sets=doc_sets, default_persona=True, - shared=True, + is_public=True, db_session=db_session, ) diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index de3f7e4f0..8fa5eecae 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -5,10 +5,10 @@ from typing import Any from pydantic import BaseModel from danswer.configs.constants import DocumentSource -from danswer.search.models import QueryFlow +from 
danswer.search.enums import QueryFlow +from danswer.search.enums import SearchType from danswer.search.models import RetrievalDocs from danswer.search.models import SearchResponse -from danswer.search.models import SearchType class LlmDoc(BaseModel): @@ -16,11 +16,13 @@ class LlmDoc(BaseModel): document_id: str content: str + blurb: str semantic_identifier: str source_type: DocumentSource metadata: dict[str, str | list[str]] updated_at: datetime | None link: str | None + source_links: dict[int, str] | None # First chunk of info for streaming QA @@ -100,9 +102,21 @@ class QAResponse(SearchResponse, DanswerAnswer): error_msg: str | None = None -AnswerQuestionStreamReturn = Iterator[ - DanswerAnswerPiece | DanswerQuotes | DanswerContexts | StreamingError -] +class ImageGenerationDisplay(BaseModel): + file_ids: list[str] + + +AnswerQuestionPossibleReturn = ( + DanswerAnswerPiece + | DanswerQuotes + | CitationInfo + | DanswerContexts + | ImageGenerationDisplay + | StreamingError +) + + +AnswerQuestionStreamReturn = Iterator[AnswerQuestionPossibleReturn] class LLMMetricsContainer(BaseModel): diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml index 1f358e4b1..d60b2724d 100644 --- a/backend/danswer/chat/personas.yaml +++ b/backend/danswer/chat/personas.yaml @@ -5,9 +5,9 @@ personas: # this is for DanswerBot to use when tagged in a non-configured channel # Careful setting specific IDs, this won't autoincrement the next ID value for postgres - id: 0 - name: "Default" + name: "Danswer" description: > - Default Danswer Question Answering functionality. + Assistant with access to documents from your Connected Sources. # Default Prompt objects attached to the persona, see prompts.yaml prompts: - "Answer-Question" @@ -39,22 +39,23 @@ personas: document_sets: [] - - name: "Summarize" + - id: 1 + name: "GPT" description: > - A less creative assistant which summarizes relevant documents but does not try to - extrapolate any answers for you. 
+ Assistant with no access to documents. Chat with just the Language Model. prompts: - - "Summarize" - num_chunks: 10 + - "OnlyLLM" + num_chunks: 0 llm_relevance_filter: true llm_filter_extraction: true recency_bias: "auto" document_sets: [] - - name: "Paraphrase" + - id: 2 + name: "Paraphrase" description: > - The least creative default assistant that only provides quotes from the documents. + Assistant that is heavily constrained and only provides exact quotes from Connected Sources. prompts: - "Paraphrase" num_chunks: 10 diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 1f8ecc55a..7022c06a6 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -5,29 +5,19 @@ from typing import cast from sqlalchemy.orm import Session -from danswer.chat.chat_utils import build_chat_system_message -from danswer.chat.chat_utils import build_chat_user_message -from danswer.chat.chat_utils import build_doc_context_str -from danswer.chat.chat_utils import compute_max_document_tokens -from danswer.chat.chat_utils import compute_max_llm_input_tokens from danswer.chat.chat_utils import create_chat_chain -from danswer.chat.chat_utils import drop_messages_history_overflow -from danswer.chat.chat_utils import extract_citations_from_stream -from danswer.chat.chat_utils import get_chunks_for_qa -from danswer.chat.chat_utils import llm_doc_from_inference_chunk -from danswer.chat.chat_utils import map_document_id_order from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import ImageGenerationDisplay from danswer.chat.models import LlmDoc from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE +from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH from 
danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT -from danswer.configs.constants import DISABLED_GEN_AI_MSG from danswer.configs.constants import MessageType -from danswer.configs.model_configs import CHUNK_SIZE -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.db.chat import attach_files_to_chat_message from danswer.db.chat import create_db_search_doc from danswer.db.chat import create_new_chat_message from danswer.db.chat import get_chat_message @@ -38,101 +28,49 @@ from danswer.db.chat import get_or_create_root_message from danswer.db.chat import translate_db_message_to_chat_message_detail from danswer.db.chat import translate_db_search_doc_to_server_search_doc from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.db.models import ChatMessage -from danswer.db.models import Persona +from danswer.db.engine import get_session_context_manager +from danswer.db.llm import fetch_existing_llm_providers from danswer.db.models import SearchDoc as DbSearchDoc from danswer.db.models import User from danswer.document_index.factory import get_default_document_index -from danswer.indexing.models import InferenceChunk +from danswer.file_store.models import ChatFileType +from danswer.file_store.models import FileDescriptor +from danswer.file_store.utils import load_all_chat_files +from danswer.file_store.utils import save_files_from_urls +from danswer.llm.answering.answer import Answer +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import CitationConfig +from danswer.llm.answering.models import DocumentPruningConfig +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.models import PromptConfig from danswer.llm.exceptions import GenAIDisabledException -from danswer.llm.factory import get_default_llm -from danswer.llm.interfaces import LLM +from danswer.llm.factory import get_llm_for_persona from danswer.llm.utils import 
get_default_llm_tokenizer -from danswer.llm.utils import get_max_input_tokens -from danswer.llm.utils import tokenizer_trim_content -from danswer.llm.utils import translate_history_to_basemessages -from danswer.search.models import OptionalSearchSetting -from danswer.search.models import RetrievalDetails -from danswer.search.request_preprocessing import retrieval_preprocessing -from danswer.search.search_runner import chunks_to_search_docs -from danswer.search.search_runner import full_chunk_search_generator -from danswer.search.search_runner import inference_documents_from_ids -from danswer.secondary_llm_flows.choose_search import check_if_need_search -from danswer.secondary_llm_flows.query_expansion import history_based_query_rephrase +from danswer.search.enums import OptionalSearchSetting +from danswer.search.retrieval.search_runner import inference_documents_from_ids +from danswer.search.utils import chunks_or_sections_to_search_docs +from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.utils import get_json_line +from danswer.tools.factory import get_tool_cls +from danswer.tools.force import ForceUseTool +from danswer.tools.images.image_generation_tool import IMAGE_GENERATION_RESPONSE_ID +from danswer.tools.images.image_generation_tool import ImageGenerationResponse +from danswer.tools.images.image_generation_tool import ImageGenerationTool +from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID +from danswer.tools.search.search_tool import SearchResponseSummary +from danswer.tools.search.search_tool import SearchTool +from danswer.tools.search.search_tool import SECTION_RELEVANCE_LIST_ID +from danswer.tools.tool import Tool +from danswer.tools.tool import ToolResponse +from danswer.tools.utils import compute_all_tool_tokens +from danswer.tools.utils import explicit_tool_calling_supported from danswer.utils.logger import setup_logger 
from danswer.utils.timing import log_generator_function_time logger = setup_logger() -def generate_ai_chat_response( - query_message: ChatMessage, - history: list[ChatMessage], - persona: Persona, - context_docs: list[LlmDoc], - doc_id_to_rank_map: dict[str, int], - llm: LLM | None, - llm_tokenizer_encode_func: Callable, - all_doc_useful: bool, -) -> Iterator[DanswerAnswerPiece | CitationInfo | StreamingError]: - if llm is None: - try: - llm = get_default_llm() - except GenAIDisabledException: - # Not an error if it's a user configuration - yield DanswerAnswerPiece(answer_piece=DISABLED_GEN_AI_MSG) - return - - if query_message.prompt is None: - raise RuntimeError("No prompt received for generating Gen AI answer.") - - try: - context_exists = len(context_docs) > 0 - - system_message_or_none, system_tokens = build_chat_system_message( - prompt=query_message.prompt, - context_exists=context_exists, - llm_tokenizer_encode_func=llm_tokenizer_encode_func, - ) - - history_basemessages, history_token_counts = translate_history_to_basemessages( - history - ) - - # Be sure the context_docs passed to build_chat_user_message - # Is the same as passed in later for extracting citations - user_message, user_tokens = build_chat_user_message( - chat_message=query_message, - prompt=query_message.prompt, - context_docs=context_docs, - llm_tokenizer_encode_func=llm_tokenizer_encode_func, - all_doc_useful=all_doc_useful, - ) - - prompt = drop_messages_history_overflow( - system_msg=system_message_or_none, - system_token_count=system_tokens, - history_msgs=history_basemessages, - history_token_counts=history_token_counts, - final_msg=user_message, - final_msg_token_count=user_tokens, - max_allowed_tokens=compute_max_llm_input_tokens(persona), - ) - - # Good Debug/Breakpoint - tokens = llm.stream(prompt) - - yield from extract_citations_from_stream( - tokens, context_docs, doc_id_to_rank_map - ) - - except Exception as e: - logger.exception(f"LLM failed to produce valid chat message, 
error: {e}") - yield StreamingError(error=str(e)) - - def translate_citations( citations_list: list[CitationInfo], db_docs: list[DbSearchDoc] ) -> dict[int, int]: @@ -153,18 +91,99 @@ def translate_citations( return citation_to_saved_doc_id_map -@log_generator_function_time() -def stream_chat_message( +def _handle_search_tool_response_summary( + packet: ToolResponse, + db_session: Session, + selected_search_docs: list[DbSearchDoc] | None, +) -> tuple[QADocsResponse, list[DbSearchDoc]]: + response_sumary = cast(SearchResponseSummary, packet.response) + + if not selected_search_docs: + top_docs = chunks_or_sections_to_search_docs(response_sumary.top_sections) + reference_db_search_docs = [ + create_db_search_doc(server_search_doc=top_doc, db_session=db_session) + for top_doc in top_docs + ] + else: + reference_db_search_docs = selected_search_docs + + response_docs = [ + translate_db_search_doc_to_server_search_doc(db_search_doc) + for db_search_doc in reference_db_search_docs + ] + return ( + QADocsResponse( + rephrased_query=response_sumary.rephrased_query, + top_documents=response_docs, + predicted_flow=response_sumary.predicted_flow, + predicted_search=response_sumary.predicted_search, + applied_source_filters=response_sumary.final_filters.source_type, + applied_time_cutoff=response_sumary.final_filters.time_cutoff, + recency_bias_multiplier=response_sumary.recency_bias_multiplier, + ), + reference_db_search_docs, + ) + + +def _check_should_force_search( + new_msg_req: CreateChatMessageRequest, +) -> ForceUseTool | None: + # If files are already provided, don't run the search tool + if new_msg_req.file_descriptors: + return None + + if ( + new_msg_req.query_override + or ( + new_msg_req.retrieval_options + and new_msg_req.retrieval_options.run_search == OptionalSearchSetting.ALWAYS + ) + or new_msg_req.search_doc_ids + or DISABLE_LLM_CHOOSE_SEARCH + ): + args = ( + {"query": new_msg_req.query_override} + if new_msg_req.query_override + else None + ) + # if we are 
using selected docs, just put something here so the Tool doesn't need + # to build its own args via an LLM call + if new_msg_req.search_doc_ids: + args = {"query": new_msg_req.message} + + return ForceUseTool( + tool_name=SearchTool.name(), + args=args, + ) + return None + + +ChatPacket = ( + StreamingError + | QADocsResponse + | LLMRelevanceFilterResponse + | ChatMessageDetail + | DanswerAnswerPiece + | CitationInfo + | ImageGenerationDisplay +) +ChatPacketStream = Iterator[ChatPacket] + + +def stream_chat_message_objects( new_msg_req: CreateChatMessageRequest, user: User | None, db_session: Session, # Needed to translate persona num_chunks to tokens to the LLM default_num_chunks: float = MAX_CHUNKS_FED_TO_CHAT, - default_chunk_size: int = CHUNK_SIZE, # For flow with search, don't include as many chunks as possible since we need to leave space # for the chat history, for smaller models, we likely won't get MAX_CHUNKS_FED_TO_CHAT chunks max_document_percentage: float = CHAT_TARGET_CHUNK_PERCENTAGE, -) -> Iterator[str]: + # if specified, uses the last user message and does not create a new user message based + # on the `new_msg_req.message`. Currently, requires a state where the last message is a + # user message (e.g. this can only be used for the chat-seeding flow). + use_existing_user_message: bool = False, +) -> ChatPacketStream: """Streams in order: 1. [conditional] Retrieved documents if a search needs to be run 2. 
[conditional] LLM selected chunk indices if LLM chunk filtering is turned on @@ -184,11 +203,13 @@ def stream_chat_message( message_text = new_msg_req.message chat_session_id = new_msg_req.chat_session_id parent_id = new_msg_req.parent_message_id - prompt_id = new_msg_req.prompt_id reference_doc_ids = new_msg_req.search_doc_ids retrieval_options = new_msg_req.retrieval_options persona = chat_session.persona - query_override = new_msg_req.query_override + + prompt_id = new_msg_req.prompt_id + if prompt_id is None and persona.prompts: + prompt_id = sorted(persona.prompts, key=lambda x: x.id)[-1].id if reference_doc_ids is None and retrieval_options is None: raise RuntimeError( @@ -196,11 +217,11 @@ def stream_chat_message( ) try: - llm = get_default_llm( - gen_ai_model_version_override=persona.llm_model_version_override + llm = get_llm_for_persona( + persona, new_msg_req.llm_override or chat_session.llm_override ) except GenAIDisabledException: - llm = None + raise RuntimeError("LLM is disabled. Can't use chat flow without LLM.") llm_tokenizer = get_default_llm_tokenizer() llm_tokenizer_encode_func = cast( @@ -226,51 +247,68 @@ def stream_chat_message( else: parent_message = root_message - # Create new message at the right place in the tree and update the parent's child pointer - # Don't commit yet until we verify the chat message chain - new_user_message = create_new_chat_message( - chat_session_id=chat_session_id, - parent_message=parent_message, - prompt_id=prompt_id, - message=message_text, - token_count=len(llm_tokenizer_encode_func(message_text)), - message_type=MessageType.USER, - db_session=db_session, - commit=False, - ) - - # Create linear history of messages - final_msg, history_msgs = create_chat_chain( - chat_session_id=chat_session_id, db_session=db_session - ) - - if final_msg.id != new_user_message.id: - db_session.rollback() - raise RuntimeError( - "The new message was not on the mainline. 
" - "Be sure to update the chat pointers before calling this." + user_message = None + if not use_existing_user_message: + # Create new message at the right place in the tree and update the parent's child pointer + # Don't commit yet until we verify the chat message chain + user_message = create_new_chat_message( + chat_session_id=chat_session_id, + parent_message=parent_message, + prompt_id=prompt_id, + message=message_text, + token_count=len(llm_tokenizer_encode_func(message_text)), + message_type=MessageType.USER, + files=None, # Need to attach later for optimization to only load files once in parallel + db_session=db_session, + commit=False, ) - - # Save now to save the latest chat message - db_session.commit() - - run_search = False - # Retrieval options are only None if reference_doc_ids are provided - if retrieval_options is not None and persona.num_chunks != 0: - if retrieval_options.run_search == OptionalSearchSetting.ALWAYS: - run_search = True - elif retrieval_options.run_search == OptionalSearchSetting.NEVER: - run_search = False - else: - run_search = check_if_need_search( - query_message=final_msg, history=history_msgs, llm=llm + # re-create linear history of messages + final_msg, history_msgs = create_chat_chain( + chat_session_id=chat_session_id, db_session=db_session + ) + if final_msg.id != user_message.id: + db_session.rollback() + raise RuntimeError( + "The new message was not on the mainline. " + "Be sure to update the chat pointers before calling this." ) - max_document_tokens = compute_max_document_tokens( - persona=persona, actual_user_input=message_text - ) + # NOTE: do not commit user message - it will be committed when the + # assistant message is successfully generated + else: + # re-create linear history of messages + final_msg, history_msgs = create_chat_chain( + chat_session_id=chat_session_id, db_session=db_session + ) + if final_msg.message_type != MessageType.USER: + raise RuntimeError( + "The last message was not a user message. 
Cannot call " + "`stream_chat_message_objects` with `is_regenerate=True` " + "when the last message is not a user message." + ) - rephrased_query = None + # load all files needed for this chat chain in memory + files = load_all_chat_files( + history_msgs, new_msg_req.file_descriptors, db_session + ) + latest_query_files = [ + file + for file in files + if file.file_id in [f["id"] for f in new_msg_req.file_descriptors] + ] + + if user_message: + attach_files_to_chat_message( + chat_message=user_message, + files=[ + new_file.to_file_descriptor() for new_file in latest_query_files + ], + db_session=db_session, + commit=False, + ) + + selected_db_search_docs = None + selected_llm_docs: list[LlmDoc] | None = None if reference_doc_ids: identifier_tuples = get_doc_query_identifiers_from_model( search_doc_ids=reference_doc_ids, @@ -281,70 +319,12 @@ def stream_chat_message( # Generates full documents currently # May extend to include chunk ranges - llm_docs: list[LlmDoc] = inference_documents_from_ids( + selected_llm_docs = inference_documents_from_ids( doc_identifiers=identifier_tuples, document_index=document_index, ) - - # truncate the last document if it exceeds the token limit - tokens_per_doc = [ - len( - llm_tokenizer_encode_func( - build_doc_context_str( - semantic_identifier=llm_doc.semantic_identifier, - source_type=llm_doc.source_type, - content=llm_doc.content, - metadata_dict=llm_doc.metadata, - updated_at=llm_doc.updated_at, - ind=ind, - ) - ) - ) - for ind, llm_doc in enumerate(llm_docs) - ] - final_doc_ind = None - total_tokens = 0 - for ind, tokens in enumerate(tokens_per_doc): - total_tokens += tokens - if total_tokens > max_document_tokens: - final_doc_ind = ind - break - if final_doc_ind is not None: - # only allow the final document to get truncated - # if more than that, then the user message is too long - if final_doc_ind != len(tokens_per_doc) - 1: - yield get_json_line( - StreamingError( - error="LLM context window exceeded. 
Please de-select some documents or shorten your query." - ).dict() - ) - return - - final_doc_desired_length = tokens_per_doc[final_doc_ind] - ( - total_tokens - max_document_tokens - ) - # 75 tokens is a reasonable over-estimate of the metadata and title - final_doc_content_length = final_doc_desired_length - 75 - # this could occur if we only have space for the title / metadata - # not ideal, but it's the most reasonable thing to do - # NOTE: the frontend prevents documents from being selected if - # less than 75 tokens are available to try and avoid this situation - # from occuring in the first place - if final_doc_content_length <= 0: - logger.error( - f"Final doc ({llm_docs[final_doc_ind].semantic_identifier}) content " - "length is less than 0. Removing this doc from the final prompt." - ) - llm_docs.pop() - else: - llm_docs[final_doc_ind].content = tokenizer_trim_content( - content=llm_docs[final_doc_ind].content, - desired_length=final_doc_content_length, - tokenizer=llm_tokenizer, - ) - - doc_id_to_rank_map = map_document_id_order( - cast(list[InferenceChunk | LlmDoc], llm_docs) + document_pruning_config = DocumentPruningConfig( + is_manually_selected_docs=True ) # In case the search doc is deleted, just don't include it @@ -354,193 +334,179 @@ def stream_chat_message( for doc_id in reference_doc_ids ] - reference_db_search_docs = [ + selected_db_search_docs = [ db_sd for db_sd in db_search_docs_or_none if db_sd ] - elif run_search: - rephrased_query = ( - history_based_query_rephrase( - query_message=final_msg, history=history_msgs, llm=llm - ) - if query_override is None - else query_override - ) - - ( - retrieval_request, - predicted_search_type, - predicted_flow, - ) = retrieval_preprocessing( - query=rephrased_query, - retrieval_details=cast(RetrievalDetails, retrieval_options), - persona=persona, - user=user, - db_session=db_session, - ) - - documents_generator = full_chunk_search_generator( - search_query=retrieval_request, - 
document_index=document_index, - db_session=db_session, - ) - time_cutoff = retrieval_request.filters.time_cutoff - recency_bias_multiplier = retrieval_request.recency_bias_multiplier - run_llm_chunk_filter = not retrieval_request.skip_llm_chunk_filter - - # First fetch and return the top chunks to the UI so the user can - # immediately see some results - top_chunks = cast(list[InferenceChunk], next(documents_generator)) - - # Get ranking of the documents for citation purposes later - doc_id_to_rank_map = map_document_id_order( - cast(list[InferenceChunk | LlmDoc], top_chunks) - ) - - top_docs = chunks_to_search_docs(top_chunks) - - reference_db_search_docs = [ - create_db_search_doc(server_search_doc=top_doc, db_session=db_session) - for top_doc in top_docs - ] - - response_docs = [ - translate_db_search_doc_to_server_search_doc(db_search_doc) - for db_search_doc in reference_db_search_docs - ] - - initial_response = QADocsResponse( - rephrased_query=rephrased_query, - top_documents=response_docs, - predicted_flow=predicted_flow, - predicted_search=predicted_search_type, - applied_source_filters=retrieval_request.filters.source_type, - applied_time_cutoff=time_cutoff, - recency_bias_multiplier=recency_bias_multiplier, - ).dict() - yield get_json_line(initial_response) - - # Get the final ordering of chunks for the LLM call - llm_chunk_selection = cast(list[bool], next(documents_generator)) - - # Yield the list of LLM selected chunks for showing the LLM selected icons in the UI - llm_relevance_filtering_response = LLMRelevanceFilterResponse( - relevant_chunk_indices=[ - index for index, value in enumerate(llm_chunk_selection) if value - ] - if run_llm_chunk_filter - else [] - ).dict() - yield get_json_line(llm_relevance_filtering_response) - - # Prep chunks to pass to LLM - num_llm_chunks = ( - persona.num_chunks - if persona.num_chunks is not None - else default_num_chunks - ) - - llm_name = GEN_AI_MODEL_VERSION - if persona.llm_model_version_override: - llm_name 
= persona.llm_model_version_override - - llm_max_input_tokens = get_max_input_tokens(model_name=llm_name) - - llm_token_based_chunk_lim = max_document_percentage * llm_max_input_tokens - - chunk_token_limit = int( - min( - num_llm_chunks * default_chunk_size, - max_document_tokens, - llm_token_based_chunk_lim, - ) - ) - llm_chunks_indices = get_chunks_for_qa( - chunks=top_chunks, - llm_chunk_selection=llm_chunk_selection, - token_limit=chunk_token_limit, - ) - llm_chunks = [top_chunks[i] for i in llm_chunks_indices] - llm_docs = [llm_doc_from_inference_chunk(chunk) for chunk in llm_chunks] - else: - llm_docs = [] - doc_id_to_rank_map = {} - reference_db_search_docs = None + document_pruning_config = DocumentPruningConfig( + max_chunks=int( + persona.num_chunks + if persona.num_chunks is not None + else default_num_chunks + ), + max_window_percentage=max_document_percentage, + use_sections=new_msg_req.chunks_above > 0 + or new_msg_req.chunks_below > 0, + ) # Cannot determine these without the LLM step or breaking out early partial_response = partial( create_new_chat_message, chat_session_id=chat_session_id, - parent_message=new_user_message, + parent_message=final_msg, prompt_id=prompt_id, # message=, - rephrased_query=rephrased_query, + # rephrased_query=, # token_count=, message_type=MessageType.ASSISTANT, # error=, - reference_docs=reference_db_search_docs, + # reference_docs=, db_session=db_session, - commit=True, + commit=False, ) - # If no prompt is provided, this is interpreted as not wanting an AI Answer - # Simply provide/save the retrieval results - if final_msg.prompt is None: - gen_ai_response_message = partial_response( - message="", - token_count=0, - citations=None, - error=None, - ) - msg_detail_response = translate_db_message_to_chat_message_detail( - gen_ai_response_message - ) + if not final_msg.prompt: + raise RuntimeError("No Prompt found") - yield get_json_line(msg_detail_response.dict()) + prompt_config = PromptConfig.from_model( + 
final_msg.prompt, + prompt_override=( + new_msg_req.prompt_override or chat_session.prompt_override + ), + ) - # Stop here after saving message details, the above still needs to be sent for the - # message id to send the next follow-up message - return + persona_tool_classes = [ + get_tool_cls(tool, db_session) for tool in persona.tools + ] + + # factor in tool definition size when pruning + document_pruning_config.tool_num_tokens = compute_all_tool_tokens( + persona_tool_classes + ) + document_pruning_config.using_tool_message = explicit_tool_calling_supported( + llm.config.model_provider, llm.config.model_name + ) + + # NOTE: for now, only support SearchTool and ImageGenerationTool + # in the future, will support arbitrary user-defined tools + search_tool: SearchTool | None = None + tools: list[Tool] = [] + for tool_cls in persona_tool_classes: + if tool_cls.__name__ == SearchTool.__name__ and not latest_query_files: + search_tool = SearchTool( + db_session=db_session, + user=user, + persona=persona, + retrieval_options=retrieval_options, + prompt_config=prompt_config, + llm_config=llm.config, + pruning_config=document_pruning_config, + selected_docs=selected_llm_docs, + chunks_above=new_msg_req.chunks_above, + chunks_below=new_msg_req.chunks_below, + full_doc=new_msg_req.full_doc, + ) + tools.append(search_tool) + elif tool_cls.__name__ == ImageGenerationTool.__name__: + dalle_key = None + if llm and llm.config.api_key and llm.config.model_provider == "openai": + dalle_key = llm.config.api_key + else: + llm_providers = fetch_existing_llm_providers(db_session) + openai_provider = next( + iter( + [ + llm_provider + for llm_provider in llm_providers + if llm_provider.provider == "openai" + ] + ), + None, + ) + if not openai_provider or not openai_provider.api_key: + raise ValueError( + "Image generation tool requires an OpenAI API key" + ) + dalle_key = openai_provider.api_key + tools.append(ImageGenerationTool(api_key=dalle_key)) # LLM prompt building, response 
capturing, etc. - response_packets = generate_ai_chat_response( - query_message=final_msg, - history=history_msgs, - persona=persona, - context_docs=llm_docs, - doc_id_to_rank_map=doc_id_to_rank_map, - llm=llm, - llm_tokenizer_encode_func=llm_tokenizer_encode_func, - all_doc_useful=reference_doc_ids is not None, + answer = Answer( + question=final_msg.message, + latest_query_files=latest_query_files, + answer_style_config=AnswerStyleConfig( + citation_config=CitationConfig( + all_docs_useful=selected_db_search_docs is not None + ), + document_pruning_config=document_pruning_config, + ), + prompt_config=prompt_config, + llm=( + llm + or get_llm_for_persona( + persona, new_msg_req.llm_override or chat_session.llm_override + ) + ), + message_history=[ + PreviousMessage.from_chat_message(msg, files) for msg in history_msgs + ], + tools=tools, + force_use_tool=( + _check_should_force_search(new_msg_req) if search_tool else None + ), ) - # Capture outputs and errors - llm_output = "" - error: str | None = None - citations: list[CitationInfo] = [] - for packet in response_packets: - if isinstance(packet, DanswerAnswerPiece): - token = packet.answer_piece - if token: - llm_output += token - elif isinstance(packet, StreamingError): - error = packet.error - elif isinstance(packet, CitationInfo): - citations.append(packet) - continue + reference_db_search_docs = None + qa_docs_response = None + ai_message_files = None # any files to associate with the AI message e.g. 
dall-e generated images + for packet in answer.processed_streamed_output: + if isinstance(packet, ToolResponse): + if packet.id == SEARCH_RESPONSE_SUMMARY_ID: + ( + qa_docs_response, + reference_db_search_docs, + ) = _handle_search_tool_response_summary( + packet, db_session, selected_db_search_docs + ) + yield qa_docs_response + elif packet.id == SECTION_RELEVANCE_LIST_ID: + yield LLMRelevanceFilterResponse( + relevant_chunk_indices=packet.response + ) + elif packet.id == IMAGE_GENERATION_RESPONSE_ID: + img_generation_response = cast( + list[ImageGenerationResponse], packet.response + ) + + file_ids = save_files_from_urls( + [img.url for img in img_generation_response] + ) + ai_message_files = [ + FileDescriptor(id=str(file_id), type=ChatFileType.IMAGE) + for file_id in file_ids + ] + yield ImageGenerationDisplay( + file_ids=[str(file_id) for file_id in file_ids] + ) + + else: + yield cast(ChatPacket, packet) - yield get_json_line(packet.dict()) except Exception as e: - logger.exception(e) + logger.exception("Failed to process chat message") - # Frontend will erase whatever answer and show this instead - # This will be the issue 99% of the time - error_packet = StreamingError( - error="LLM failed to respond, have you set your API key?" - ) + # Don't leak the API key + error_msg = str(e) + if llm.config.api_key and llm.config.api_key.lower() in error_msg.lower(): + error_msg = ( + f"LLM failed to respond. Invalid API " + f"key error from '{llm.config.model_provider}'." 
+ ) - yield get_json_line(error_packet.dict()) + yield StreamingError(error=error_msg) + # Cancel the transaction so that no messages are saved + db_session.rollback() return # Post-LLM answer processing @@ -548,27 +514,48 @@ def stream_chat_message( db_citations = None if reference_db_search_docs: db_citations = translate_citations( - citations_list=citations, + citations_list=answer.citations, db_docs=reference_db_search_docs, ) # Saving Gen AI answer and responding with message info gen_ai_response_message = partial_response( - message=llm_output, - token_count=len(llm_tokenizer_encode_func(llm_output)), + message=answer.llm_answer, + rephrased_query=( + qa_docs_response.rephrased_query if qa_docs_response else None + ), + reference_docs=reference_db_search_docs, + files=ai_message_files, + token_count=len(llm_tokenizer_encode_func(answer.llm_answer)), citations=db_citations, - error=error, + error=None, ) + db_session.commit() # actually save user / assistant message msg_detail_response = translate_db_message_to_chat_message_detail( gen_ai_response_message ) - yield get_json_line(msg_detail_response.dict()) + yield msg_detail_response except Exception as e: logger.exception(e) # Frontend will erase whatever answer and show this instead - error_packet = StreamingError(error="Failed to parse LLM output") + yield StreamingError(error="Failed to parse LLM output") - yield get_json_line(error_packet.dict()) + +@log_generator_function_time() +def stream_chat_message( + new_msg_req: CreateChatMessageRequest, + user: User | None, + use_existing_user_message: bool = False, +) -> Iterator[str]: + with get_session_context_manager() as db_session: + objects = stream_chat_message_objects( + new_msg_req=new_msg_req, + user=user, + db_session=db_session, + use_existing_user_message=use_existing_user_message, + ) + for obj in objects: + yield get_json_line(obj.dict()) diff --git a/backend/danswer/chat/prompts.yaml b/backend/danswer/chat/prompts.yaml index 4ab9d7918..474327f04 
100644 --- a/backend/danswer/chat/prompts.yaml +++ b/backend/danswer/chat/prompts.yaml @@ -8,6 +8,7 @@ prompts: # System Prompt (as shown in UI) system: > You are a question answering system that is constantly learning and improving. + The current date is DANSWER_DATETIME_REPLACEMENT. You can process and comprehend vast amounts of text and utilize this knowledge to provide grounded, accurate, and concise answers to diverse queries. @@ -21,8 +22,9 @@ prompts: I have not read or seen any of the documents and do not want to read them. - If there are no relevant documents, refer to the chat history and existing knowledge. + If there are no relevant documents, refer to the chat history and your internal knowledge. # Inject a statement at the end of system prompt to inform the LLM of the current date/time + # If the DANSWER_DATETIME_REPLACEMENT is set, the date/time is inserted there instead # Format looks like: "October 16, 2023 14:30" datetime_aware: true # Prompts the LLM to include citations in the for [1], [2] etc. @@ -30,15 +32,33 @@ prompts: include_citations: true + - name: "OnlyLLM" + description: "Chat directly with the LLM!" + system: > + You are a helpful AI assistant. The current date is DANSWER_DATETIME_REPLACEMENT + + + You give concise responses to very simple questions, but provide more thorough responses to + more complex and open-ended questions. + + + You are happy to help with writing, analysis, question answering, math, coding and all sorts + of other tasks. You use markdown where reasonable and also for coding. + task: "" + datetime_aware: true + include_citations: true + + - name: "Summarize" description: "Summarize relevant information from retrieved context!" system: > You are a text summarizing assistant that highlights the most important knowledge from the context provided, prioritizing the information that relates to the user query. + The current date is DANSWER_DATETIME_REPLACEMENT. 
You ARE NOT creative and always stick to the provided documents. If there are no documents, refer to the conversation history. - + IMPORTANT: YOU ONLY SUMMARIZE THE IMPORTANT INFORMATION FROM THE PROVIDED DOCUMENTS, NEVER USE YOUR OWN KNOWLEDGE. task: > @@ -53,7 +73,8 @@ prompts: description: "Recites information from retrieved context! Least creative but most safe!" system: > Quote and cite relevant information from provided context based on the user query. - + The current date is DANSWER_DATETIME_REPLACEMENT. + You only provide quotes that are EXACT substrings from provided documents! If there are no documents provided, diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index d0159c432..577ef2f0a 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -1,9 +1,10 @@ +import json import os +import urllib.parse from danswer.configs.constants import AuthType from danswer.configs.constants import DocumentIndexType - ##### # App Configs ##### @@ -19,7 +20,9 @@ APP_API_PREFIX = os.environ.get("API_PREFIX", "") # User Facing Features Configs ##### BLURB_SIZE = 128 # Number Encoder Tokens included in the chunk blurb -GENERATIVE_MODEL_ACCESS_CHECK_FREQ = 86400 # 1 day +GENERATIVE_MODEL_ACCESS_CHECK_FREQ = int( + os.environ.get("GENERATIVE_MODEL_ACCESS_CHECK_FREQ") or 86400 +) # 1 day DISABLE_GENERATIVE_AI = os.environ.get("DISABLE_GENERATIVE_AI", "").lower() == "true" @@ -39,12 +42,16 @@ WEB_DOMAIN = os.environ.get("WEB_DOMAIN") or "http://localhost:3000" AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower()) DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED +# Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive +# information. 
This provides an extra layer of security on top of Postgres access controls +# and is available in Danswer EE +ENCRYPTION_KEY_SECRET = os.environ.get("ENCRYPTION_KEY_SECRET") + # Turn off mask if admin users should see full credentials for data connectors. MASK_CREDENTIAL_PREFIX = ( os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false" ) -SECRET = os.environ.get("SECRET", "") SESSION_EXPIRE_TIME_SECONDS = int( os.environ.get("SESSION_EXPIRE_TIME_SECONDS") or 86400 * 7 ) # 7 days @@ -73,6 +80,7 @@ OAUTH_CLIENT_SECRET = ( or "" ) +USER_AUTH_SECRET = os.environ.get("USER_AUTH_SECRET", "") # for basic auth REQUIRE_EMAIL_VERIFICATION = ( os.environ.get("REQUIRE_EMAIL_VERIFICATION", "").lower() == "true" @@ -93,6 +101,9 @@ DOCUMENT_INDEX_TYPE = os.environ.get( "DOCUMENT_INDEX_TYPE", DocumentIndexType.COMBINED.value ) VESPA_HOST = os.environ.get("VESPA_HOST") or "localhost" +# NOTE: this is used if and only if the vespa config server is accessible via a +# different host than the main vespa application +VESPA_CONFIG_SERVER_HOST = os.environ.get("VESPA_CONFIG_SERVER_HOST") or VESPA_HOST VESPA_PORT = os.environ.get("VESPA_PORT") or "8081" VESPA_TENANT_PORT = os.environ.get("VESPA_TENANT_PORT") or "19071" # The default below is for dockerized deployment @@ -108,7 +119,10 @@ except ValueError: # Below are intended to match the env variables names used by the official postgres docker image # https://hub.docker.com/_/postgres POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" -POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD") or "password" +# URL-encode the password for asyncpg to avoid issues with special characters on some machines. 
+POSTGRES_PASSWORD = urllib.parse.quote_plus( + os.environ.get("POSTGRES_PASSWORD") or "password" +) POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" @@ -119,6 +133,12 @@ POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" ##### POLL_CONNECTOR_OFFSET = 30 # Minutes overlap between poll windows +# View the list here: +# https://github.com/danswer-ai/danswer/blob/main/backend/danswer/connectors/factory.py +# If this is empty, all connectors are enabled, this is an option for security heavy orgs where +# only very select connectors are enabled and admins cannot add other connector types +ENABLED_CONNECTOR_TYPES = os.environ.get("ENABLED_CONNECTOR_TYPES") or "" + # Some calls to get information on expert users are quite costly especially with rate limiting # Since experts are not used in the actual user experience, currently it is turned off # for some connectors @@ -128,10 +148,6 @@ GOOGLE_DRIVE_INCLUDE_SHARED = False GOOGLE_DRIVE_FOLLOW_SHORTCUTS = False GOOGLE_DRIVE_ONLY_ORG_PUBLIC = False -FILE_CONNECTOR_TMP_STORAGE_PATH = os.environ.get( - "FILE_CONNECTOR_TMP_STORAGE_PATH", "/home/file_connector_storage" -) - # TODO these should be available for frontend configuration, via advanced options expandable WEB_CONNECTOR_IGNORED_CLASSES = os.environ.get( "WEB_CONNECTOR_IGNORED_CLASSES", "sidebar,footer" @@ -142,6 +158,7 @@ WEB_CONNECTOR_IGNORED_ELEMENTS = os.environ.get( WEB_CONNECTOR_OAUTH_CLIENT_ID = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_ID") WEB_CONNECTOR_OAUTH_CLIENT_SECRET = os.environ.get("WEB_CONNECTOR_OAUTH_CLIENT_SECRET") WEB_CONNECTOR_OAUTH_TOKEN_URL = os.environ.get("WEB_CONNECTOR_OAUTH_TOKEN_URL") +WEB_CONNECTOR_VALIDATE_URLS = os.environ.get("WEB_CONNECTOR_VALIDATE_URLS") NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP = ( os.environ.get("NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP", "").lower() @@ -156,6 +173,17 @@ 
CONFLUENCE_CONNECTOR_LABELS_TO_SKIP = [ if ignored_tag ] +# Avoid to get archived pages +CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES = ( + os.environ.get("CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES", "").lower() == "true" +) + +JIRA_CONNECTOR_LABELS_TO_SKIP = [ + ignored_tag + for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",") + if ignored_tag +] + GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME") GITHUB_CONNECTOR_BASE_URL = os.environ.get("GITHUB_CONNECTOR_BASE_URL") or None @@ -177,6 +205,11 @@ EXPERIMENTAL_CHECKPOINTING_ENABLED = ( CONTINUE_ON_CONNECTOR_FAILURE = os.environ.get( "CONTINUE_ON_CONNECTOR_FAILURE", "" ).lower() not in ["false", ""] +# When swapping to a new embedding model, a secondary index is created in the background, to conserve +# resources, we pause updates on the primary index by default while the secondary index is created +DISABLE_INDEX_UPDATE_ON_SWAP = ( + os.environ.get("DISABLE_INDEX_UPDATE_ON_SWAP", "").lower() == "true" +) # Controls how many worker processes we spin up to index documents in the # background. This is useful for speeding up indexing, but does require a # fairly large amount of memory in order to increase substantially, since @@ -191,32 +224,18 @@ ENABLE_MINI_CHUNK = os.environ.get("ENABLE_MINI_CHUNK", "").lower() == "true" MINI_CHUNK_SIZE = 150 # Timeout to wait for job's last update before killing it, in hours CLEANUP_INDEXING_JOBS_TIMEOUT = int(os.environ.get("CLEANUP_INDEXING_JOBS_TIMEOUT", 3)) - - -##### -# Model Server Configs -##### -# If MODEL_SERVER_HOST is set, the NLP models required for Danswer are offloaded to the server via -# requests. Be sure to include the scheme in the MODEL_SERVER_HOST value. 
-MODEL_SERVER_HOST = os.environ.get("MODEL_SERVER_HOST") or None -MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0" -MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000") - -# specify this env variable directly to have a different model server for the background -# indexing job vs the api server so that background indexing does not effect query-time -# performance -INDEXING_MODEL_SERVER_HOST = ( - os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST +# If set to true, then will not clean up documents that "no longer exist" when running Load connectors +DISABLE_DOCUMENT_CLEANUP = ( + os.environ.get("DISABLE_DOCUMENT_CLEANUP", "").lower() == "true" ) ##### # Miscellaneous ##### -DYNAMIC_CONFIG_STORE = os.environ.get( - "DYNAMIC_CONFIG_STORE", "FileSystemBackedDynamicConfigStore" -) -DYNAMIC_CONFIG_DIR_PATH = os.environ.get("DYNAMIC_CONFIG_DIR_PATH", "/home/storage") +# File based Key Value store no longer used +DYNAMIC_CONFIG_STORE = "PostgresBackedDynamicConfigStore" + JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default # used to allow the background indexing jobs to use a different embedding # model server than the API server @@ -232,7 +251,16 @@ LOG_ALL_MODEL_INTERACTIONS = ( LOG_VESPA_TIMING_INFORMATION = ( os.environ.get("LOG_VESPA_TIMING_INFORMATION", "").lower() == "true" ) +LOG_ENDPOINT_LATENCY = os.environ.get("LOG_ENDPOINT_LATENCY", "").lower() == "true" # Anonymous usage telemetry DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true" -# notset, debug, info, warning, error, or critical -LOG_LEVEL = os.environ.get("LOG_LEVEL", "info") + +TOKEN_BUDGET_GLOBALLY_ENABLED = ( + os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true" +) + +# Defined custom query/answer conditions to validate the query and the LLM answer. 
+# Format: list of strings +CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads( + os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]") +) diff --git a/backend/danswer/configs/chat_configs.py b/backend/danswer/configs/chat_configs.py index 6fc2b6fb9..42553bfea 100644 --- a/backend/danswer/configs/chat_configs.py +++ b/backend/danswer/configs/chat_configs.py @@ -37,6 +37,9 @@ DISABLE_LLM_CHUNK_FILTER = ( DISABLE_LLM_CHOOSE_SEARCH = ( os.environ.get("DISABLE_LLM_CHOOSE_SEARCH", "").lower() == "true" ) +DISABLE_LLM_QUERY_REPHRASE = ( + os.environ.get("DISABLE_LLM_QUERY_REPHRASE", "").lower() == "true" +) # 1 edit per 20 characters, currently unused due to fuzzy match being too slow QUOTE_ALLOWED_ERROR_PERCENT = 0.05 QA_TIMEOUT = int(os.environ.get("QA_TIMEOUT") or "60") # 60 seconds diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 76fc419c0..641738a4c 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -24,6 +24,7 @@ MATCH_HIGHLIGHTS = "match_highlights" # not be used for QA. For example, Google Drive file types which can't be parsed # are still useful as a search result but not for QA. 
IGNORE_FOR_QA = "ignore_for_qa" +# NOTE: deprecated, only used for porting key from old system GEN_AI_API_KEY_STORAGE_KEY = "genai_api_key" PUBLIC_DOC_PAT = "PUBLIC" PUBLIC_DOCUMENT_SET = "__PUBLIC" @@ -40,6 +41,10 @@ DEFAULT_BOOST = 0 SESSION_KEY = "session" QUERY_EVENT_ID = "query_event_id" LLM_CHUNKS = "llm_chunks" +TOKEN_BUDGET = "token_budget" +TOKEN_BUDGET_TIME_PERIOD = "token_budget_time_period" +ENABLE_TOKEN_BUDGET = "enable_token_budget" +TOKEN_BUDGET_SETTINGS = "token_budget_settings" # For chunking/processing chunks TITLE_SEPARATOR = "\n\r\n" @@ -56,6 +61,12 @@ DISABLED_GEN_AI_MSG = ( ) +# API Keys +DANSWER_API_KEY_PREFIX = "API_KEY__" +DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN = "danswerapikey.ai" +UNNAMED_KEY_PLACEHOLDER = "Unnamed" + + class DocumentSource(str, Enum): # Special case, document passed in via Danswer APIs without specifying a source type INGESTION_API = "ingestion_api" @@ -84,6 +95,10 @@ class DocumentSource(str, Enum): LOOPIO = "loopio" SHAREPOINT = "sharepoint" TEAMS = "teams" + DISCOURSE = "discourse" + AXERO = "axero" + MEDIAWIKI = "mediawiki" + WIKIPEDIA = "wikipedia" class DocumentIndexType(str, Enum): @@ -112,3 +127,16 @@ class MessageType(str, Enum): SYSTEM = "system" # SystemMessage USER = "user" # HumanMessage ASSISTANT = "assistant" # AIMessage + + +class TokenRateLimitScope(str, Enum): + USER = "user" + USER_GROUP = "user_group" + GLOBAL = "global" + + +class FileOrigin(str, Enum): + CHAT_UPLOAD = "chat_upload" + CHAT_IMAGE_GEN = "chat_image_gen" + CONNECTOR = "connector" + OTHER = "other" diff --git a/backend/danswer/configs/danswerbot_configs.py b/backend/danswer/configs/danswerbot_configs.py index 484ba144b..1dc01bca1 100644 --- a/backend/danswer/configs/danswerbot_configs.py +++ b/backend/danswer/configs/danswerbot_configs.py @@ -21,6 +21,14 @@ DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER = os.environ.get( DANSWER_REACT_EMOJI = os.environ.get("DANSWER_REACT_EMOJI") or "eyes" # When User needs more help, what should the emoji be 
DANSWER_FOLLOWUP_EMOJI = os.environ.get("DANSWER_FOLLOWUP_EMOJI") or "sos" +# What kind of message should be shown when someone gives an AI answer feedback to DanswerBot +# Defaults to Private if not provided or invalid +# Private: Only visible to user clicking the feedback +# Anonymous: Public but anonymous +# Public: Visible with the user name who submitted the feedback +DANSWER_BOT_FEEDBACK_VISIBILITY = ( + os.environ.get("DANSWER_BOT_FEEDBACK_VISIBILITY") or "private" +) # Should DanswerBot send an apology message if it's not able to find an answer # That way the user isn't confused as to why DanswerBot reacted but then said nothing # Off by default to be less intrusive (don't want to give a notif that just says we couldnt help) @@ -52,8 +60,16 @@ ENABLE_DANSWERBOT_REFLEXION = ( ) # Currently not support chain of thought, probably will add back later DANSWER_BOT_DISABLE_COT = True +# if set, will default DanswerBot to use quotes and reference documents +DANSWER_BOT_USE_QUOTES = os.environ.get("DANSWER_BOT_USE_QUOTES", "").lower() == "true" # Maximum Questions Per Minute, Default Uncapped DANSWER_BOT_MAX_QPM = int(os.environ.get("DANSWER_BOT_MAX_QPM") or 0) or None # Maximum time to wait when a question is queued DANSWER_BOT_MAX_WAIT_TIME = int(os.environ.get("DANSWER_BOT_MAX_WAIT_TIME") or 180) + +# Time (in minutes) after which a Slack message is sent to the user to remind him to give feedback. 
+# Set to 0 to disable it (default) +DANSWER_BOT_FEEDBACK_REMINDER = int( + os.environ.get("DANSWER_BOT_FEEDBACK_REMINDER") or 0 +) diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py index 62563b111..151b41811 100644 --- a/backend/danswer/configs/model_configs.py +++ b/backend/danswer/configs/model_configs.py @@ -1,68 +1,50 @@ +import json import os ##### # Embedding/Reranking Model Configs ##### -CHUNK_SIZE = 512 # Important considerations when choosing models # Max tokens count needs to be high considering use case (at least 512) # Models used must be MIT or Apache license # Inference/Indexing speed # https://huggingface.co/DOCUMENT_ENCODER_MODEL # The useable models configured as below must be SentenceTransformer compatible +# NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING +# IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI +DEFAULT_DOCUMENT_ENCODER_MODEL = "intfloat/e5-base-v2" DOCUMENT_ENCODER_MODEL = ( - # This is not a good model anymore, but this default needs to be kept for not breaking existing - # deployments, will eventually be retired/swapped for a different default model - os.environ.get("DOCUMENT_ENCODER_MODEL") - or "thenlper/gte-small" + os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL ) # If the below is changed, Vespa deployment must also be changed -DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 384) +DOC_EMBEDDING_DIM = int(os.environ.get("DOC_EMBEDDING_DIM") or 768) # Model should be chosen with 512 context size, ideally don't change this DOC_EMBEDDING_CONTEXT_SIZE = 512 NORMALIZE_EMBEDDINGS = ( - os.environ.get("NORMALIZE_EMBEDDINGS") or "False" + os.environ.get("NORMALIZE_EMBEDDINGS") or "true" ).lower() == "true" + +# Old default model settings, which are needed for an automatic easy upgrade +OLD_DEFAULT_DOCUMENT_ENCODER_MODEL = "thenlper/gte-small" +OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM = 384 +OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS 
= False + # These are only used if reranking is turned off, to normalize the direct retrieval scores for display # Currently unused SIM_SCORE_RANGE_LOW = float(os.environ.get("SIM_SCORE_RANGE_LOW") or 0.0) SIM_SCORE_RANGE_HIGH = float(os.environ.get("SIM_SCORE_RANGE_HIGH") or 1.0) # Certain models like e5, BGE, etc use a prefix for asymmetric retrievals (query generally shorter than docs) -ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "") -ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "") +ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "query: ") +ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "passage: ") # Purely an optimization, memory limitation consideration BATCH_SIZE_ENCODE_CHUNKS = 8 -# This controls the minimum number of pytorch "threads" to allocate to the embedding -# model. If torch finds more threads on its own, this value is not used. -MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1) - - -# Cross Encoder Settings -ENABLE_RERANKING_ASYNC_FLOW = ( - os.environ.get("ENABLE_RERANKING_ASYNC_FLOW", "").lower() == "true" -) -ENABLE_RERANKING_REAL_TIME_FLOW = ( - os.environ.get("ENABLE_RERANKING_REAL_TIME_FLOW", "").lower() == "true" -) -# https://www.sbert.net/docs/pretrained-models/ce-msmarco.html -CROSS_ENCODER_MODEL_ENSEMBLE = [ - "cross-encoder/ms-marco-MiniLM-L-4-v2", - "cross-encoder/ms-marco-TinyBERT-L-2-v2", -] -# For score normalizing purposes, only way is to know the expected ranges +# For score display purposes, only way is to know the expected ranges CROSS_ENCODER_RANGE_MAX = 12 CROSS_ENCODER_RANGE_MIN = -12 -CROSS_EMBED_CONTEXT_SIZE = 512 # Unused currently, can't be used with the current default encoder model due to its output range SEARCH_DISTANCE_CUTOFF = 0 -# Intent model max context size -QUERY_MAX_CONTEXT_SIZE = 256 - -# Danswer custom Deep Learning Models -INTENT_MODEL_VERSION = "danswer/intent-model" - ##### # Generative AI Model Configs @@ -78,12 +60,11 @@ 
INTENT_MODEL_VERSION = "danswer/intent-model" # Set GEN_AI_MODEL_PROVIDER to "gpt4all" to use gpt4all models running locally GEN_AI_MODEL_PROVIDER = os.environ.get("GEN_AI_MODEL_PROVIDER") or "openai" # If using Azure, it's the engine name, for example: Danswer -GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION") or "gpt-3.5-turbo-0125" +GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION") + # For secondary flows like extracting filters or deciding if a chunk is useful, we don't need # as powerful of a model as say GPT-4 so we can use an alternative that is faster and cheaper -FAST_GEN_AI_MODEL_VERSION = ( - os.environ.get("FAST_GEN_AI_MODEL_VERSION") or GEN_AI_MODEL_VERSION -) +FAST_GEN_AI_MODEL_VERSION = os.environ.get("FAST_GEN_AI_MODEL_VERSION") # If the Generative AI model requires an API key for access, otherwise can leave blank GEN_AI_API_KEY = ( @@ -111,3 +92,24 @@ GEN_AI_HISTORY_CUTOFF = 3000 # error if the total # of tokens exceeds the max input tokens. GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS = 512 GEN_AI_TEMPERATURE = float(os.environ.get("GEN_AI_TEMPERATURE") or 0) + +# should be used if you are using a custom LLM inference provider that doesn't support +# streaming format AND you are still using the langchain/litellm LLM class +DISABLE_LITELLM_STREAMING = ( + os.environ.get("DISABLE_LITELLM_STREAMING") or "false" +).lower() == "true" + +# extra headers to pass to LiteLLM +LITELLM_EXTRA_HEADERS = None +_LITELLM_EXTRA_HEADERS_RAW = os.environ.get("LITELLM_EXTRA_HEADERS") +if _LITELLM_EXTRA_HEADERS_RAW: + try: + LITELLM_EXTRA_HEADERS = json.loads(_LITELLM_EXTRA_HEADERS_RAW) + except Exception: + # need to import here to avoid circular imports + from danswer.utils.logger import setup_logger + + logger = setup_logger() + logger.error( + "Failed to parse LITELLM_EXTRA_HEADERS, must be a valid JSON object" + ) diff --git a/backend/shared_models/__init__.py b/backend/danswer/connectors/axero/__init__.py similarity index 100% rename 
from backend/shared_models/__init__.py rename to backend/danswer/connectors/axero/__init__.py diff --git a/backend/danswer/connectors/axero/connector.py b/backend/danswer/connectors/axero/connector.py new file mode 100644 index 000000000..a4d5162b6 --- /dev/null +++ b/backend/danswer/connectors/axero/connector.py @@ -0,0 +1,363 @@ +import time +from datetime import datetime +from datetime import timezone +from typing import Any + +import requests +from pydantic import BaseModel + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( + process_in_batches, +) +from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( + rate_limit_builder, +) +from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import ConnectorMissingCredentialError +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +ENTITY_NAME_MAP = {1: "Forum", 3: "Article", 4: "Blog", 9: "Wiki"} + + +def _get_auth_header(api_key: str) -> dict[str, str]: + return {"Rest-Api-Key": api_key} + + +@retry_builder() +@rate_limit_builder(max_calls=5, period=1) +def _rate_limited_request( + endpoint: str, headers: dict, params: dict | None = None +) -> Any: + # https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/370/rest-api + return requests.get(endpoint, headers=headers, params=params) + + +# 
https://my.axerosolutions.com/spaces/5/communifire-documentation/wiki/view/595/rest-api-get-content-list +def _get_entities( + entity_type: int, + api_key: str, + axero_base_url: str, + start: datetime, + end: datetime, + space_id: str | None = None, +) -> list[dict]: + endpoint = axero_base_url + "api/content/list" + page_num = 1 + pages_fetched = 0 + pages_to_return = [] + break_out = False + while True: + params = { + "EntityType": str(entity_type), + "SortColumn": "DateUpdated", + "SortOrder": "1", # descending + "StartPage": str(page_num), + } + + if space_id is not None: + params["SpaceID"] = space_id + + res = _rate_limited_request( + endpoint, headers=_get_auth_header(api_key), params=params + ) + res.raise_for_status() + + # Axero limitations: + # No next page token, can paginate but things may have changed + # for example, a doc that hasn't been read in by Danswer is updated and is now front of the list + # due to this limitation and the fact that Axero has no rate limiting but API calls can cause + # increased latency for the team, we have to just fetch all the pages quickly to reduce the + # chance of missing a document due to an update (it will still get updated next pass) + # Assumes the volume of data isn't too big to store in memory (probably fine) + data = res.json() + total_records = data["TotalRecords"] + contents = data["ResponseData"] + pages_fetched += len(contents) + logger.debug(f"Fetched {pages_fetched} {ENTITY_NAME_MAP[entity_type]}") + + for page in contents: + update_time = time_str_to_utc(page["DateUpdated"]) + + if update_time > end: + continue + + if update_time < start: + break_out = True + break + + pages_to_return.append(page) + + if pages_fetched >= total_records: + break + + page_num += 1 + + if break_out: + break + + return pages_to_return + + +def _get_obj_by_id(obj_id: int, api_key: str, axero_base_url: str) -> dict: + endpoint = axero_base_url + f"api/content/{obj_id}" + res = _rate_limited_request(endpoint, 
headers=_get_auth_header(api_key)) + res.raise_for_status() + + return res.json() + + +class AxeroForum(BaseModel): + doc_id: str + title: str + link: str + initial_content: str + responses: list[str] + last_update: datetime + + +def _map_post_to_parent( + posts: dict, + api_key: str, + axero_base_url: str, +) -> list[AxeroForum]: + """Cannot handle in batches since the posts aren't ordered or structured in any way + may need to map any number of them to the initial post""" + epoch_str = "1970-01-01T00:00:00.000" + post_map: dict[int, AxeroForum] = {} + + for ind, post in enumerate(posts): + if (ind + 1) % 25 == 0: + logger.debug(f"Processed {ind + 1} posts or responses") + + post_time = time_str_to_utc( + post.get("DateUpdated") or post.get("DateCreated") or epoch_str + ) + p_id = post.get("ParentContentID") + if p_id in post_map: + axero_forum = post_map[p_id] + axero_forum.responses.insert(0, post.get("ContentSummary")) + axero_forum.last_update = max(axero_forum.last_update, post_time) + else: + initial_post_d = _get_obj_by_id(p_id, api_key, axero_base_url)[ + "ResponseData" + ] + initial_post_time = time_str_to_utc( + initial_post_d.get("DateUpdated") + or initial_post_d.get("DateCreated") + or epoch_str + ) + post_map[p_id] = AxeroForum( + doc_id="AXERO_" + str(initial_post_d.get("ContentID")), + title=initial_post_d.get("ContentTitle"), + link=initial_post_d.get("ContentURL"), + initial_content=initial_post_d.get("ContentSummary"), + responses=[post.get("ContentSummary")], + last_update=max(post_time, initial_post_time), + ) + + return list(post_map.values()) + + +def _get_forums( + api_key: str, + axero_base_url: str, + space_id: str | None = None, +) -> list[dict]: + endpoint = axero_base_url + "api/content/list" + page_num = 1 + pages_fetched = 0 + pages_to_return = [] + break_out = False + + while True: + params = { + "EntityType": "54", + "SortColumn": "DateUpdated", + "SortOrder": "1", # descending + "StartPage": str(page_num), + } + + if space_id is 
not None: + params["SpaceID"] = space_id + + res = _rate_limited_request( + endpoint, headers=_get_auth_header(api_key), params=params + ) + res.raise_for_status() + + data = res.json() + total_records = data["TotalRecords"] + contents = data["ResponseData"] + pages_fetched += len(contents) + logger.debug(f"Fetched {pages_fetched} forums") + + for page in contents: + pages_to_return.append(page) + + if pages_fetched >= total_records: + break + + page_num += 1 + + if break_out: + break + + return pages_to_return + + +def _translate_forum_to_doc(af: AxeroForum) -> Document: + doc = Document( + id=af.doc_id, + sections=[Section(link=af.link, text=reply) for reply in af.responses], + source=DocumentSource.AXERO, + semantic_identifier=af.title, + doc_updated_at=af.last_update, + metadata={}, + ) + + return doc + + +def _translate_content_to_doc(content: dict) -> Document: + page_text = "" + summary = content.get("ContentSummary") + body = content.get("ContentBody") + if summary: + page_text += f"{summary}\n" + + if body: + content_parsed = parse_html_page_basic(body) + page_text += content_parsed + + doc = Document( + id="AXERO_" + str(content["ContentID"]), + sections=[Section(link=content["ContentURL"], text=page_text)], + source=DocumentSource.AXERO, + semantic_identifier=content["ContentTitle"], + doc_updated_at=time_str_to_utc(content["DateUpdated"]), + metadata={"space": content["SpaceName"]}, + ) + + return doc + + +class AxeroConnector(PollConnector): + def __init__( + self, + # Strings of the integer ids of the spaces + spaces: list[str] | None = None, + include_article: bool = True, + include_blog: bool = True, + include_wiki: bool = True, + include_forum: bool = True, + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + self.include_article = include_article + self.include_blog = include_blog + self.include_wiki = include_wiki + self.include_forum = include_forum + self.batch_size = batch_size + self.space_ids = spaces + self.axero_key = None + self.base_url 
= None + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + self.axero_key = credentials["axero_api_token"] + # As the API key specifically applies to a particular deployment, this is + # included as part of the credential + base_url = credentials["base_url"] + if not base_url.endswith("/"): + base_url += "/" + self.base_url = base_url + return None + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: + if not self.axero_key or not self.base_url: + raise ConnectorMissingCredentialError("Axero") + + start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc) + end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc) + + entity_types = [] + if self.include_article: + entity_types.append(3) + if self.include_blog: + entity_types.append(4) + if self.include_wiki: + entity_types.append(9) + + iterable_space_ids = self.space_ids if self.space_ids else [None] + + for space_id in iterable_space_ids: + for entity in entity_types: + axero_obj = _get_entities( + entity_type=entity, + api_key=self.axero_key, + axero_base_url=self.base_url, + start=start_datetime, + end=end_datetime, + space_id=space_id, + ) + yield from process_in_batches( + objects=axero_obj, + process_function=_translate_content_to_doc, + batch_size=self.batch_size, + ) + + if self.include_forum: + forums_posts = _get_forums( + api_key=self.axero_key, + axero_base_url=self.base_url, + space_id=space_id, + ) + + all_axero_forums = _map_post_to_parent( + posts=forums_posts, + api_key=self.axero_key, + axero_base_url=self.base_url, + ) + + filtered_forums = [ + f + for f in all_axero_forums + if f.last_update >= start_datetime and f.last_update <= end_datetime + ] + + yield from process_in_batches( + objects=filtered_forums, + process_function=_translate_forum_to_doc, + batch_size=self.batch_size, + ) + + +if __name__ == "__main__": + import os + + connector = AxeroConnector() 
+ connector.load_credentials( + { + "axero_api_token": os.environ["AXERO_API_TOKEN"], + "base_url": os.environ["AXERO_BASE_URL"], + } + ) + current = time.time() + + one_year_ago = current - 24 * 60 * 60 * 360 + latest_docs = connector.poll_source(one_year_ago, current) + + print(next(latest_docs)) diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py index 606866b42..f2e692d2c 100644 --- a/backend/danswer/connectors/bookstack/connector.py +++ b/backend/danswer/connectors/bookstack/connector.py @@ -7,7 +7,6 @@ from typing import Any from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.bookstack.client import BookStackApiClient -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector @@ -16,6 +15,7 @@ from danswer.connectors.interfaces import SecondsSinceUnixEpoch from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic class BookstackConnector(LoadConnector, PollConnector): diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index f2d091e4d..a20dd4779 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -11,11 +11,14 @@ import bs4 from atlassian import Confluence # type:ignore from requests import HTTPError +from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from 
danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.html_utils import format_document_soup +from danswer.connectors.confluence.rate_limit_handler import ( + make_confluence_call_handle_rate_limit, +) from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -24,6 +27,7 @@ from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import format_document_soup from danswer.utils.logger import setup_logger logger = setup_logger() @@ -72,7 +76,10 @@ def _extract_confluence_keys_from_datacenter_url(wiki_url: str) -> tuple[str, st def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, bool]: - is_confluence_cloud = ".atlassian.net/wiki/spaces/" in wiki_url + is_confluence_cloud = ( + ".atlassian.net/wiki/spaces/" in wiki_url + or ".jira.com/wiki/spaces/" in wiki_url + ) try: if is_confluence_cloud: @@ -100,10 +107,11 @@ def _get_user(user_id: str, confluence_client: Confluence) -> str: """ user_not_found = "Unknown User" + get_user_details_by_accountid = make_confluence_call_handle_rate_limit( + confluence_client.get_user_details_by_accountid + ) try: - return confluence_client.get_user_details_by_accountid(user_id).get( - "displayName", user_not_found - ) + return get_user_details_by_accountid(user_id).get("displayName", user_not_found) except Exception as e: logger.warning( f"Unable to get the User Display Name with the id: '{user_id}' - {e}" @@ -127,8 +135,13 @@ def parse_html_page(text: str, confluence_client: Confluence) -> str: user_id = ( user.attrs["ri:account-id"] 
if "ri:account-id" in user.attrs - else user.attrs["ri:userkey"] + else user.get("ri:userkey") ) + if not user_id: + logger.warning( + "ri:userkey not found in ri:user element. " f"Found attrs: {user.attrs}" + ) + continue # Include @ sign for tagging, more clear for LLM user.replaceWith("@" + _get_user(user_id, confluence_client)) return format_document_soup(soup) @@ -139,12 +152,16 @@ def _comment_dfs( comment_pages: Collection[dict[str, Any]], confluence_client: Confluence, ) -> str: + get_page_child_by_type = make_confluence_call_handle_rate_limit( + confluence_client.get_page_child_by_type + ) + for comment_page in comment_pages: comment_html = comment_page["body"]["storage"]["value"] comments_str += "\nComment:\n" + parse_html_page( comment_html, confluence_client ) - child_comment_pages = confluence_client.get_page_child_by_type( + child_comment_pages = get_page_child_by_type( comment_page["id"], type="comment", start=None, @@ -195,11 +212,17 @@ class ConfluenceConnector(LoadConnector, PollConnector): start_ind: int, ) -> Collection[dict[str, Any]]: def _fetch(start_ind: int, batch_size: int) -> Collection[dict[str, Any]]: + get_all_pages_from_space = make_confluence_call_handle_rate_limit( + confluence_client.get_all_pages_from_space + ) try: - return confluence_client.get_all_pages_from_space( + return get_all_pages_from_space( self.space, start=start_ind, limit=batch_size, + status="current" + if CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES + else None, expand="body.storage.value,version", ) except Exception: @@ -214,10 +237,13 @@ class ConfluenceConnector(LoadConnector, PollConnector): # Could be that one of the pages here failed due to this bug: # https://jira.atlassian.com/browse/CONFCLOUD-76433 view_pages.extend( - confluence_client.get_all_pages_from_space( + get_all_pages_from_space( self.space, start=start_ind + i, limit=1, + status="current" + if CONFLUENCE_CONNECTOR_INDEX_ONLY_ACTIVE_PAGES + else None, expand="body.storage.value,version", ) ) @@ 
-228,7 +254,7 @@ class ConfluenceConnector(LoadConnector, PollConnector): ) # Use view instead, which captures most info but is less complete view_pages.extend( - confluence_client.get_all_pages_from_space( + get_all_pages_from_space( self.space, start=start_ind + i, limit=1, @@ -257,10 +283,13 @@ class ConfluenceConnector(LoadConnector, PollConnector): return pages def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str: + get_page_child_by_type = make_confluence_call_handle_rate_limit( + confluence_client.get_page_child_by_type + ) try: comment_pages = cast( Collection[dict[str, Any]], - confluence_client.get_page_child_by_type( + get_page_child_by_type( page_id, type="comment", start=None, @@ -279,8 +308,11 @@ class ConfluenceConnector(LoadConnector, PollConnector): return "" def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str]: + get_page_labels = make_confluence_call_handle_rate_limit( + confluence_client.get_page_labels + ) try: - labels_response = confluence_client.get_page_labels(page_id) + labels_response = get_page_labels(page_id) return [label["name"] for label in labels_response["results"]] except Exception as e: if not self.continue_on_failure: diff --git a/backend/danswer/connectors/confluence/rate_limit_handler.py b/backend/danswer/connectors/confluence/rate_limit_handler.py new file mode 100644 index 000000000..b9481d6bd --- /dev/null +++ b/backend/danswer/connectors/confluence/rate_limit_handler.py @@ -0,0 +1,40 @@ +from collections.abc import Callable +from typing import Any +from typing import cast +from typing import TypeVar + +from requests import HTTPError +from retry import retry + + +F = TypeVar("F", bound=Callable[..., Any]) + + +RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower() + + +class ConfluenceRateLimitError(Exception): + pass + + +def make_confluence_call_handle_rate_limit(confluence_call: F) -> F: + @retry( + exceptions=ConfluenceRateLimitError, + tries=10, + delay=1, + 
max_delay=600, # 10 minutes + backoff=2, + jitter=1, + ) + def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: + try: + return confluence_call(*args, **kwargs) + except HTTPError as e: + if ( + e.response.status_code == 429 + or RATE_LIMIT_MESSAGE_LOWERCASE in e.response.text.lower() + ): + raise ConfluenceRateLimitError() + raise + + return cast(F, wrapped_call) diff --git a/backend/danswer/connectors/cross_connector_utils/file_utils.py b/backend/danswer/connectors/cross_connector_utils/file_utils.py deleted file mode 100644 index b0a9c723f..000000000 --- a/backend/danswer/connectors/cross_connector_utils/file_utils.py +++ /dev/null @@ -1,158 +0,0 @@ -import json -import os -import re -import zipfile -from collections.abc import Generator -from pathlib import Path -from typing import Any -from typing import IO - -import chardet -from pypdf import PdfReader -from pypdf.errors import PdfStreamError - -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -def extract_metadata(line: str) -> dict | None: - html_comment_pattern = r"" - hashtag_pattern = r"#DANSWER_METADATA=\{(.*?)\}" - - html_comment_match = re.search(html_comment_pattern, line) - hashtag_match = re.search(hashtag_pattern, line) - - if html_comment_match: - json_str = html_comment_match.group(1) - elif hashtag_match: - json_str = hashtag_match.group(1) - else: - return None - - try: - return json.loads("{" + json_str + "}") - except json.JSONDecodeError: - return None - - -def read_pdf_file(file: IO[Any], file_name: str, pdf_pass: str | None = None) -> str: - try: - pdf_reader = PdfReader(file) - - # If marked as encrypted and a password is provided, try to decrypt - if pdf_reader.is_encrypted and pdf_pass is not None: - decrypt_success = False - if pdf_pass is not None: - try: - decrypt_success = pdf_reader.decrypt(pdf_pass) != 0 - except Exception: - logger.error(f"Unable to decrypt pdf {file_name}") - else: - logger.info(f"No Password available to to decrypt pdf 
{file_name}") - - if not decrypt_success: - # By user request, keep files that are unreadable just so they - # can be discoverable by title. - return "" - - return "\n".join(page.extract_text() for page in pdf_reader.pages) - except PdfStreamError: - logger.exception(f"PDF file {file_name} is not a valid PDF") - except Exception: - logger.exception(f"Failed to read PDF {file_name}") - - # File is still discoverable by title - # but the contents are not included as they cannot be parsed - return "" - - -def is_macos_resource_fork_file(file_name: str) -> bool: - return os.path.basename(file_name).startswith("._") and file_name.startswith( - "__MACOSX" - ) - - -# To include additional metadata in the search index, add a .danswer_metadata.json file -# to the zip file. This file should contain a list of objects with the following format: -# [{ "filename": "file1.txt", "link": "https://example.com/file1.txt" }] -def load_files_from_zip( - zip_location: str | Path, - ignore_macos_resource_fork_files: bool = True, - ignore_dirs: bool = True, -) -> Generator[tuple[zipfile.ZipInfo, IO[Any], dict[str, Any]], None, None]: - with zipfile.ZipFile(zip_location, "r") as zip_file: - zip_metadata = {} - try: - metadata_file_info = zip_file.getinfo(".danswer_metadata.json") - with zip_file.open(metadata_file_info, "r") as metadata_file: - try: - zip_metadata = json.load(metadata_file) - if isinstance(zip_metadata, list): - # convert list of dicts to dict of dicts - zip_metadata = {d["filename"]: d for d in zip_metadata} - except json.JSONDecodeError: - logger.warn("Unable to load .danswer_metadata.json") - except KeyError: - logger.info("No .danswer_metadata.json file") - - for file_info in zip_file.infolist(): - with zip_file.open(file_info.filename, "r") as file: - if ignore_dirs and file_info.is_dir(): - continue - - if ignore_macos_resource_fork_files and is_macos_resource_fork_file( - file_info.filename - ): - continue - yield file_info, file, 
zip_metadata.get(file_info.filename, {}) - - -def detect_encoding(file_path: str | Path) -> str: - with open(file_path, "rb") as file: - raw_data = file.read(50000) # Read a portion of the file to guess encoding - return chardet.detect(raw_data)["encoding"] or "utf-8" - - -def read_file( - file_reader: IO[Any], encoding: str = "utf-8", errors: str = "replace" -) -> tuple[str, dict]: - metadata = {} - file_content_raw = "" - for ind, line in enumerate(file_reader): - try: - line = line.decode(encoding) if isinstance(line, bytes) else line - except UnicodeDecodeError: - line = ( - line.decode(encoding, errors=errors) - if isinstance(line, bytes) - else line - ) - - if ind == 0: - metadata_or_none = extract_metadata(line) - if metadata_or_none is not None: - metadata = metadata_or_none - else: - file_content_raw += line - else: - file_content_raw += line - - return file_content_raw, metadata - - -def is_text_file_extension(file_name: str) -> bool: - extensions = ( - ".txt", - ".mdx", - ".md", - ".conf", - ".log", - ".json", - ".xml", - ".yaml", - ".yml", - ".json", - ) - return any(file_name.endswith(ext) for ext in extensions) diff --git a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py index 10c831560..8faf6bfad 100644 --- a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py +++ b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py @@ -1,5 +1,8 @@ +from collections.abc import Callable +from collections.abc import Iterator from datetime import datetime from datetime import timezone +from typing import TypeVar from dateutil.parser import parse @@ -43,3 +46,14 @@ def get_experts_stores_representations( reps = [basic_expert_info_representation(owner) for owner in experts] return [owner for owner in reps if owner is not None] + + +T = TypeVar("T") +U = TypeVar("U") + + +def process_in_batches( + objects: list[T], process_function: 
Callable[[T], U], batch_size: int +) -> Iterator[list[U]]: + for i in range(0, len(objects), batch_size): + yield [process_function(obj) for obj in objects[i : i + batch_size]] diff --git a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py index 43baced17..8733ca66e 100644 --- a/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py +++ b/backend/danswer/connectors/cross_connector_utils/rate_limit_wrapper.py @@ -5,6 +5,8 @@ from typing import Any from typing import cast from typing import TypeVar +import requests + from danswer.utils.logger import setup_logger logger = setup_logger() @@ -84,3 +86,45 @@ class _RateLimitDecorator: rate_limit_builder = _RateLimitDecorator + + +"""If you want to allow the external service to tell you when you've hit the rate limit, +use the following instead""" + +R = TypeVar("R", bound=Callable[..., requests.Response]) + + +def wrap_request_to_handle_ratelimiting( + request_fn: R, default_wait_time_sec: int = 30, max_waits: int = 30 +) -> R: + def wrapped_request(*args: list, **kwargs: dict[str, Any]) -> requests.Response: + for _ in range(max_waits): + response = request_fn(*args, **kwargs) + if response.status_code == 429: + try: + wait_time = int( + response.headers.get("Retry-After", default_wait_time_sec) + ) + except ValueError: + wait_time = default_wait_time_sec + + time.sleep(wait_time) + continue + + return response + + raise RateLimitTriedTooManyTimesError(f"Exceeded '{max_waits}' retries") + + return cast(R, wrapped_request) + + +_rate_limited_get = wrap_request_to_handle_ratelimiting(requests.get) +_rate_limited_post = wrap_request_to_handle_ratelimiting(requests.post) + + +class _RateLimitedRequest: + get = _rate_limited_get + post = _rate_limited_post + + +rl_requests = _RateLimitedRequest diff --git a/backend/danswer/connectors/danswer_jira/connector.py b/backend/danswer/connectors/danswer_jira/connector.py 
index 8d82fd8b4..212035901 100644 --- a/backend/danswer/connectors/danswer_jira/connector.py +++ b/backend/danswer/connectors/danswer_jira/connector.py @@ -1,3 +1,4 @@ +import os from datetime import datetime from datetime import timezone from typing import Any @@ -7,6 +8,7 @@ from jira import JIRA from jira.resources import Issue from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput @@ -22,6 +24,7 @@ from danswer.utils.logger import setup_logger logger = setup_logger() PROJECT_URL_PAT = "projects" +JIRA_API_VERSION = os.environ.get("JIRA_API_VERSION") or "2" def extract_jira_project(url: str) -> tuple[str, str]: @@ -42,11 +45,54 @@ def extract_jira_project(url: str) -> tuple[str, str]: return jira_base, jira_project +def extract_text_from_content(content: dict) -> str: + texts = [] + if "content" in content: + for block in content["content"]: + if "content" in block: + for item in block["content"]: + if item["type"] == "text": + texts.append(item["text"]) + return " ".join(texts) + + +def _get_comment_strs( + jira: Issue, comment_email_blacklist: tuple[str, ...] 
= () +) -> list[str]: + comment_strs = [] + for comment in jira.fields.comment.comments: + try: + if hasattr(comment, "body"): + body_text = extract_text_from_content(comment.raw["body"]) + elif hasattr(comment, "raw"): + body = comment.raw.get("body", "No body content available") + body_text = ( + extract_text_from_content(body) if isinstance(body, dict) else body + ) + else: + body_text = "No body attribute found" + + if ( + hasattr(comment, "author") + and comment.author.emailAddress in comment_email_blacklist + ): + continue # Skip adding comment if author's email is in blacklist + + comment_strs.append(body_text) + except Exception as e: + logger.error(f"Failed to process comment due to an error: {e}") + continue + + return comment_strs + + def fetch_jira_issues_batch( jql: str, start_index: int, jira_client: JIRA, batch_size: int = INDEX_BATCH_SIZE, + comment_email_blacklist: tuple[str, ...] = (), + labels_to_skip: set[str] | None = None, ) -> tuple[list[Document], int]: doc_batch = [] @@ -61,22 +107,55 @@ def fetch_jira_issues_batch( logger.warning(f"Found Jira object not of type Issue {jira}") continue + if labels_to_skip and any( + label in jira.fields.labels for label in labels_to_skip + ): + logger.info( + f"Skipping {jira.key} because it has a label to skip. Found " + f"labels: {jira.fields.labels}. Labels to skip: {labels_to_skip}." 
+ ) + continue + + comments = _get_comment_strs(jira, comment_email_blacklist) semantic_rep = f"{jira.fields.description}\n" + "\n".join( - [f"Comment: {comment.body}" for comment in jira.fields.comment.comments] + [f"Comment: {comment}" for comment in comments] ) page_url = f"{jira_client.client_info()}/browse/{jira.key}" - author = None + people = set() try: - author = BasicExpertInfo( - display_name=jira.fields.creator.displayName, - email=jira.fields.creator.emailAddress, + people.add( + BasicExpertInfo( + display_name=jira.fields.creator.displayName, + email=jira.fields.creator.emailAddress, + ) ) except Exception: # Author should exist but if not, doesn't matter pass + try: + people.add( + BasicExpertInfo( + display_name=jira.fields.assignee.displayName, # type: ignore + email=jira.fields.assignee.emailAddress, # type: ignore + ) + ) + except Exception: + # Author should exist but if not, doesn't matter + pass + + metadata_dict = {} + if jira.fields.priority: + metadata_dict["priority"] = jira.fields.priority.name + if jira.fields.status: + metadata_dict["status"] = jira.fields.status.name + if jira.fields.resolution: + metadata_dict["resolution"] = jira.fields.resolution.name + if jira.fields.labels: + metadata_dict["label"] = jira.fields.labels + doc_batch.append( Document( id=page_url, @@ -84,9 +163,9 @@ def fetch_jira_issues_batch( source=DocumentSource.JIRA, semantic_identifier=jira.fields.summary, doc_updated_at=time_str_to_utc(jira.fields.updated), - primary_owners=[author] if author is not None else None, - # TODO add secondary_owners if needed - metadata={"label": jira.fields.labels} if jira.fields.labels else {}, + primary_owners=list(people) or None, + # TODO add secondary_owners (commenters) if needed + metadata=metadata_dict, ) ) return doc_batch, len(batch) @@ -96,16 +175,40 @@ class JiraConnector(LoadConnector, PollConnector): def __init__( self, jira_project_url: str, + comment_email_blacklist: list[str] | None = None, batch_size: int = 
INDEX_BATCH_SIZE, + # if a ticket has one of the labels specified in this list, we will just + # skip it. This is generally used to avoid indexing extra sensitive + # tickets. + labels_to_skip: list[str] = JIRA_CONNECTOR_LABELS_TO_SKIP, ) -> None: self.batch_size = batch_size self.jira_base, self.jira_project = extract_jira_project(jira_project_url) self.jira_client: JIRA | None = None + self._comment_email_blacklist = comment_email_blacklist or [] + + self.labels_to_skip = set(labels_to_skip) + + @property + def comment_email_blacklist(self) -> tuple: + return tuple(email.strip() for email in self._comment_email_blacklist) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - email = credentials["jira_user_email"] api_token = credentials["jira_api_token"] - self.jira_client = JIRA(basic_auth=(email, api_token), server=self.jira_base) + # if user provide an email we assume it's cloud + if "jira_user_email" in credentials: + email = credentials["jira_user_email"] + self.jira_client = JIRA( + basic_auth=(email, api_token), + server=self.jira_base, + options={"rest_api_version": JIRA_API_VERSION}, + ) + else: + self.jira_client = JIRA( + token_auth=api_token, + server=self.jira_base, + options={"rest_api_version": JIRA_API_VERSION}, + ) return None def load_from_state(self) -> GenerateDocumentsOutput: @@ -115,10 +218,12 @@ class JiraConnector(LoadConnector, PollConnector): start_ind = 0 while True: doc_batch, fetched_batch_size = fetch_jira_issues_batch( - f"project = {self.jira_project}", - start_ind, - self.jira_client, - self.batch_size, + jql=f"project = {self.jira_project}", + start_index=start_ind, + jira_client=self.jira_client, + batch_size=self.batch_size, + comment_email_blacklist=self.comment_email_blacklist, + labels_to_skip=self.labels_to_skip, ) if doc_batch: @@ -150,10 +255,12 @@ class JiraConnector(LoadConnector, PollConnector): start_ind = 0 while True: doc_batch, fetched_batch_size = fetch_jira_issues_batch( - jql, - 
start_ind, - self.jira_client, - self.batch_size, + jql=jql, + start_index=start_ind, + jira_client=self.jira_client, + batch_size=self.batch_size, + comment_email_blacklist=self.comment_email_blacklist, + labels_to_skip=self.labels_to_skip, ) if doc_batch: @@ -167,7 +274,9 @@ class JiraConnector(LoadConnector, PollConnector): if __name__ == "__main__": import os - connector = JiraConnector(os.environ["JIRA_PROJECT_URL"]) + connector = JiraConnector( + os.environ["JIRA_PROJECT_URL"], comment_email_blacklist=[] + ) connector.load_credentials( { "jira_user_email": os.environ["JIRA_USER_EMAIL"], diff --git a/backend/danswer/connectors/danswer_jira/utils.py b/backend/danswer/connectors/danswer_jira/utils.py new file mode 100644 index 000000000..506f5eff7 --- /dev/null +++ b/backend/danswer/connectors/danswer_jira/utils.py @@ -0,0 +1,92 @@ +"""Module with custom fields processing functions""" +from typing import Any +from typing import List + +from jira import JIRA +from jira.resources import CustomFieldOption +from jira.resources import Issue +from jira.resources import User + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class CustomFieldExtractor: + @staticmethod + def _process_custom_field_value(value: Any) -> str: + """ + Process a custom field value to a string + """ + try: + if isinstance(value, str): + return value + elif isinstance(value, CustomFieldOption): + return value.value + elif isinstance(value, User): + return value.displayName + elif isinstance(value, List): + return " ".join( + [CustomFieldExtractor._process_custom_field_value(v) for v in value] + ) + else: + return str(value) + except Exception as e: + logger.error(f"Error processing custom field value {value}: {e}") + return "" + + @staticmethod + def get_issue_custom_fields( + jira: Issue, custom_fields: dict, max_value_length: int = 250 + ) -> dict: + """ + Process all custom fields of an issue to a dictionary of strings + :param jira: jira_issue, bug or similar + 
:param custom_fields: custom fields dictionary + :param max_value_length: maximum length of the value to be processed, if exceeded, it will be truncated + """ + + issue_custom_fields = { + custom_fields[key]: value + for key, value in jira.fields.__dict__.items() + if value and key in custom_fields.keys() + } + + processed_fields = {} + + if issue_custom_fields: + for key, value in issue_custom_fields.items(): + processed = CustomFieldExtractor._process_custom_field_value(value) + # We need max length parameter, because there are some plugins that often has very long description + # and there is just a technical information so we just avoid long values + if len(processed) < max_value_length: + processed_fields[key] = processed + + return processed_fields + + @staticmethod + def get_all_custom_fields(jira_client: JIRA) -> dict: + """Get all custom fields from Jira""" + fields = jira_client.fields() + fields_dct = { + field["id"]: field["name"] for field in fields if field["custom"] is True + } + return fields_dct + + +class CommonFieldExtractor: + @staticmethod + def get_issue_common_fields(jira: Issue) -> dict: + return { + "Priority": jira.fields.priority.name if jira.fields.priority else None, + "Reporter": jira.fields.reporter.displayName + if jira.fields.reporter + else None, + "Assignee": jira.fields.assignee.displayName + if jira.fields.assignee + else None, + "Status": jira.fields.status.name if jira.fields.status else None, + "Resolution": jira.fields.resolution.name + if jira.fields.resolution + else None, + } diff --git a/backend/danswer/connectors/discourse/__init__.py b/backend/danswer/connectors/discourse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/connectors/discourse/connector.py b/backend/danswer/connectors/discourse/connector.py new file mode 100644 index 000000000..1bf64a6b3 --- /dev/null +++ b/backend/danswer/connectors/discourse/connector.py @@ -0,0 +1,215 @@ +import time +import urllib.parse +from 
datetime import datetime +from datetime import timezone +from typing import Any + +import requests +from pydantic import BaseModel +from requests import Response + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import BasicExpertInfo +from danswer.connectors.models import ConnectorMissingCredentialError +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class DiscoursePerms(BaseModel): + api_key: str + api_username: str + + +@retry_builder() +def discourse_request( + endpoint: str, perms: DiscoursePerms, params: dict | None = None +) -> Response: + headers = {"Api-Key": perms.api_key, "Api-Username": perms.api_username} + + response = requests.get(endpoint, headers=headers, params=params) + response.raise_for_status() + + return response + + +class DiscourseConnector(PollConnector): + def __init__( + self, + base_url: str, + categories: list[str] | None = None, + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + parsed_url = urllib.parse.urlparse(base_url) + if not parsed_url.scheme: + base_url = "https://" + base_url + self.base_url = base_url + + self.categories = [c.lower() for c in categories] if categories else [] + self.category_id_map: dict[int, str] = {} + + self.batch_size = batch_size + + self.permissions: DiscoursePerms | None = None + + def _get_categories_map( + self, + ) -> None: + assert self.permissions is not 
None + categories_endpoint = urllib.parse.urljoin(self.base_url, "categories.json") + response = discourse_request( + endpoint=categories_endpoint, + perms=self.permissions, + params={"include_subcategories": True}, + ) + categories = response.json()["category_list"]["categories"] + + self.category_id_map = { + category["id"]: category["name"] + for category in categories + if not self.categories or category["name"].lower() in self.categories + } + + def _get_latest_topics( + self, start: datetime | None, end: datetime | None + ) -> list[int]: + assert self.permissions is not None + topic_ids = [] + + valid_categories = set(self.category_id_map.keys()) + + latest_endpoint = urllib.parse.urljoin(self.base_url, "latest.json") + response = discourse_request(endpoint=latest_endpoint, perms=self.permissions) + topics = response.json()["topic_list"]["topics"] + for topic in topics: + last_time = topic.get("last_posted_at") + if not last_time: + continue + last_time_dt = time_str_to_utc(last_time) + + if start and start > last_time_dt: + continue + if end and end < last_time_dt: + continue + + if valid_categories and topic.get("category_id") not in valid_categories: + continue + + topic_ids.append(topic["id"]) + + return topic_ids + + def _get_doc_from_topic(self, topic_id: int) -> Document: + assert self.permissions is not None + topic_endpoint = urllib.parse.urljoin(self.base_url, f"t/{topic_id}.json") + response = discourse_request( + endpoint=topic_endpoint, + perms=self.permissions, + ) + topic = response.json() + + topic_url = urllib.parse.urljoin(self.base_url, f"t/{topic['slug']}") + + sections = [] + poster = None + responders = [] + seen_names = set() + for ind, post in enumerate(topic["post_stream"]["posts"]): + if ind == 0: + poster_name = post.get("name") + if poster_name: + seen_names.add(poster_name) + poster = BasicExpertInfo(display_name=poster_name) + else: + responder_name = post.get("name") + if responder_name and responder_name not in seen_names: + 
seen_names.add(responder_name) + responders.append(BasicExpertInfo(display_name=responder_name)) + + sections.append( + Section(link=topic_url, text=parse_html_page_basic(post["cooked"])) + ) + + metadata: dict[str, str | list[str]] = { + "category": self.category_id_map[topic["category_id"]], + } + if topic.get("tags"): + metadata["tags"] = topic["tags"] + + doc = Document( + id="_".join([DocumentSource.DISCOURSE.value, str(topic["id"])]), + sections=sections, + source=DocumentSource.DISCOURSE, + semantic_identifier=topic["title"], + doc_updated_at=time_str_to_utc(topic["last_posted_at"]), + primary_owners=[poster] if poster else None, + secondary_owners=responders or None, + metadata=metadata, + ) + return doc + + def _yield_discourse_documents( + self, topic_ids: list[int] + ) -> GenerateDocumentsOutput: + doc_batch: list[Document] = [] + for topic_id in topic_ids: + doc_batch.append(self._get_doc_from_topic(topic_id)) + + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + if doc_batch: + yield doc_batch + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + self.permissions = DiscoursePerms( + api_key=credentials["discourse_api_key"], + api_username=credentials["discourse_api_username"], + ) + + return None + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: + if self.permissions is None: + raise ConnectorMissingCredentialError("Discourse") + start_datetime = datetime.utcfromtimestamp(start).replace(tzinfo=timezone.utc) + end_datetime = datetime.utcfromtimestamp(end).replace(tzinfo=timezone.utc) + + self._get_categories_map() + + latest_topic_ids = self._get_latest_topics( + start=start_datetime, end=end_datetime + ) + + yield from self._yield_discourse_documents(latest_topic_ids) + + +if __name__ == "__main__": + import os + + connector = DiscourseConnector(base_url=os.environ["DISCOURSE_BASE_URL"]) + connector.load_credentials( + { + 
"discourse_api_key": os.environ["DISCOURSE_API_KEY"], + "discourse_api_username": os.environ["DISCOURSE_API_USERNAME"], + } + ) + + current = time.time() + one_year_ago = current - 24 * 60 * 60 * 360 + + latest_docs = connector.poll_source(one_year_ago, current) + + print(next(latest_docs)) diff --git a/backend/danswer/connectors/document360/connector.py b/backend/danswer/connectors/document360/connector.py index 82ac51f17..6a9f4ba6a 100644 --- a/backend/danswer/connectors/document360/connector.py +++ b/backend/danswer/connectors/document360/connector.py @@ -8,25 +8,27 @@ import requests from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder +from danswer.connectors.document360.utils import flatten_child_categories from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic # Limitations and Potential Improvements # 1. The "Categories themselves contain potentially relevant information" but they're not pulled in # 2. Only the HTML Articles are supported, Document360 also has a Markdown and "Block" format # 3. 
The contents are not as cleaned up as other HTML connectors -DOCUMENT360_BASE_URL = "https://preview.portal.document360.io/" +DOCUMENT360_BASE_URL = "https://portal.document360.io" DOCUMENT360_API_BASE_URL = "https://apihub.document360.io/v2" @@ -96,13 +98,16 @@ class Document360Connector(LoadConnector, PollConnector): {"id": article["id"], "category_name": category["name"]} ) for child_category in category["child_categories"]: - for article in child_category["articles"]: - articles_with_category.append( - { - "id": article["id"], - "category_name": child_category["name"], - } - ) + all_nested_categories = flatten_child_categories(child_category) + for nested_category in all_nested_categories: + for article in nested_category["articles"]: + articles_with_category.append( + { + "id": article["id"], + "category_name": nested_category["name"], + } + ) + return articles_with_category def _process_articles( @@ -130,15 +135,23 @@ class Document360Connector(LoadConnector, PollConnector): continue authors = [ - author["email_id"] + BasicExpertInfo( + display_name=author.get("name"), email=author["email_id"] + ) for author in article_details.get("authors", []) if author["email_id"] ] - doc_link = f"{DOCUMENT360_BASE_URL}/{self.portal_id}/document/v1/view/{article['id']}" + doc_link = ( + article_details["url"] + if article_details.get("url") + else f"{DOCUMENT360_BASE_URL}/{self.portal_id}/document/v1/view/{article['id']}" + ) html_content = article_details["html_content"] - article_content = parse_html_page_basic(html_content) + article_content = ( + parse_html_page_basic(html_content) if html_content is not None else "" + ) doc_text = ( f"{article_details.get('description', '')}\n{article_content}".strip() ) diff --git a/backend/danswer/connectors/document360/utils.py b/backend/danswer/connectors/document360/utils.py new file mode 100644 index 000000000..87ef880da --- /dev/null +++ b/backend/danswer/connectors/document360/utils.py @@ -0,0 +1,8 @@ +def 
flatten_child_categories(category: dict) -> list[dict]: + if not category["child_categories"]: + return [category] + else: + flattened_categories = [category] + for child_category in category["child_categories"]: + flattened_categories.extend(flatten_child_categories(child_category)) + return flattened_categories diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 4272de302..cb0d41b8a 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -2,9 +2,11 @@ from typing import Any from typing import Type from danswer.configs.constants import DocumentSource +from danswer.connectors.axero.connector import AxeroConnector from danswer.connectors.bookstack.connector import BookstackConnector from danswer.connectors.confluence.connector import ConfluenceConnector from danswer.connectors.danswer_jira.connector import JiraConnector +from danswer.connectors.discourse.connector import DiscourseConnector from danswer.connectors.document360.connector import Document360Connector from danswer.connectors.file.connector import LocalFileConnector from danswer.connectors.github.connector import GithubConnector @@ -21,6 +23,7 @@ from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.linear.connector import LinearConnector from danswer.connectors.loopio.connector import LoopioConnector +from danswer.connectors.mediawiki.wiki import MediaWikiConnector from danswer.connectors.models import InputType from danswer.connectors.notion.connector import NotionConnector from danswer.connectors.productboard.connector import ProductboardConnector @@ -31,6 +34,7 @@ from danswer.connectors.slab.connector import SlabConnector from danswer.connectors.slack.connector import SlackPollConnector from danswer.connectors.slack.load_connector import SlackLoadConnector from danswer.connectors.web.connector import WebConnector +from 
danswer.connectors.wikipedia.connector import WikipediaConnector from danswer.connectors.zendesk.connector import ZendeskConnector from danswer.connectors.zulip.connector import ZulipConnector @@ -72,6 +76,10 @@ def identify_connector_class( DocumentSource.LOOPIO: LoopioConnector, DocumentSource.SHAREPOINT: SharepointConnector, DocumentSource.TEAMS: TeamsConnector, + DocumentSource.DISCOURSE: DiscourseConnector, + DocumentSource.AXERO: AxeroConnector, + DocumentSource.MEDIAWIKI: MediaWikiConnector, + DocumentSource.WIKIPEDIA: WikipediaConnector, } connector_by_source = connector_map.get(source, {}) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 3de5eb0eb..2e6a9081d 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -1,56 +1,63 @@ import os -from collections.abc import Generator +from collections.abc import Iterator from datetime import datetime from datetime import timezone from pathlib import Path from typing import Any from typing import IO +from sqlalchemy.orm import Session + from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.file_utils import detect_encoding -from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip -from danswer.connectors.cross_connector_utils.file_utils import read_file -from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc -from danswer.connectors.file.utils import check_file_ext_is_valid -from danswer.connectors.file.utils import get_file_ext from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import Document from 
danswer.connectors.models import Section +from danswer.db.engine import get_sqlalchemy_engine +from danswer.file_processing.extract_file_text import check_file_ext_is_valid +from danswer.file_processing.extract_file_text import detect_encoding +from danswer.file_processing.extract_file_text import extract_file_text +from danswer.file_processing.extract_file_text import get_file_ext +from danswer.file_processing.extract_file_text import is_text_file_extension +from danswer.file_processing.extract_file_text import load_files_from_zip +from danswer.file_processing.extract_file_text import pdf_to_text +from danswer.file_processing.extract_file_text import read_text_file +from danswer.file_store.file_store import get_default_file_store from danswer.utils.logger import setup_logger - logger = setup_logger() -def _open_files_at_location( - file_path: str | Path, -) -> Generator[tuple[str, IO[Any], dict[str, Any]], Any, None]: - extension = get_file_ext(file_path) +def _read_files_and_metadata( + file_name: str, + db_session: Session, +) -> Iterator[tuple[str, IO, dict[str, Any]]]: + """Reads the file into IO, in the case of a zip file, yields each individual + file contained within, also includes the metadata dict if packaged in the zip""" + extension = get_file_ext(file_name) metadata: dict[str, Any] = {} + directory_path = os.path.dirname(file_name) + + file_content = get_default_file_store(db_session).read_file(file_name, mode="b") if extension == ".zip": for file_info, file, metadata in load_files_from_zip( - file_path, ignore_dirs=True + file_content, ignore_dirs=True ): - yield file_info.filename, file, metadata - elif extension in [".txt", ".md", ".mdx"]: - encoding = detect_encoding(file_path) - with open(file_path, "r", encoding=encoding, errors="replace") as file: - yield os.path.basename(file_path), file, metadata - elif extension == ".pdf": - with open(file_path, "rb") as file: - yield os.path.basename(file_path), file, metadata + yield 
os.path.join(directory_path, file_info.filename), file, metadata + elif check_file_ext_is_valid(extension): + yield file_name, file_content, metadata else: - logger.warning(f"Skipping file '{file_path}' with extension '{extension}'") + logger.warning(f"Skipping file '{file_name}' with extension '{extension}'") def _process_file( file_name: str, file: IO[Any], - metadata: dict[str, Any] = {}, + metadata: dict[str, Any] | None = None, pdf_pass: str | None = None, ) -> list[Document]: extension = get_file_ext(file_name) @@ -60,25 +67,36 @@ def _process_file( file_metadata: dict[str, Any] = {} - if extension == ".pdf": - file_content_raw = read_pdf_file( - file=file, file_name=file_name, pdf_pass=pdf_pass - ) - else: - file_content_raw, file_metadata = read_file(file) - file_metadata = {**metadata, **file_metadata} + if is_text_file_extension(file_name): + encoding = detect_encoding(file) + file_content_raw, file_metadata = read_text_file(file, encoding=encoding) - time_updated = file_metadata.get("time_updated", datetime.now(timezone.utc)) + # Using the PDF reader function directly to pass in password cleanly + elif extension == ".pdf": + file_content_raw = pdf_to_text(file=file, pdf_pass=pdf_pass) + + else: + file_content_raw = extract_file_text( + file_name=file_name, + file=file, + ) + + all_metadata = {**metadata, **file_metadata} if metadata else file_metadata + + # If this is set, we will show this in the UI as the "name" of the file + file_display_name_override = all_metadata.get("file_display_name") + + time_updated = all_metadata.get("time_updated", datetime.now(timezone.utc)) if isinstance(time_updated, str): time_updated = time_str_to_utc(time_updated) - dt_str = metadata.get("doc_updated_at") + dt_str = all_metadata.get("doc_updated_at") final_time_updated = time_str_to_utc(dt_str) if dt_str else time_updated - # add tags + # Metadata tags separate from the Danswer specific fields metadata_tags = { k: v - for k, v in file_metadata.items() + for k, v in 
all_metadata.items() if k not in [ "time_updated", @@ -87,20 +105,35 @@ def _process_file( "primary_owners", "secondary_owners", "filename", + "file_display_name", ] } + p_owner_names = all_metadata.get("primary_owners") + s_owner_names = all_metadata.get("secondary_owners") + p_owners = ( + [BasicExpertInfo(display_name=name) for name in p_owner_names] + if p_owner_names + else None + ) + s_owners = ( + [BasicExpertInfo(display_name=name) for name in s_owner_names] + if s_owner_names + else None + ) + return [ Document( - id=file_name, + id=f"FILE_CONNECTOR__{file_name}", # add a prefix to avoid conflicts with other connectors sections=[ - Section(link=metadata.get("link"), text=file_content_raw.strip()) + Section(link=all_metadata.get("link"), text=file_content_raw.strip()) ], source=DocumentSource.FILE, - semantic_identifier=file_name, + semantic_identifier=file_display_name_override + or os.path.basename(file_name), doc_updated_at=final_time_updated, - primary_owners=metadata.get("primary_owners"), - secondary_owners=metadata.get("secondary_owners"), + primary_owners=p_owners, + secondary_owners=s_owners, # currently metadata just houses tags, other stuff like owners / updated at have dedicated fields metadata=metadata_tags, ) @@ -123,24 +156,27 @@ class LocalFileConnector(LoadConnector): def load_from_state(self) -> GenerateDocumentsOutput: documents: list[Document] = [] - for file_location in self.file_locations: - current_datetime = datetime.now(timezone.utc) - files = _open_files_at_location(file_location) - - for file_name, file, metadata in files: - metadata["time_updated"] = metadata.get( - "time_updated", current_datetime - ) - documents.extend( - _process_file(file_name, file, metadata, self.pdf_pass) + with Session(get_sqlalchemy_engine()) as db_session: + for file_path in self.file_locations: + current_datetime = datetime.now(timezone.utc) + files = _read_files_and_metadata( + file_name=str(file_path), db_session=db_session ) - if len(documents) >= 
self.batch_size: - yield documents - documents = [] + for file_name, file, metadata in files: + metadata["time_updated"] = metadata.get( + "time_updated", current_datetime + ) + documents.extend( + _process_file(file_name, file, metadata, self.pdf_pass) + ) - if documents: - yield documents + if len(documents) >= self.batch_size: + yield documents + documents = [] + + if documents: + yield documents if __name__ == "__main__": diff --git a/backend/danswer/connectors/file/utils.py b/backend/danswer/connectors/file/utils.py deleted file mode 100644 index cb1f26f63..000000000 --- a/backend/danswer/connectors/file/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -import shutil -import time -import uuid -from pathlib import Path -from typing import Any -from typing import IO - -from danswer.configs.app_configs import FILE_CONNECTOR_TMP_STORAGE_PATH - -_VALID_FILE_EXTENSIONS = [".txt", ".zip", ".pdf", ".md", ".mdx"] - - -def get_file_ext(file_path_or_name: str | Path) -> str: - _, extension = os.path.splitext(file_path_or_name) - return extension - - -def check_file_ext_is_valid(ext: str) -> bool: - return ext in _VALID_FILE_EXTENSIONS - - -def write_temp_files( - files: list[tuple[str, IO[Any]]], - base_path: Path | str = FILE_CONNECTOR_TMP_STORAGE_PATH, -) -> list[str]: - """Writes temporary files to disk and returns their paths - - NOTE: need to pass in (file_name, File) tuples since FastAPI's `UploadFile` class - exposed SpooledTemporaryFile does not include a name. - """ - file_location = Path(base_path) / str(uuid.uuid4()) - os.makedirs(file_location, exist_ok=True) - - file_paths: list[str] = [] - for file_name, file in files: - extension = get_file_ext(file_name) - if not check_file_ext_is_valid(extension): - raise ValueError( - f"Invalid file extension for file: '{file_name}'. 
Must be one of {_VALID_FILE_EXTENSIONS}" - ) - - file_path = file_location / file_name - with open(file_path, "wb") as buffer: - # copy file content from uploaded file to the newly created file - shutil.copyfileobj(file, buffer) - - file_paths.append(str(file_path.absolute())) - - return file_paths - - -def file_age_in_hours(filepath: str | Path) -> float: - return (time.time() - os.path.getmtime(filepath)) / (60 * 60) diff --git a/backend/danswer/connectors/gmail/connector_auth.py b/backend/danswer/connectors/gmail/connector_auth.py index f6cfa5a74..ca08f7198 100644 --- a/backend/danswer/connectors/gmail/connector_auth.py +++ b/backend/danswer/connectors/gmail/connector_auth.py @@ -24,7 +24,7 @@ from danswer.connectors.gmail.constants import GMAIL_SERVICE_ACCOUNT_KEY from danswer.connectors.gmail.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.documents.models import CredentialBase from danswer.server.documents.models import GoogleAppCredentials from danswer.server.documents.models import GoogleServiceAccountKey @@ -91,7 +91,7 @@ def get_gmail_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0]) # type: ignore + get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore return str(auth_url) @@ -108,7 +108,7 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0]) # type: ignore + 
get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore return str(auth_url) @@ -157,7 +157,9 @@ def get_google_app_gmail_cred() -> GoogleAppCredentials: def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None: - get_dynamic_config_store().store(GMAIL_CRED_KEY, app_credentials.json()) + get_dynamic_config_store().store( + GMAIL_CRED_KEY, app_credentials.json(), encrypt=True + ) def delete_google_app_gmail_cred() -> None: @@ -173,13 +175,13 @@ def upsert_gmail_service_account_key( service_account_key: GoogleServiceAccountKey, ) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json() + GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json() + GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) diff --git a/backend/danswer/connectors/gong/connector.py b/backend/danswer/connectors/gong/connector.py index 711bdf11b..9ad24a5c9 100644 --- a/backend/danswer/connectors/gong/connector.py +++ b/backend/danswer/connectors/gong/connector.py @@ -198,7 +198,10 @@ class GongConnector(LoadConnector, PollConnector): f"Indexing Gong call from {call_time_str.split('T', 1)[0]}: {call_title}" ) - call_parties = call_details["parties"] + call_parties = cast(list[dict] | None, call_details.get("parties")) + if call_parties is None: + logger.error(f"Couldn't get parties for Call ID: {call_id}") + call_parties = [] id_to_name_map = self._parse_parties(call_parties) diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index 15c9894a6..73a541267 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -1,5 +1,4 @@ 
import io -import tempfile from collections.abc import Iterator from collections.abc import Sequence from datetime import datetime @@ -9,7 +8,6 @@ from itertools import chain from typing import Any from typing import cast -import docx2txt # type:ignore from google.auth.credentials import Credentials # type: ignore from googleapiclient import discovery # type: ignore from googleapiclient.errors import HttpError # type: ignore @@ -21,7 +19,6 @@ from danswer.configs.app_configs import GOOGLE_DRIVE_ONLY_ORG_PUBLIC from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.configs.constants import IGNORE_FOR_QA -from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.google_drive.connector_auth import ( get_google_drive_creds_for_authorized_user, @@ -42,6 +39,8 @@ from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.extract_file_text import docx_to_text +from danswer.file_processing.extract_file_text import pdf_to_text from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger @@ -321,15 +320,10 @@ def extract_text(file: dict[str, str], service: discovery.Resource) -> str: ) elif mime_type == GDriveMimeType.WORD_DOC.value: response = service.files().get_media(fileId=file["id"]).execute() - word_stream = io.BytesIO(response) - with tempfile.NamedTemporaryFile(delete=False) as temp: - temp.write(word_stream.getvalue()) - temp_path = temp.name - return docx2txt.process(temp_path) + return docx_to_text(file=io.BytesIO(response)) elif mime_type == GDriveMimeType.PDF.value: response = service.files().get_media(fileId=file["id"]).execute() - file_contents = 
read_pdf_file(file=io.BytesIO(response), file_name=file["name"]) - return file_contents + return pdf_to_text(file=io.BytesIO(response)) return UNSUPPORTED_FILE_TYPE_CONTENT @@ -388,7 +382,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector): def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None: """Checks for two different types of credentials. - (1) A credential which holds a token acquired via a user going thorugh + (1) A credential which holds a token acquired via a user going thorough the Google OAuth flow. (2) A credential which holds a service account key JSON file, which can then be used to impersonate any user in the workspace. diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py index f65e17772..c467516f6 100644 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ b/backend/danswer/connectors/google_drive/connector_auth.py @@ -24,7 +24,7 @@ from danswer.connectors.google_drive.constants import GOOGLE_DRIVE_SERVICE_ACCOU from danswer.connectors.google_drive.constants import SCOPES from danswer.db.credentials import update_credential_json from danswer.db.models import User -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.documents.models import CredentialBase from danswer.server.documents.models import GoogleAppCredentials from danswer.server.documents.models import GoogleServiceAccountKey @@ -91,7 +91,7 @@ def get_auth_url(credential_id: int) -> str: parsed_url = cast(ParseResult, urlparse(auth_url)) params = parse_qs(parsed_url.query) - get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0]) # type: ignore + get_dynamic_config_store().store(CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True) # type: ignore return str(auth_url) @@ -140,7 +140,9 @@ def 
get_google_app_cred() -> GoogleAppCredentials: def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None: - get_dynamic_config_store().store(GOOGLE_DRIVE_CRED_KEY, app_credentials.json()) + get_dynamic_config_store().store( + GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True + ) def delete_google_app_cred() -> None: @@ -154,7 +156,7 @@ def get_service_account_key() -> GoogleServiceAccountKey: def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: get_dynamic_config_store().store( - GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json() + GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True ) diff --git a/backend/danswer/connectors/google_site/connector.py b/backend/danswer/connectors/google_site/connector.py index 2a2be5ebe..9cfcf224e 100644 --- a/backend/danswer/connectors/google_site/connector.py +++ b/backend/danswer/connectors/google_site/connector.py @@ -5,16 +5,19 @@ from typing import cast from bs4 import BeautifulSoup from bs4 import Tag +from sqlalchemy.orm import Session from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.file_utils import load_files_from_zip -from danswer.connectors.cross_connector_utils.file_utils import read_file -from danswer.connectors.cross_connector_utils.html_utils import web_html_cleanup from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.db.engine import get_sqlalchemy_engine +from danswer.file_processing.extract_file_text import load_files_from_zip +from danswer.file_processing.extract_file_text import read_text_file +from danswer.file_processing.html_utils import web_html_cleanup +from danswer.file_store.file_store import get_default_file_store from 
danswer.utils.logger import setup_logger logger = setup_logger() @@ -66,8 +69,13 @@ class GoogleSitesConnector(LoadConnector): def load_from_state(self) -> GenerateDocumentsOutput: documents: list[Document] = [] + with Session(get_sqlalchemy_engine()) as db_session: + file_content_io = get_default_file_store(db_session).read_file( + self.zip_path, mode="b" + ) + # load the HTML files - files = load_files_from_zip(self.zip_path) + files = load_files_from_zip(file_content_io) count = 0 for file_info, file_io, _metadata in files: # skip non-published files @@ -78,7 +86,7 @@ class GoogleSitesConnector(LoadConnector): if extension != ".html": continue - file_content, _ = read_file(file_io) + file_content, _ = read_text_file(file_io) soup = BeautifulSoup(file_content, "html.parser") # get the link out of the navbar diff --git a/backend/danswer/connectors/guru/connector.py b/backend/danswer/connectors/guru/connector.py index b51b1d4e8..3c3c873b1 100644 --- a/backend/danswer/connectors/guru/connector.py +++ b/backend/danswer/connectors/guru/connector.py @@ -7,7 +7,6 @@ import requests from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector @@ -17,6 +16,7 @@ from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic from danswer.utils.logger import setup_logger # Potential Improvements diff --git a/backend/danswer/connectors/hubspot/connector.py b/backend/danswer/connectors/hubspot/connector.py index 
861f53ee6..bd13c1e75 100644 --- a/backend/danswer/connectors/hubspot/connector.py +++ b/backend/danswer/connectors/hubspot/connector.py @@ -94,6 +94,8 @@ class HubSpotConnector(LoadConnector, PollConnector): note = api_client.crm.objects.notes.basic_api.get_by_id( note_id=note.id, properties=["content", "hs_body_preview"] ) + if note.properties["hs_body_preview"] is None: + continue associated_notes.append(note.properties["hs_body_preview"]) associated_emails_str = " ,".join(associated_emails) diff --git a/backend/danswer/connectors/loopio/connector.py b/backend/danswer/connectors/loopio/connector.py index 503d6bd3f..e10bed876 100644 --- a/backend/danswer/connectors/loopio/connector.py +++ b/backend/danswer/connectors/loopio/connector.py @@ -9,10 +9,6 @@ from requests_oauthlib import OAuth2Session # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic -from danswer.connectors.cross_connector_utils.html_utils import ( - strip_excessive_newlines_and_spaces, -) from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector @@ -22,6 +18,8 @@ from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.file_processing.html_utils import strip_excessive_newlines_and_spaces from danswer.utils.logger import setup_logger LOOPIO_API_BASE = "https://api.loopio.com/" diff --git a/backend/danswer/connectors/mediawiki/__init__.py b/backend/danswer/connectors/mediawiki/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/backend/danswer/connectors/mediawiki/family.py b/backend/danswer/connectors/mediawiki/family.py new file mode 100644 index 000000000..0d9530667 --- /dev/null +++ b/backend/danswer/connectors/mediawiki/family.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +import builtins +import functools +import itertools +from typing import Any +from unittest import mock +from urllib.parse import urlparse +from urllib.parse import urlunparse + +from pywikibot import family # type: ignore[import-untyped] +from pywikibot import pagegenerators # type: ignore[import-untyped] +from pywikibot.scripts import generate_family_file # type: ignore[import-untyped] +from pywikibot.scripts.generate_user_files import pywikibot # type: ignore[import-untyped] + +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +@mock.patch.object( + builtins, "print", lambda *args: logger.info("\t".join(map(str, args))) +) +class FamilyFileGeneratorInMemory(generate_family_file.FamilyFileGenerator): + """A subclass of FamilyFileGenerator that writes the family file to memory instead of to disk.""" + + def __init__( + self, + url: str, + name: str, + dointerwiki: str | bool = True, + verify: str | bool = True, + ): + """Initialize the FamilyFileGeneratorInMemory.""" + + url_parse = urlparse(url, "https") + if not url_parse.netloc and url_parse.path: + url = urlunparse( + (url_parse.scheme, url_parse.path, url_parse.netloc, *url_parse[3:]) + ) + else: + url = urlunparse(url_parse) + assert isinstance(url, str) + + if any(x not in generate_family_file.NAME_CHARACTERS for x in name): + raise ValueError( + 'ERROR: Name of family "{}" must be ASCII letters and digits [a-zA-Z0-9]', + name, + ) + + if isinstance(dointerwiki, bool): + dointerwiki = "Y" if dointerwiki else "N" + assert isinstance(dointerwiki, str) + + if isinstance(verify, bool): + verify = "Y" if verify else "N" + assert isinstance(verify, str) + + super().__init__(url, name, dointerwiki, verify) + 
self.family_definition: type[family.Family] | None = None + + def get_params(self) -> bool: + """Get the parameters for the family class definition. + + This override prevents the method from prompting the user for input (which would be impossible in this context). + We do all the input validation in the constructor. + """ + return True + + def writefile(self, verify: Any) -> None: + """Write the family file. + + This overrides the method in the parent class to write the family definition to memory instead of to disk. + + Args: + verify: unused argument necessary to match the signature of the method in the parent class. + """ + code_hostname_pairs = { + f"{k}": f"{urlparse(w.server).netloc}" for k, w in self.wikis.items() + } + + code_path_pairs = {f"{k}": f"{w.scriptpath}" for k, w in self.wikis.items()} + + code_protocol_pairs = { + f"{k}": f"{urlparse(w.server).scheme}" for k, w in self.wikis.items() + } + + class Family(family.Family): # noqa: D101 + """The family definition for the wiki.""" + + name = "%(name)s" + langs = code_hostname_pairs + + def scriptpath(self, code: str) -> str: + return code_path_pairs[code] + + def protocol(self, code: str) -> str: + return code_protocol_pairs[code] + + self.family_definition = Family + + +@functools.lru_cache(maxsize=None) +def generate_family_class(url: str, name: str) -> type[family.Family]: + """Generate a family file for a given URL and name. + + Args: + url: The URL of the wiki. + name: The short name of the wiki (customizable by the user). + + Returns: + The family definition. + + Raises: + ValueError: If the family definition was not generated. + """ + + generator = FamilyFileGeneratorInMemory(url, name, "Y", "Y") + generator.run() + if generator.family_definition is None: + raise ValueError("Family definition was not generated.") + return generator.family_definition + + +def family_class_dispatch(url: str, name: str) -> type[family.Family]: + """Find or generate a family class for a given URL and name. 
+ + Args: + url: The URL of the wiki. + name: The short name of the wiki (customizable by the user). + + """ + if "wikipedia" in url: + import pywikibot.families.wikipedia_family # type: ignore[import-untyped] + + return pywikibot.families.wikipedia_family.Family + # TODO: Support additional families pre-defined in `pywikibot.families.*_family.py` files + return generate_family_class(url, name) + + +if __name__ == "__main__": + url = "fallout.fandom.com/wiki/Fallout_Wiki" + name = "falloutfandom" + + categories: list[str] = [] + pages = ["Fallout: New Vegas"] + recursion_depth = 1 + family_type = generate_family_class(url, name) + + site = pywikibot.Site(fam=family_type(), code="en") + categories = [ + pywikibot.Category(site, f"Category:{category.replace(' ', '_')}") + for category in categories + ] + pages = [pywikibot.Page(site, page) for page in pages] + all_pages = itertools.chain( + pages, + *[ + pagegenerators.CategorizedPageGenerator(category, recurse=recursion_depth) + for category in categories + ], + ) + for page in all_pages: + print(page.title()) + print(page.text[:1000]) diff --git a/backend/danswer/connectors/mediawiki/wiki.py b/backend/danswer/connectors/mediawiki/wiki.py new file mode 100644 index 000000000..2283d8130 --- /dev/null +++ b/backend/danswer/connectors/mediawiki/wiki.py @@ -0,0 +1,225 @@ +from __future__ import annotations + +import datetime +import itertools +from collections.abc import Generator +from typing import Any +from typing import ClassVar + +import pywikibot.time # type: ignore[import-untyped] +from pywikibot import pagegenerators # type: ignore[import-untyped] +from pywikibot import textlib # type: ignore[import-untyped] + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from 
danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.mediawiki.family import family_class_dispatch +from danswer.connectors.models import Document +from danswer.connectors.models import Section + + +def pywikibot_timestamp_to_utc_datetime( + timestamp: pywikibot.time.Timestamp, +) -> datetime.datetime: + """Convert a pywikibot timestamp to a datetime object in UTC. + + Args: + timestamp: The pywikibot timestamp to convert. + + Returns: + A datetime object in UTC. + """ + return datetime.datetime.astimezone(timestamp, tz=datetime.timezone.utc) + + +def get_doc_from_page( + page: pywikibot.Page, site: pywikibot.Site | None, source_type: DocumentSource +) -> Document: + """Generate Danswer Document from a MediaWiki page object. + + Args: + page: Page from a MediaWiki site. + site: MediaWiki site (used to parse the sections of the page using the site template, if available). + source_type: Source of the document. + + Returns: + Generated document. + """ + page_text = page.text + sections_extracted: textlib.Content = textlib.extract_sections(page_text, site) + + sections = [ + Section( + link=f"{page.full_url()}#" + section.heading.replace(" ", "_"), + text=section.title + section.content, + ) + for section in sections_extracted.sections + ] + sections.append( + Section( + link=page.full_url(), + text=sections_extracted.header, + ) + ) + + return Document( + source=source_type, + title=page.title(), + doc_updated_at=pywikibot_timestamp_to_utc_datetime( + page.latest_revision.timestamp + ), + sections=sections, + semantic_identifier=page.title(), + metadata={"categories": [category.title() for category in page.categories()]}, + id=page.pageid, + ) + + +class MediaWikiConnector(LoadConnector, PollConnector): + """A connector for MediaWiki wikis. + + Args: + hostname: The hostname of the wiki. + categories: The categories to include in the index. + pages: The pages to include in the index. 
+ recurse_depth: The depth to recurse into categories. -1 means unbounded recursion. + connector_name: The name of the connector. + language_code: The language code of the wiki. + batch_size: The batch size for loading documents. + + Raises: + ValueError: If `recurse_depth` is not an integer greater than or equal to -1. + """ + + document_source_type: ClassVar[DocumentSource] = DocumentSource.MEDIAWIKI + """DocumentSource type for all documents generated by instances of this class. Can be overridden for connectors + tailored for specific sites.""" + + def __init__( + self, + hostname: str, + categories: list[str], + pages: list[str], + recurse_depth: int, + connector_name: str, + language_code: str = "en", + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + if recurse_depth < -1: + raise ValueError( + f"recurse_depth must be an integer greater than or equal to -1. Got {recurse_depth} instead." + ) + # -1 means infinite recursion, which `pywikibot` will only do with `True` + self.recurse_depth: bool | int = True if recurse_depth == -1 else recurse_depth + + self.batch_size = batch_size + + # short names can only have ascii letters and digits + self.connector_name = connector_name + connector_name = "".join(ch for ch in connector_name if ch.isalnum()) + + self.family = family_class_dispatch(hostname, connector_name)() + self.site = pywikibot.Site(fam=self.family, code=language_code) + self.categories = [ + pywikibot.Category(self.site, f"Category:{category.replace(' ', '_')}") + for category in categories + ] + self.pages = [pywikibot.Page(self.site, page) for page in pages] + + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + """Load credentials for a MediaWiki site. + + Note: + For most read-only operations, MediaWiki API credentials are not necessary. + This method can be overridden in the event that a particular MediaWiki site + requires credentials. 
+ """ + return None + + def _get_doc_batch( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> Generator[list[Document], None, None]: + """Request batches of pages from a MediaWiki site. + + Args: + start: The beginning of the time period of pages to request. + end: The end of the time period of pages to request. + + Yields: + Lists of Documents containing each parsed page in a batch. + """ + doc_batch: list[Document] = [] + + # Pywikibot can handle batching for us, including only loading page contents when we finally request them. + category_pages = [ + pagegenerators.PreloadingGenerator( + pagegenerators.EdittimeFilterPageGenerator( + pagegenerators.CategorizedPageGenerator( + category, recurse=self.recurse_depth + ), + last_edit_start=datetime.datetime.fromtimestamp(start) + if start + else None, + last_edit_end=datetime.datetime.fromtimestamp(end) if end else None, + ), + groupsize=self.batch_size, + ) + for category in self.categories + ] + + # Since we can specify both individual pages and categories, we need to iterate over all of them. + all_pages = itertools.chain(self.pages, *category_pages) + for page in all_pages: + doc_batch.append( + get_doc_from_page(page, self.site, self.document_source_type) + ) + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + if doc_batch: + yield doc_batch + + def load_from_state(self) -> GenerateDocumentsOutput: + """Load all documents from the source. + + Returns: + A generator of documents. + """ + return self.poll_source(None, None) + + def poll_source( + self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None + ) -> GenerateDocumentsOutput: + """Poll the source for new documents. + + Args: + start: The start of the time range to poll. + end: The end of the time range to poll. + + Returns: + A generator of documents. 
+ """ + return self._get_doc_batch(start, end) + + +if __name__ == "__main__": + HOSTNAME = "fallout.fandom.com" + test_connector = MediaWikiConnector( + connector_name="Fallout", + hostname=HOSTNAME, + categories=["Fallout:_New_Vegas_factions"], + pages=["Fallout: New Vegas"], + recurse_depth=1, + ) + + all_docs = list(test_connector.load_from_state()) + print("All docs", all_docs) + current = datetime.datetime.now().timestamp() + one_day_ago = current - 30 * 24 * 60 * 60 # 30 days + latest_docs = list(test_connector.poll_source(one_day_ago, current)) + print("Latest docs", latest_docs) diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index dc29833df..85df7bfc9 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -1,5 +1,6 @@ from datetime import datetime from enum import Enum +from typing import Any from pydantic import BaseModel @@ -61,6 +62,34 @@ class BasicExpertInfo(BaseModel): return "Unknown" + def __eq__(self, other: Any) -> bool: + if not isinstance(other, BasicExpertInfo): + return False + return ( + self.display_name, + self.first_name, + self.middle_initial, + self.last_name, + self.email, + ) == ( + other.display_name, + other.first_name, + other.middle_initial, + other.last_name, + other.email, + ) + + def __hash__(self) -> int: + return hash( + ( + self.display_name, + self.first_name, + self.middle_initial, + self.last_name, + self.email, + ) + ) + class DocumentBase(BaseModel): """Used for Danswer ingestion api, the ID is inferred before use if not provided""" diff --git a/backend/danswer/connectors/notion/connector.py b/backend/danswer/connectors/notion/connector.py index 4bb81dc57..cadf10b32 100644 --- a/backend/danswer/connectors/notion/connector.py +++ b/backend/danswer/connectors/notion/connector.py @@ -7,12 +7,14 @@ from datetime import timezone from typing import Any from typing import Optional -import requests from retry import retry from 
danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP from danswer.configs.constants import DocumentSource +from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( + rl_requests, +) from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -84,7 +86,7 @@ class NotionConnector(LoadConnector, PollConnector): self.indexed_pages: set[str] = set() self.root_page_id = root_page_id # if enabled, will recursively index child pages as they are found rather - # relying entirely on the `search` API. We have recieved reports that the + # relying entirely on the `search` API. We have received reports that the # `search` API misses many pages - in those cases, this might need to be # turned on. It's not currently known why/when this is required. # NOTE: this also removes all benefits polling, since we need to traverse @@ -93,12 +95,14 @@ class NotionConnector(LoadConnector, PollConnector): self.recursive_index_enabled = recursive_index_enabled or self.root_page_id @retry(tries=3, delay=1, backoff=2) - def _fetch_blocks(self, block_id: str, cursor: str | None = None) -> dict[str, Any]: + def _fetch_child_blocks( + self, block_id: str, cursor: str | None = None + ) -> dict[str, Any] | None: """Fetch all child blocks via the Notion API.""" logger.debug(f"Fetching children of block with ID '{block_id}'") block_url = f"https://api.notion.com/v1/blocks/{block_id}/children" query_params = None if not cursor else {"start_cursor": cursor} - res = requests.get( + res = rl_requests.get( block_url, headers=self.headers, params=query_params, @@ -107,6 +111,15 @@ class NotionConnector(LoadConnector, PollConnector): try: res.raise_for_status() except Exception as e: + if res.status_code == 404: + # this happens when a page is not shared with the integration + # in this 
case, we should just ignore the page + logger.error( + f"Unable to access block with ID '{block_id}'. " + f"This is likely due to the block not being shared " + f"with the Danswer integration. Exact exception:\n\n{e}" + ) + return None logger.exception(f"Error fetching blocks - {res.json()}") raise e return res.json() @@ -116,7 +129,7 @@ class NotionConnector(LoadConnector, PollConnector): """Fetch a page from it's ID via the Notion API.""" logger.debug(f"Fetching page for ID '{page_id}'") block_url = f"https://api.notion.com/v1/pages/{page_id}" - res = requests.get( + res = rl_requests.get( block_url, headers=self.headers, timeout=_NOTION_CALL_TIMEOUT, @@ -136,7 +149,7 @@ class NotionConnector(LoadConnector, PollConnector): logger.debug(f"Fetching database for ID '{database_id}'") block_url = f"https://api.notion.com/v1/databases/{database_id}/query" body = None if not cursor else {"start_cursor": cursor} - res = requests.post( + res = rl_requests.post( block_url, headers=self.headers, json=body, @@ -187,29 +200,43 @@ class NotionConnector(LoadConnector, PollConnector): return result_pages def _read_blocks( - self, page_block_id: str + self, base_block_id: str ) -> tuple[list[tuple[str, str]], list[str]]: - """Reads blocks for a page""" + """Reads all child blocks for the specified block""" result_lines: list[tuple[str, str]] = [] child_pages: list[str] = [] cursor = None while True: - data = self._fetch_blocks(page_block_id, cursor) + data = self._fetch_child_blocks(base_block_id, cursor) + + # this happens when a block is not shared with the integration + if data is None: + return result_lines, child_pages for result in data["results"]: - logger.debug(f"Found block for page '{page_block_id}': {result}") + logger.debug( + f"Found child block for block with ID '{base_block_id}': {result}" + ) result_block_id = result["id"] result_type = result["type"] result_obj = result[result_type] if result_type == "ai_block": logger.warning( - f"Skipping 'ai_block' 
('{result_block_id}') for page '{page_block_id}': " + f"Skipping 'ai_block' ('{result_block_id}') for base block '{base_block_id}': " f"Notion API does not currently support reading AI blocks (as of 24/02/09) " f"(discussion: https://github.com/danswer-ai/danswer/issues/1053)" ) continue + if result_type == "unsupported": + logger.warning( + f"Skipping unsupported block type '{result_type}' " + f"('{result_block_id}') for base block '{base_block_id}': " + f"(discussion: https://github.com/danswer-ai/danswer/issues/1230)" + ) + continue + cur_result_text_arr = [] if "rich_text" in result_obj: for rich_text in result_obj["rich_text"]: @@ -310,7 +337,7 @@ class NotionConnector(LoadConnector, PollConnector): """Search for pages from a Notion database. Includes some small number of retries to handle misc, flakey failures.""" logger.debug(f"Searching for pages in Notion with query_dict: {query_dict}") - res = requests.post( + res = rl_requests.post( "https://api.notion.com/v1/search", headers=self.headers, json=query_dict, @@ -416,8 +443,10 @@ class NotionConnector(LoadConnector, PollConnector): ) if len(pages) > 0: yield from batch_generator(self._read_pages(pages), self.batch_size) - if db_res.has_more: - query_dict["start_cursor"] = db_res.next_cursor + if db_res.has_more: + query_dict["start_cursor"] = db_res.next_cursor + else: + break else: break diff --git a/backend/danswer/connectors/productboard/connector.py b/backend/danswer/connectors/productboard/connector.py index 1c013f42b..9ef301aa7 100644 --- a/backend/danswer/connectors/productboard/connector.py +++ b/backend/danswer/connectors/productboard/connector.py @@ -207,7 +207,7 @@ class ProductboardConnector(PollConnector): ): return True else: - logger.error(f"Unable to find updated_at for document '{document.id}'") + logger.debug(f"Unable to find updated_at for document '{document.id}'") return False diff --git a/backend/danswer/connectors/sharepoint/connector.py 
b/backend/danswer/connectors/sharepoint/connector.py index 56ac2829e..0c7497d0c 100644 --- a/backend/danswer/connectors/sharepoint/connector.py +++ b/backend/danswer/connectors/sharepoint/connector.py @@ -1,22 +1,16 @@ import io import os -import tempfile from datetime import datetime from datetime import timezone from typing import Any -import docx # type: ignore import msal # type: ignore -import openpyxl # type: ignore -import pptx # type: ignore from office365.graph_client import GraphClient # type: ignore from office365.onedrive.driveitems.driveItem import DriveItem # type: ignore from office365.onedrive.sites.site import Site # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.file_utils import is_text_file_extension -from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -25,6 +19,12 @@ from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.extract_file_text import docx_to_text +from danswer.file_processing.extract_file_text import file_io_to_text +from danswer.file_processing.extract_file_text import is_text_file_extension +from danswer.file_processing.extract_file_text import pdf_to_text +from danswer.file_processing.extract_file_text import pptx_to_text +from danswer.file_processing.extract_file_text import xlsx_to_text from danswer.utils.logger import setup_logger UNSUPPORTED_FILE_TYPE_CONTENT = "" # idea copied from the google drive side of things @@ -35,62 +35,28 @@ logger = setup_logger() def get_text_from_xlsx_driveitem(driveitem_object: 
DriveItem) -> str: file_content = driveitem_object.get_content().execute_query().value - excel_file = io.BytesIO(file_content) - workbook = openpyxl.load_workbook(excel_file, read_only=True) - - full_text = [] - for sheet in workbook.worksheets: - sheet_string = "\n".join( - ",".join(map(str, row)) - for row in sheet.iter_rows(min_row=1, values_only=True) - ) - full_text.append(sheet_string) - - return "\n".join(full_text) + return xlsx_to_text(file=io.BytesIO(file_content)) def get_text_from_docx_driveitem(driveitem_object: DriveItem) -> str: file_content = driveitem_object.get_content().execute_query().value - full_text = [] - - with tempfile.TemporaryDirectory() as local_path: - with open(os.path.join(local_path, driveitem_object.name), "wb") as local_file: - local_file.write(file_content) - doc = docx.Document(local_file.name) - for para in doc.paragraphs: - full_text.append(para.text) - return "\n".join(full_text) + return docx_to_text(file=io.BytesIO(file_content)) def get_text_from_pdf_driveitem(driveitem_object: DriveItem) -> str: file_content = driveitem_object.get_content().execute_query().value - file_text = read_pdf_file( - file=io.BytesIO(file_content), file_name=driveitem_object.name - ) + file_text = pdf_to_text(file=io.BytesIO(file_content)) return file_text def get_text_from_txt_driveitem(driveitem_object: DriveItem) -> str: file_content: bytes = driveitem_object.get_content().execute_query().value - text_string = file_content.decode("utf-8") - return text_string + return file_io_to_text(file=io.BytesIO(file_content)) def get_text_from_pptx_driveitem(driveitem_object: DriveItem) -> str: file_content = driveitem_object.get_content().execute_query().value - pptx_stream = io.BytesIO(file_content) - with tempfile.NamedTemporaryFile() as temp: - temp.write(pptx_stream.getvalue()) - presentation = pptx.Presentation(temp.name) - extracted_text = "" - for slide_number, slide in enumerate(presentation.slides, start=1): - extracted_text += f"\nSlide 
{slide_number}:\n" - - for shape in slide.shapes: - if hasattr(shape, "text"): - extracted_text += shape.text + "\n" - - return extracted_text + return pptx_to_text(file=io.BytesIO(file_content)) class SharepointConnector(LoadConnector, PollConnector): @@ -141,7 +107,7 @@ class SharepointConnector(LoadConnector, PollConnector): site_list_objects = site_object.lists.get().execute_query() for site_list_object in site_list_objects: try: - query = site_list_object.drive.root.get_files(True) + query = site_list_object.drive.root.get_files(True, 1000) if filter_str: query = query.filter(filter_str) driveitems = query.execute_query() @@ -186,10 +152,11 @@ class SharepointConnector(LoadConnector, PollConnector): end=end, ) - # goes over all urls, converts them into Document objects and then yjelds them in batches + # goes over all urls, converts them into Document objects and then yields them in batches doc_batch: list[Document] = [] batch_count = 0 for driveitem_object in driveitem_list: + logger.debug(f"Processing: {driveitem_object.web_url}") doc_batch.append( self.convert_driveitem_object_to_document(driveitem_object) ) diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py index 99a1abb3a..1a0c7e39d 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -1,4 +1,6 @@ import io +import ipaddress +import socket from enum import Enum from typing import Any from typing import cast @@ -18,13 +20,14 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_ID from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_CLIENT_SECRET from danswer.configs.app_configs import WEB_CONNECTOR_OAUTH_TOKEN_URL +from danswer.configs.app_configs import WEB_CONNECTOR_VALIDATE_URLS from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file -from 
danswer.connectors.cross_connector_utils.html_utils import web_html_cleanup from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.extract_file_text import pdf_to_text +from danswer.file_processing.html_utils import web_html_cleanup from danswer.utils.logger import setup_logger logger = setup_logger() @@ -41,6 +44,48 @@ class WEB_CONNECTOR_VALID_SETTINGS(str, Enum): UPLOAD = "upload" +def protected_url_check(url: str) -> None: + """Couple considerations: + - DNS mapping changes over time so we don't want to cache the results + - Fetching this is assumed to be relatively fast compared to other bottlenecks like reading + the page or embedding the contents + - To be extra safe, all IPs associated with the URL must be global + - This is to prevent misuse and not explicit attacks + """ + if not WEB_CONNECTOR_VALIDATE_URLS: + return + + parse = urlparse(url) + if parse.scheme != "http" and parse.scheme != "https": + raise ValueError("URL must be of scheme https?://") + + if not parse.hostname: + raise ValueError("URL must include a hostname") + + try: + # This may give a large list of IP addresses for domains with extensive DNS configurations + # such as large distributed systems of CDNs + info = socket.getaddrinfo(parse.hostname, None) + except socket.gaierror as e: + raise ConnectionError(f"DNS resolution failed for {parse.hostname}: {e}") + + for address in info: + ip = address[4][0] + if not ipaddress.ip_address(ip).is_global: + raise ValueError( + f"Non-global IP address detected: {ip}, skipping page {url}. 
" + f"The Web Connector is not allowed to read loopback, link-local, or private ranges" + ) + + +def check_internet_connection(url: str) -> None: + try: + response = requests.get(url, timeout=3) + response.raise_for_status() + except (requests.RequestException, ValueError): + raise Exception(f"Unable to reach {url} - check your internet connection") + + def is_valid_url(url: str) -> bool: try: result = urlparse(url) @@ -100,9 +145,16 @@ def extract_urls_from_sitemap(sitemap_url: str) -> list[str]: response.raise_for_status() soup = BeautifulSoup(response.content, "html.parser") - urls = [loc_tag.text for loc_tag in soup.find_all("loc")] + return [ + _ensure_absolute_url(sitemap_url, loc_tag.text) + for loc_tag in soup.find_all("loc") + ] - return urls + +def _ensure_absolute_url(source_url: str, maybe_relative_url: str) -> str: + if not urlparse(maybe_relative_url).netloc: + return urljoin(source_url, maybe_relative_url) + return maybe_relative_url def _ensure_valid_url(url: str) -> str: @@ -141,6 +193,10 @@ class WebConnector(LoadConnector): self.to_visit_list = extract_urls_from_sitemap(_ensure_valid_url(base_url)) elif web_connector_type == WEB_CONNECTOR_VALID_SETTINGS.UPLOAD: + logger.warning( + "This is not a UI supported Web Connector flow, " + "are you sure you want to do this?" 
+ ) self.to_visit_list = _read_urls_file(base_url) else: @@ -161,6 +217,10 @@ class WebConnector(LoadConnector): base_url = to_visit[0] # For the recursive case doc_batch: list[Document] = [] + # Needed to report error + at_least_one_doc = False + last_error = None + playwright, context = start_playwright() restart_playwright = False while to_visit: @@ -169,9 +229,17 @@ class WebConnector(LoadConnector): continue visited_links.add(current_url) + try: + protected_url_check(current_url) + except Exception as e: + last_error = f"Invalid URL {current_url} due to {e}" + logger.warning(last_error) + continue + logger.info(f"Visiting {current_url}") try: + check_internet_connection(current_url) if restart_playwright: playwright, context = start_playwright() restart_playwright = False @@ -179,9 +247,7 @@ class WebConnector(LoadConnector): if current_url.split(".")[-1] == "pdf": # PDF files are not checked for links response = requests.get(current_url) - page_text = read_pdf_file( - file=io.BytesIO(response.content), file_name=current_url - ) + page_text = pdf_to_text(file=io.BytesIO(response.content)) doc_batch.append( Document( @@ -195,10 +261,11 @@ class WebConnector(LoadConnector): continue page = context.new_page() - page.goto(current_url) + page_response = page.goto(current_url) final_page = page.url if final_page != current_url: logger.info(f"Redirected to {final_page}") + protected_url_check(final_page) current_url = final_page if current_url in visited_links: logger.info("Redirected page already indexed") @@ -214,6 +281,11 @@ class WebConnector(LoadConnector): if link not in visited_links: to_visit.append(link) + if page_response and str(page_response.status)[0] in ("4", "5"): + last_error = f"Skipped indexing {current_url} due to HTTP {page_response.status} response" + logger.info(last_error) + continue + parsed_html = web_html_cleanup(soup, self.mintlify_cleanup) doc_batch.append( @@ -230,7 +302,8 @@ class WebConnector(LoadConnector): page.close() except 
Exception as e: - logger.error(f"Failed to fetch '{current_url}': {e}") + last_error = f"Failed to fetch '{current_url}': {e}" + logger.error(last_error) playwright.stop() restart_playwright = True continue @@ -238,13 +311,20 @@ class WebConnector(LoadConnector): if len(doc_batch) >= self.batch_size: playwright.stop() restart_playwright = True + at_least_one_doc = True yield doc_batch doc_batch = [] if doc_batch: playwright.stop() + at_least_one_doc = True yield doc_batch + if not at_least_one_doc: + if last_error: + raise RuntimeError(last_error) + raise RuntimeError("No valid pages found.") + if __name__ == "__main__": connector = WebConnector("https://docs.danswer.dev/") diff --git a/backend/danswer/connectors/wikipedia/__init__.py b/backend/danswer/connectors/wikipedia/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/connectors/wikipedia/connector.py b/backend/danswer/connectors/wikipedia/connector.py new file mode 100644 index 000000000..2788c22c1 --- /dev/null +++ b/backend/danswer/connectors/wikipedia/connector.py @@ -0,0 +1,30 @@ +from typing import ClassVar + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from danswer.configs.constants import DocumentSource +from danswer.connectors.mediawiki import wiki + + +class WikipediaConnector(wiki.MediaWikiConnector): + """Connector for Wikipedia.""" + + document_source_type: ClassVar[DocumentSource] = DocumentSource.WIKIPEDIA + + def __init__( + self, + categories: list[str], + pages: list[str], + recurse_depth: int, + connector_name: str, + language_code: str = "en", + batch_size: int = INDEX_BATCH_SIZE, + ) -> None: + super().__init__( + hostname="wikipedia.org", + categories=categories, + pages=pages, + recurse_depth=recurse_depth, + connector_name=connector_name, + language_code=language_code, + batch_size=batch_size, + ) diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index 5e03b6be0..fc9b703c6 
100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -5,8 +5,9 @@ from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic -from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc +from danswer.connectors.cross_connector_utils.miscellaneous_utils import ( + time_str_to_utc, +) from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -14,6 +15,7 @@ from danswer.connectors.interfaces import SecondsSinceUnixEpoch from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic def _article_to_document(article: Article) -> Document: @@ -21,6 +23,8 @@ def _article_to_document(article: Article) -> Document: display_name=article.author.name, email=article.author.email ) update_time = time_str_to_utc(article.updated_at) + labels = [str(label) for label in article.label_names] + return Document( id=f"article:{article.id}", sections=[ @@ -30,7 +34,7 @@ def _article_to_document(article: Article) -> Document: semantic_identifier=article.title, doc_updated_at=update_time, primary_owners=[author], - metadata={"type": "article"}, + metadata={"labels": labels} if labels else {}, ) @@ -45,8 +49,15 @@ class ZendeskConnector(LoadConnector, PollConnector): self.zendesk_client: Zenpy | None = None def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: + # Subdomain is actually the whole URL + subdomain = ( + credentials["zendesk_subdomain"] + .replace("https://", "") + 
.split(".zendesk.com")[0] + ) + self.zendesk_client = Zenpy( - subdomain=credentials["zendesk_subdomain"], + subdomain=subdomain, email=credentials["zendesk_email"], token=credentials["zendesk_token"], ) @@ -70,7 +81,7 @@ class ZendeskConnector(LoadConnector, PollConnector): ) doc_batch = [] for article in articles: - if article.body is None: + if article.body is None or article.draft: continue doc_batch.append(_article_to_document(article)) diff --git a/backend/danswer/danswerbot/slack/blocks.py b/backend/danswer/danswerbot/slack/blocks.py index 8851ecf45..a881819c5 100644 --- a/backend/danswer/danswerbot/slack/blocks.py +++ b/backend/danswer/danswerbot/slack/blocks.py @@ -1,15 +1,20 @@ +import re from datetime import datetime +from re import Match import pytz import timeago # type: ignore from slack_sdk.models.blocks import ActionsBlock from slack_sdk.models.blocks import Block from slack_sdk.models.blocks import ButtonElement +from slack_sdk.models.blocks import ContextBlock from slack_sdk.models.blocks import DividerBlock from slack_sdk.models.blocks import HeaderBlock from slack_sdk.models.blocks import Option from slack_sdk.models.blocks import RadioButtonsElement from slack_sdk.models.blocks import SectionBlock +from slack_sdk.models.blocks.basic_components import MarkdownTextObject +from slack_sdk.models.blocks.block_elements import ImageElement from danswer.chat.models import DanswerQuote from danswer.configs.app_configs import DISABLE_GENERATIVE_AI @@ -22,6 +27,7 @@ from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import FOLLOWUP_BUTTON_RESOLVED_ACTION_ID from danswer.danswerbot.slack.constants import IMMEDIATE_RESOLVED_BUTTON_ACTION_ID from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID +from danswer.danswerbot.slack.icons import source_to_github_img_link from danswer.danswerbot.slack.utils import build_feedback_id from danswer.danswerbot.slack.utils import 
remove_slack_text_interactions from danswer.danswerbot.slack.utils import translate_vespa_highlight_to_slack @@ -29,22 +35,89 @@ from danswer.search.models import SavedSearchDoc from danswer.utils.text_processing import decode_escapes from danswer.utils.text_processing import replace_whitespaces_w_space -_MAX_BLURB_LEN = 75 +_MAX_BLURB_LEN = 45 -def build_qa_feedback_block(message_id: int) -> Block: +def get_feedback_reminder_blocks(thread_link: str, include_followup: bool) -> Block: + text = ( + f"Please provide feedback on <{thread_link}|this answer>. " + "This is essential to help us to improve the quality of the answers. " + "Please rate it by clicking the `Helpful` or `Not helpful` button. " + ) + if include_followup: + text += "\n\nIf you need more help, click the `I need more help from a human!` button. " + + text += "\n\nThanks!" + + return SectionBlock(text=text) + + +def _process_citations_for_slack(text: str) -> str: + """ + Converts instances of [[x]](LINK) in the input text to Slack's link format . + + Args: + - text (str): The input string containing markdown links. + + Returns: + - str: The string with markdown links converted to Slack format. 
+ """ + # Regular expression to find all instances of [[x]](LINK) + pattern = r"\[\[(.*?)\]\]\((.*?)\)" + + # Function to replace each found instance with Slack's format + def slack_link_format(match: Match) -> str: + link_text = match.group(1) + link_url = match.group(2) + return f"<{link_url}|[{link_text}]>" + + # Substitute all matches in the input text + return re.sub(pattern, slack_link_format, text) + + +def _split_text(text: str, limit: int = 3000) -> list[str]: + if len(text) <= limit: + return [text] + + chunks = [] + while text: + if len(text) <= limit: + chunks.append(text) + break + + # Find the nearest space before the limit to avoid splitting a word + split_at = text.rfind(" ", 0, limit) + if split_at == -1: # No spaces found, force split + split_at = limit + + chunk = text[:split_at] + chunks.append(chunk) + text = text[split_at:].lstrip() # Remove leading spaces from the next chunk + + return chunks + + +def clean_markdown_link_text(text: str) -> str: + # Remove any newlines within the text + return text.replace("\n", " ").strip() + + +def build_qa_feedback_block( + message_id: int, feedback_reminder_id: str | None = None +) -> Block: return ActionsBlock( block_id=build_feedback_id(message_id), elements=[ ButtonElement( action_id=LIKE_BLOCK_ACTION_ID, - text="👍", + text="👍 Helpful", style="primary", + value=feedback_reminder_id, ), ButtonElement( action_id=DISLIKE_BLOCK_ACTION_ID, - text="👎", - style="danger", + text="👎 Not helpful", + value=feedback_reminder_id, ), ], ) @@ -164,6 +237,80 @@ def build_documents_blocks( return section_blocks +def build_sources_blocks( + cited_documents: list[tuple[int, SavedSearchDoc]], + num_docs_to_display: int = DANSWER_BOT_NUM_DOCS_TO_DISPLAY, +) -> list[Block]: + if not cited_documents: + return [ + SectionBlock( + text="*Warning*: no sources were cited for this answer, so it may be unreliable 😔" + ) + ] + + seen_docs_identifiers = set() + section_blocks: list[Block] = [SectionBlock(text="*Sources:*")] + 
included_docs = 0 + for citation_num, d in cited_documents: + if d.document_id in seen_docs_identifiers: + continue + seen_docs_identifiers.add(d.document_id) + + doc_sem_id = d.semantic_identifier + if d.source_type == DocumentSource.SLACK.value: + # for legacy reasons, before the switch to how Slack semantic identifiers are constructed + if "#" not in doc_sem_id: + doc_sem_id = "#" + doc_sem_id + + # this is needed to try and prevent the line from overflowing + # if it does overflow, the image gets placed above the title and it + # looks bad + doc_sem_id = ( + doc_sem_id[:_MAX_BLURB_LEN] + "..." + if len(doc_sem_id) > _MAX_BLURB_LEN + else doc_sem_id + ) + + owner_str = f"By {d.primary_owners[0]}" if d.primary_owners else None + days_ago_str = ( + timeago.format(d.updated_at, datetime.now(pytz.utc)) + if d.updated_at + else None + ) + final_metadata_str = " | ".join( + ([owner_str] if owner_str else []) + + ([days_ago_str] if days_ago_str else []) + ) + + document_title = clean_markdown_link_text(doc_sem_id) + img_link = source_to_github_img_link(d.source_type) + + section_blocks.append( + ContextBlock( + elements=( + [ + ImageElement( + image_url=img_link, + alt_text=f"{d.source_type.value} logo", + ) + ] + if img_link + else [] + ) + + [ + MarkdownTextObject( + text=f"*<{d.link}|[{citation_num}] {document_title}>*\n{final_metadata_str}" + ), + ] + ) + ) + + if included_docs >= num_docs_to_display: + break + + return section_blocks + + def build_quotes_block( quotes: list[DanswerQuote], ) -> list[Block]: @@ -214,15 +361,15 @@ def build_qa_response_blocks( time_cutoff: datetime | None, favor_recent: bool, skip_quotes: bool = False, + process_message_for_citations: bool = False, skip_ai_feedback: bool = False, + feedback_reminder_id: str | None = None, ) -> list[Block]: if DISABLE_GENERATIVE_AI: return [] quotes_blocks: list[Block] = [] - ai_answer_header = HeaderBlock(text="AI Answer") - filter_block: Block | None = None if time_cutoff or favor_recent or 
source_filters: filter_text = "Filters: " @@ -242,12 +389,18 @@ def build_qa_response_blocks( filter_block = SectionBlock(text=f"_{filter_text}_") if not answer: - answer_block = SectionBlock( - text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓" - ) + answer_blocks = [ + SectionBlock( + text="Sorry, I was unable to find an answer, but I did find some potentially relevant docs 🤓" + ) + ] else: answer_processed = decode_escapes(remove_slack_text_interactions(answer)) - answer_block = SectionBlock(text=answer_processed) + if process_message_for_citations: + answer_processed = _process_citations_for_slack(answer_processed) + answer_blocks = [ + SectionBlock(text=text) for text in _split_text(answer_processed) + ] if quotes: quotes_blocks = build_quotes_block(quotes) @@ -259,19 +412,22 @@ def build_qa_response_blocks( ) ] - response_blocks: list[Block] = [ai_answer_header] + response_blocks: list[Block] = [] if filter_block is not None: response_blocks.append(filter_block) - response_blocks.append(answer_block) + response_blocks.extend(answer_blocks) if message_id is not None and not skip_ai_feedback: - response_blocks.append(build_qa_feedback_block(message_id=message_id)) + response_blocks.append( + build_qa_feedback_block( + message_id=message_id, feedback_reminder_id=feedback_reminder_id + ) + ) if not skip_quotes: response_blocks.extend(quotes_blocks) - response_blocks.append(DividerBlock()) return response_blocks diff --git a/backend/danswer/danswerbot/slack/constants.py b/backend/danswer/danswerbot/slack/constants.py index a4930b593..1e524025f 100644 --- a/backend/danswer/danswerbot/slack/constants.py +++ b/backend/danswer/danswerbot/slack/constants.py @@ -1,3 +1,5 @@ +from enum import Enum + LIKE_BLOCK_ACTION_ID = "feedback-like" DISLIKE_BLOCK_ACTION_ID = "feedback-dislike" FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID = "feedback-doc-button" @@ -6,3 +8,9 @@ FOLLOWUP_BUTTON_ACTION_ID = "followup-button" 
FOLLOWUP_BUTTON_RESOLVED_ACTION_ID = "followup-resolved-button" SLACK_CHANNEL_ID = "channel_id" VIEW_DOC_FEEDBACK_ID = "view-doc-feedback" + + +class FeedbackVisibility(str, Enum): + PRIVATE = "private" + ANONYMOUS = "anonymous" + PUBLIC = "public" diff --git a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py index 0ca030612..3a0209b07 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py @@ -15,13 +15,18 @@ from danswer.danswerbot.slack.blocks import build_follow_up_resolved_blocks from danswer.danswerbot.slack.blocks import get_document_feedback_blocks from danswer.danswerbot.slack.config import get_slack_bot_config_for_channel from danswer.danswerbot.slack.constants import DISLIKE_BLOCK_ACTION_ID +from danswer.danswerbot.slack.constants import FeedbackVisibility from danswer.danswerbot.slack.constants import LIKE_BLOCK_ACTION_ID from danswer.danswerbot.slack.constants import VIEW_DOC_FEEDBACK_ID +from danswer.danswerbot.slack.handlers.handle_message import ( + remove_scheduled_feedback_reminder, +) from danswer.danswerbot.slack.utils import build_feedback_id from danswer.danswerbot.slack.utils import decompose_action_id from danswer.danswerbot.slack.utils import fetch_groupids_from_names from danswer.danswerbot.slack.utils import fetch_userids_from_emails from danswer.danswerbot.slack.utils import get_channel_name_from_id +from danswer.danswerbot.slack.utils import get_feedback_visibility from danswer.danswerbot.slack.utils import respond_in_thread from danswer.danswerbot.slack.utils import update_emote_react from danswer.db.engine import get_sqlalchemy_engine @@ -70,6 +75,7 @@ def handle_doc_feedback_button( def handle_slack_feedback( feedback_id: str, feedback_type: str, + feedback_msg_reminder: str, client: WebClient, user_id_to_post_confirmation: str, channel_id_to_post_confirmation: str, @@ -88,6 
+94,11 @@ def handle_slack_feedback( user_id=None, # no "user" for Slack bot for now db_session=db_session, ) + remove_scheduled_feedback_reminder( + client=client, + channel=user_id_to_post_confirmation, + msg_id=feedback_msg_reminder, + ) elif feedback_type in [ SearchFeedbackType.ENDORSE.value, SearchFeedbackType.REJECT.value, @@ -120,13 +131,33 @@ def handle_slack_feedback( else: logger_base.error(f"Feedback type '{feedback_type}' not supported") - # post message to slack confirming that feedback was received - client.chat_postEphemeral( - channel=channel_id_to_post_confirmation, - user=user_id_to_post_confirmation, - thread_ts=thread_ts_to_post_confirmation, - text="Thanks for your feedback!", - ) + if get_feedback_visibility() == FeedbackVisibility.PRIVATE or feedback_type not in [ + LIKE_BLOCK_ACTION_ID, + DISLIKE_BLOCK_ACTION_ID, + ]: + client.chat_postEphemeral( + channel=channel_id_to_post_confirmation, + user=user_id_to_post_confirmation, + thread_ts=thread_ts_to_post_confirmation, + text="Thanks for your feedback!", + ) + else: + feedback_response_txt = ( + "liked" if feedback_type == LIKE_BLOCK_ACTION_ID else "disliked" + ) + + if get_feedback_visibility() == FeedbackVisibility.ANONYMOUS: + msg = f"A user has {feedback_response_txt} the AI Answer" + else: + msg = f"<@{user_id_to_post_confirmation}> has {feedback_response_txt} the AI Answer" + + respond_in_thread( + client=client, + channel=channel_id_to_post_confirmation, + text=msg, + thread_ts=thread_ts_to_post_confirmation, + unfurl=False, + ) def handle_followup_button( diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index 2bd9759ff..90b3b354c 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -1,3 +1,4 @@ +import datetime import functools import logging from collections.abc import Callable @@ -9,22 +10,28 @@ from typing 
import TypeVar from retry import retry from slack_sdk import WebClient from slack_sdk.errors import SlackApiError +from slack_sdk.models.blocks import DividerBlock +from slack_sdk.models.blocks import SectionBlock from sqlalchemy.orm import Session -from danswer.chat.chat_utils import compute_max_document_tokens +from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.danswerbot_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_COT from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER from danswer.configs.danswerbot_configs import DANSWER_BOT_DISPLAY_ERROR_MSGS +from danswer.configs.danswerbot_configs import DANSWER_BOT_FEEDBACK_REMINDER from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES from danswer.configs.danswerbot_configs import DANSWER_BOT_TARGET_CHUNK_PERCENTAGE +from danswer.configs.danswerbot_configs import DANSWER_BOT_USE_QUOTES +from danswer.configs.danswerbot_configs import DANSWER_FOLLOWUP_EMOJI from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI from danswer.configs.danswerbot_configs import DISABLE_DANSWER_BOT_FILTER_DETECT from danswer.configs.danswerbot_configs import ENABLE_DANSWERBOT_REFLEXION -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.danswerbot.slack.blocks import build_documents_blocks from danswer.danswerbot.slack.blocks import build_follow_up_block from danswer.danswerbot.slack.blocks import build_qa_response_blocks +from danswer.danswerbot.slack.blocks import build_sources_blocks +from danswer.danswerbot.slack.blocks import get_feedback_reminder_blocks from danswer.danswerbot.slack.blocks import get_restate_blocks from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.models import SlackMessageInfo @@ -35,7 +42,14 @@ from danswer.danswerbot.slack.utils import slack_usage_report from 
danswer.danswerbot.slack.utils import SlackRateLimiter from danswer.danswerbot.slack.utils import update_emote_react from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import Persona from danswer.db.models import SlackBotConfig +from danswer.db.models import SlackBotResponseType +from danswer.db.persona import fetch_persona_by_id +from danswer.llm.answering.prompts.citations_prompt import ( + compute_max_document_tokens_for_persona, +) +from danswer.llm.factory import get_llm_for_persona from danswer.llm.utils import check_number_of_tokens from danswer.llm.utils import get_max_input_tokens from danswer.one_shot_answer.answer_question import get_search_answer @@ -45,6 +59,7 @@ from danswer.search.models import BaseFilters from danswer.search.models import OptionalSearchSetting from danswer.search.models import RetrievalDetails from danswer.utils.logger import setup_logger +from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW logger_base = setup_logger() @@ -92,10 +107,75 @@ def send_msg_ack_to_user(details: SlackMessageInfo, client: WebClient) -> None: ) +def schedule_feedback_reminder( + details: SlackMessageInfo, include_followup: bool, client: WebClient +) -> str | None: + logger = cast( + logging.Logger, + ChannelIdAdapter( + logger_base, extra={SLACK_CHANNEL_ID: details.channel_to_respond} + ), + ) + if not DANSWER_BOT_FEEDBACK_REMINDER: + logger.info("Scheduled feedback reminder disabled...") + return None + + try: + permalink = client.chat_getPermalink( + channel=details.channel_to_respond, + message_ts=details.msg_to_respond, # type:ignore + ) + except SlackApiError as e: + logger.error(f"Unable to generate the feedback reminder permalink: {e}") + return None + + now = datetime.datetime.now() + future = now + datetime.timedelta(minutes=DANSWER_BOT_FEEDBACK_REMINDER) + + try: + response = client.chat_scheduleMessage( + channel=details.sender, # type:ignore + post_at=int(future.timestamp()), + blocks=[ + 
get_feedback_reminder_blocks( + thread_link=permalink.data["permalink"], # type:ignore + include_followup=include_followup, + ) + ], + text="", + ) + logger.info("Scheduled feedback reminder configured") + return response.data["scheduled_message_id"] # type:ignore + except SlackApiError as e: + logger.error(f"Unable to generate the feedback reminder message: {e}") + return None + + +def remove_scheduled_feedback_reminder( + client: WebClient, channel: str | None, msg_id: str +) -> None: + logger = cast( + logging.Logger, + ChannelIdAdapter(logger_base, extra={SLACK_CHANNEL_ID: channel}), + ) + + try: + client.chat_deleteScheduledMessage( + channel=channel, scheduled_message_id=msg_id # type:ignore + ) + logger.info("Scheduled feedback reminder deleted") + except SlackApiError as e: + if e.response["error"] == "invalid_scheduled_message_id": + logger.info( + "Unable to delete the scheduled message. It must have already been posted" + ) + + def handle_message( message_info: SlackMessageInfo, channel_config: SlackBotConfig | None, client: WebClient, + feedback_reminder_id: str | None, num_retries: int = DANSWER_BOT_NUM_RETRIES, answer_generation_timeout: int = DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, should_respond_with_error_msgs: bool = DANSWER_BOT_DISPLAY_ERROR_MSGS, @@ -137,6 +217,13 @@ def handle_message( should_respond_even_with_no_docs = persona.num_chunks == 0 if persona else False + # figure out if we want to use citations or quotes + use_citations = ( + not DANSWER_BOT_USE_QUOTES + if channel_config is None + else channel_config.response_type == SlackBotResponseType.CITATIONS + ) + # List of user id to send message to, if None, send to everyone in channel send_to: list[str] | None = None respond_tag_only = False @@ -210,7 +297,7 @@ def handle_message( logger=logger, ) @rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to) - def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse: + def 
_get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | None: action = "slack_message" if is_bot_msg: action = "slack_slash_message" @@ -223,32 +310,41 @@ def handle_message( max_document_tokens: int | None = None max_history_tokens: int | None = None - if len(new_message_request.messages) > 1: - llm_name = GEN_AI_MODEL_VERSION - if persona and persona.llm_model_version_override: - llm_name = persona.llm_model_version_override - - # In cases of threads, split the available tokens between docs and thread context - input_tokens = get_max_input_tokens(model_name=llm_name) - max_history_tokens = int(input_tokens * thread_context_percent) - - remaining_tokens = input_tokens - max_history_tokens - - query_text = new_message_request.messages[0].message - if persona: - max_document_tokens = compute_max_document_tokens( - persona=persona, - actual_user_input=query_text, - max_llm_token_override=remaining_tokens, - ) - else: - max_document_tokens = ( - remaining_tokens - - 512 # Needs to be more than any of the QA prompts - - check_number_of_tokens(query_text) - ) with Session(get_sqlalchemy_engine()) as db_session: + if len(new_message_request.messages) > 1: + persona = cast( + Persona, + fetch_persona_by_id(db_session, new_message_request.persona_id), + ) + llm = get_llm_for_persona(persona) + + # In cases of threads, split the available tokens between docs and thread context + input_tokens = get_max_input_tokens( + model_name=llm.config.model_name, + model_provider=llm.config.model_provider, + ) + max_history_tokens = int(input_tokens * thread_context_percent) + + remaining_tokens = input_tokens - max_history_tokens + + query_text = new_message_request.messages[0].message + if persona: + max_document_tokens = compute_max_document_tokens_for_persona( + persona=persona, + actual_user_input=query_text, + max_llm_token_override=remaining_tokens, + ) + else: + max_document_tokens = ( + remaining_tokens + - 512 # Needs to be more than any of the QA prompts + - 
check_number_of_tokens(query_text) + ) + + if DISABLE_GENERATIVE_AI: + return None + # This also handles creating the query event in postgres answer = get_search_answer( query_req=new_message_request, @@ -259,6 +355,8 @@ def handle_message( answer_generation_timeout=answer_generation_timeout, enable_reflexion=reflexion, bypass_acl=bypass_acl, + use_citations=use_citations, + danswerbot_flow=True, ) if not answer.error_msg: return answer @@ -296,6 +394,7 @@ def handle_message( persona_id=persona.id if persona is not None else 0, retrieval_options=retrieval_details, chain_of_thought=not disable_cot, + skip_rerank=not ENABLE_RERANKING_ASYNC_FLOW, ) ) except Exception as e: @@ -328,6 +427,46 @@ def handle_message( return True + # Edge case handling, for tracking down the Slack usage issue + if answer is None: + assert DISABLE_GENERATIVE_AI is True + try: + respond_in_thread( + client=client, + channel=channel, + receiver_ids=send_to, + text="Hello! Danswer has some results for you!", + blocks=[ + SectionBlock( + text="Danswer is down for maintenance.\nWe're working hard on recharging the AI!" + ) + ], + thread_ts=message_ts_to_respond_to, + # don't unfurl, since otherwise we will have 5+ previews which makes the message very long + unfurl=False, + ) + + # For DM (ephemeral message), we need to create a thread via a normal message so the user can see + # the ephemeral message. This also will give the user a notification which ephemeral message does not. + if respond_team_member_list: + respond_in_thread( + client=client, + channel=channel, + text=( + "👋 Hi, we've just gathered and forwarded the relevant " + + "information to the team. They'll get back to you shortly!" 
+ ), + thread_ts=message_ts_to_respond_to, + ) + + return False + + except Exception: + logger.exception( + f"Unable to process message - could not respond in slack in {num_retries} attempts" + ) + return True + # Got an answer at this point, can remove reaction and give results try: update_emote_react( @@ -344,6 +483,14 @@ def handle_message( logger.info( "Answer was evaluated to be invalid, throwing it away without responding." ) + update_emote_react( + emoji=DANSWER_FOLLOWUP_EMOJI, + channel=message_info.channel_to_respond, + message_ts=message_info.msg_to_respond, + remove=False, + client=client, + ) + if answer.answer: logger.debug(answer.answer) return True @@ -387,7 +534,11 @@ def handle_message( source_filters=retrieval_info.applied_source_filters, time_cutoff=retrieval_info.applied_time_cutoff, favor_recent=retrieval_info.recency_bias_multiplier > 1, - skip_quotes=persona is not None, # currently Personas don't support quotes + # currently Personas don't support quotes + # if citations are enabled, also don't use quotes + skip_quotes=persona is not None or use_citations, + process_message_for_citations=use_citations, + feedback_reminder_id=feedback_reminder_id, ) # Get the chunks fed to the LLM only, then fill with other docs @@ -397,16 +548,33 @@ def handle_message( doc for idx, doc in enumerate(top_docs) if idx not in llm_doc_inds ] priority_ordered_docs = llm_docs + remaining_docs - document_blocks = ( - build_documents_blocks( + + document_blocks = [] + citations_block = [] + # if citations are enabled, only show cited documents + if use_citations: + citations = answer.citations or [] + cited_docs = [] + for citation in citations: + matching_doc = next( + (d for d in top_docs if d.document_id == citation.document_id), + None, + ) + if matching_doc: + cited_docs.append((citation.citation_num, matching_doc)) + + cited_docs.sort() + citations_block = build_sources_blocks(cited_documents=cited_docs) + elif priority_ordered_docs: + document_blocks = 
build_documents_blocks( documents=priority_ordered_docs, message_id=answer.chat_message_id, ) - if priority_ordered_docs - else [] - ) + document_blocks = [DividerBlock()] + document_blocks - all_blocks = restate_question_block + answer_blocks + document_blocks + all_blocks = ( + restate_question_block + answer_blocks + citations_block + document_blocks + ) if channel_conf and channel_conf.get("follow_up_tags") is not None: all_blocks.append(build_follow_up_block(message_id=answer.chat_message_id)) diff --git a/backend/danswer/danswerbot/slack/icons.py b/backend/danswer/danswerbot/slack/icons.py new file mode 100644 index 000000000..d2e8ea917 --- /dev/null +++ b/backend/danswer/danswerbot/slack/icons.py @@ -0,0 +1,58 @@ +from danswer.configs.constants import DocumentSource + + +def source_to_github_img_link(source: DocumentSource) -> str | None: + # TODO: store these images somewhere better + if source == DocumentSource.WEB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Web.png" + if source == DocumentSource.FILE.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + if source == DocumentSource.GOOGLE_SITES.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleSites.png" + if source == DocumentSource.SLACK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Slack.png" + if source == DocumentSource.GMAIL.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gmail.png" + if source == DocumentSource.GOOGLE_DRIVE.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/GoogleDrive.png" + if source == DocumentSource.GITHUB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Github.png" + if source == DocumentSource.GITLAB.value: + return 
"https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gitlab.png" + if source == DocumentSource.CONFLUENCE.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Confluence.png" + if source == DocumentSource.JIRA.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Jira.png" + if source == DocumentSource.NOTION.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Notion.png" + if source == DocumentSource.ZENDESK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Zendesk.png" + if source == DocumentSource.GONG.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Gong.png" + if source == DocumentSource.LINEAR.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Linear.png" + if source == DocumentSource.PRODUCTBOARD.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Productboard.webp" + if source == DocumentSource.SLAB.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/SlabLogo.png" + if source == DocumentSource.ZULIP.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Zulip.png" + if source == DocumentSource.GURU.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/Guru.png" + if source == DocumentSource.HUBSPOT.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/HubSpot.png" + if source == DocumentSource.DOCUMENT360.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Document360.png" + if source == DocumentSource.BOOKSTACK.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Bookstack.png" + if source == 
DocumentSource.LOOPIO.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Loopio.png" + if source == DocumentSource.SHAREPOINT.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/main/web/public/Sharepoint.png" + if source == DocumentSource.REQUESTTRACKER.value: + # just use file icon for now + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + if source == DocumentSource.INGESTION_API.value: + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" + + return "https://raw.githubusercontent.com/danswer-ai/danswer/improve-slack-flow/backend/slackbot_images/File.png" diff --git a/backend/danswer/danswerbot/slack/listener.py b/backend/danswer/danswerbot/slack/listener.py index fc7055577..ce7c9eda2 100644 --- a/backend/danswer/danswerbot/slack/listener.py +++ b/backend/danswer/danswerbot/slack/listener.py @@ -3,7 +3,6 @@ from threading import Event from typing import Any from typing import cast -import nltk # type: ignore from slack_sdk import WebClient from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest @@ -13,7 +12,6 @@ from sqlalchemy.orm import Session from danswer.configs.constants import MessageType from danswer.configs.danswerbot_configs import DANSWER_BOT_RESPOND_EVERY_CHANNEL from danswer.configs.danswerbot_configs import NOTIFY_SLACKBOT_NO_ANSWER -from danswer.configs.model_configs import ENABLE_RERANKING_ASYNC_FLOW from danswer.danswerbot.slack.config import get_slack_bot_config_for_channel from danswer.danswerbot.slack.constants import DISLIKE_BLOCK_ACTION_ID from danswer.danswerbot.slack.constants import FEEDBACK_DOC_BUTTON_BLOCK_ACTION_ID @@ -30,6 +28,10 @@ from danswer.danswerbot.slack.handlers.handle_buttons import ( ) from danswer.danswerbot.slack.handlers.handle_buttons import handle_slack_feedback from 
danswer.danswerbot.slack.handlers.handle_message import handle_message +from danswer.danswerbot.slack.handlers.handle_message import ( + remove_scheduled_feedback_reminder, +) +from danswer.danswerbot.slack.handlers.handle_message import schedule_feedback_reminder from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.tokens import fetch_tokens from danswer.danswerbot.slack.utils import ChannelIdAdapter @@ -43,9 +45,12 @@ from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_sqlalchemy_engine from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.one_shot_answer.models import ThreadMessage -from danswer.search.search_nlp_models import warm_up_models +from danswer.search.retrieval.search_runner import download_nltk_data +from danswer.search.search_nlp_models import warm_up_encoders from danswer.server.manage.models import SlackBotTokens from danswer.utils.logger import setup_logger +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT logger = setup_logger() @@ -152,6 +157,7 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool ) return False + logger.debug(f"Handling Slack request with Payload: '{req.payload}'") return True @@ -159,6 +165,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: if actions := req.payload.get("actions"): action = cast(dict[str, Any], actions[0]) feedback_type = cast(str, action.get("action_id")) + feedback_msg_reminder = cast(str, action.get("value")) feedback_id = cast(str, action.get("block_id")) channel_id = cast(str, req.payload["container"]["channel_id"]) thread_ts = cast(str, req.payload["container"]["thread_ts"]) @@ -171,6 +178,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: handle_slack_feedback( feedback_id=feedback_id, feedback_type=feedback_type, + 
feedback_msg_reminder=feedback_msg_reminder, client=client.web_client, user_id_to_post_confirmation=user_id, channel_id_to_post_confirmation=channel_id, @@ -285,15 +293,32 @@ def process_message( ): return + follow_up = bool( + slack_bot_config + and slack_bot_config.channel_config + and slack_bot_config.channel_config.get("follow_up_tags") is not None + ) + feedback_reminder_id = schedule_feedback_reminder( + details=details, client=client.web_client, include_followup=follow_up + ) + failed = handle_message( message_info=details, channel_config=slack_bot_config, client=client.web_client, + feedback_reminder_id=feedback_reminder_id, ) - # Skipping answering due to pre-filtering is not considered a failure - if failed and notify_no_answer: - apologize_for_fail(details, client) + if failed: + if feedback_reminder_id: + remove_scheduled_feedback_reminder( + client=client.web_client, + channel=details.sender, + msg_id=feedback_reminder_id, + ) + # Skipping answering due to pre-filtering is not considered a failure + if notify_no_answer: + apologize_for_fail(details, client) def acknowledge_message(req: SocketModeRequest, client: SocketModeClient) -> None: @@ -374,8 +399,7 @@ if __name__ == "__main__": socket_client: SocketModeClient | None = None logger.info("Verifying query preprocessing (NLTK) data is downloaded") - nltk.download("stopwords", quiet=True) - nltk.download("punkt", quiet=True) + download_nltk_data() while True: try: @@ -390,10 +414,11 @@ if __name__ == "__main__": with Session(get_sqlalchemy_engine()) as db_session: embedding_model = get_current_db_embedding_model(db_session) - warm_up_models( + warm_up_encoders( model_name=embedding_model.model_name, normalize=embedding_model.normalize, - skip_cross_encoders=not ENABLE_RERANKING_ASYNC_FLOW, + model_server_host=MODEL_SERVER_HOST, + model_server_port=MODEL_SERVER_PORT, ) slack_bot_tokens = latest_slack_bot_tokens diff --git a/backend/danswer/danswerbot/slack/tokens.py 
b/backend/danswer/danswerbot/slack/tokens.py index c9c128628..16014574a 100644 --- a/backend/danswer/danswerbot/slack/tokens.py +++ b/backend/danswer/danswerbot/slack/tokens.py @@ -1,7 +1,7 @@ import os from typing import cast -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.manage.models import SlackBotTokens @@ -26,6 +26,5 @@ def save_tokens( ) -> None: dynamic_config_store = get_dynamic_config_store() dynamic_config_store.store( - key=_SLACK_BOT_TOKENS_CONFIG_KEY, - val=dict(tokens), + key=_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True ) diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index 753897e10..5895dc52f 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -18,11 +18,13 @@ from sqlalchemy.orm import Session from danswer.configs.app_configs import DISABLE_TELEMETRY from danswer.configs.constants import ID_SEPARATOR from danswer.configs.constants import MessageType +from danswer.configs.danswerbot_configs import DANSWER_BOT_FEEDBACK_VISIBILITY from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_QPM from danswer.configs.danswerbot_configs import DANSWER_BOT_MAX_WAIT_TIME from danswer.configs.danswerbot_configs import DANSWER_BOT_NUM_RETRIES from danswer.connectors.slack.utils import make_slack_api_rate_limited from danswer.connectors.slack.utils import SlackTextCleaner +from danswer.danswerbot.slack.constants import FeedbackVisibility from danswer.danswerbot.slack.constants import SLACK_CHANNEL_ID from danswer.danswerbot.slack.tokens import fetch_tokens from danswer.db.engine import get_sqlalchemy_engine @@ -346,8 +348,12 @@ def read_slack_thread( if len(blocks) <= 1: continue - # The useful block is the second one after the header block that says AI Answer - message = reply["blocks"][1]["text"]["text"] + # For the old flow, the 
useful block is the second one after the header block that says AI Answer + if reply["blocks"][0]["text"]["text"] == "AI Answer": + message = reply["blocks"][1]["text"]["text"] + else: + # for the new flow, the answer is the first block + message = reply["blocks"][0]["text"]["text"] if message.startswith("_Filters"): if len(blocks) <= 2: @@ -445,3 +451,10 @@ class SlackRateLimiter: self.refill() del self.waiting_questions[0] + + +def get_feedback_visibility() -> FeedbackVisibility: + try: + return FeedbackVisibility(DANSWER_BOT_FEEDBACK_VISIBILITY.lower()) + except ValueError: + return FeedbackVisibility.PRIVATE diff --git a/backend/danswer/db/auth.py b/backend/danswer/db/auth.py index 1883e8abd..6d726c2f9 100644 --- a/backend/danswer/db/auth.py +++ b/backend/danswer/db/auth.py @@ -3,8 +3,8 @@ from typing import Any from typing import Dict from fastapi import Depends -from fastapi_users.db import SQLAlchemyUserDatabase from fastapi_users.models import UP +from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyAccessTokenDatabase from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py index 921f4f7df..f0361da30 100644 --- a/backend/danswer/db/chat.py +++ b/backend/danswer/db/chat.py @@ -1,25 +1,41 @@ from collections.abc import Sequence +from functools import lru_cache from uuid import UUID +from fastapi import HTTPException from sqlalchemy import delete +from sqlalchemy import func from sqlalchemy import not_ from sqlalchemy import nullsfirst from sqlalchemy import or_ from sqlalchemy import select +from sqlalchemy import update from sqlalchemy.exc import MultipleResultsFound from sqlalchemy.orm import Session +from danswer.auth.schemas import UserRole from danswer.configs.chat_configs import HARD_DELETE_CHATS from danswer.configs.constants import MessageType from danswer.db.constants import 
SLACK_BOT_PERSONA_PREFIX +from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import ChatMessage from danswer.db.models import ChatSession +from danswer.db.models import ChatSessionSharedStatus from danswer.db.models import DocumentSet as DBDocumentSet from danswer.db.models import Persona +from danswer.db.models import Persona__User +from danswer.db.models import Persona__UserGroup from danswer.db.models import Prompt from danswer.db.models import SearchDoc from danswer.db.models import SearchDoc as DBSearchDoc -from danswer.search.models import RecencyBiasSetting +from danswer.db.models import StarterMessage +from danswer.db.models import Tool +from danswer.db.models import User +from danswer.db.models import User__UserGroup +from danswer.file_store.models import FileDescriptor +from danswer.llm.override_models import LLMOverride +from danswer.llm.override_models import PromptOverride +from danswer.search.enums import RecencyBiasSetting from danswer.search.models import RetrievalDocs from danswer.search.models import SavedSearchDoc from danswer.search.models import SearchDoc as ServerSearchDoc @@ -30,11 +46,23 @@ logger = setup_logger() def get_chat_session_by_id( - chat_session_id: int, user_id: UUID | None, db_session: Session + chat_session_id: int, + user_id: UUID | None, + db_session: Session, + include_deleted: bool = False, + is_shared: bool = False, ) -> ChatSession: - stmt = select(ChatSession).where( - ChatSession.id == chat_session_id, ChatSession.user_id == user_id - ) + stmt = select(ChatSession).where(ChatSession.id == chat_session_id) + + if is_shared: + stmt = stmt.where(ChatSession.shared_status == ChatSessionSharedStatus.PUBLIC) + else: + # if user_id is None, assume this is an admin who should be able + # to view all chat sessions + if user_id is not None: + stmt = stmt.where( + or_(ChatSession.user_id == user_id, ChatSession.user_id.is_(None)) + ) result = db_session.execute(stmt) chat_session = 
result.scalar_one_or_none() @@ -42,7 +70,7 @@ def get_chat_session_by_id( if not chat_session: raise ValueError("Invalid Chat Session ID provided") - if chat_session.deleted: + if not include_deleted and chat_session.deleted: raise ValueError("Chat session has been deleted") return chat_session @@ -73,13 +101,19 @@ def create_chat_session( description: str, user_id: UUID | None, persona_id: int | None = None, + llm_override: LLMOverride | None = None, + prompt_override: PromptOverride | None = None, one_shot: bool = False, + danswerbot_flow: bool = False, ) -> ChatSession: chat_session = ChatSession( user_id=user_id, persona_id=persona_id, description=description, + llm_override=llm_override, + prompt_override=prompt_override, one_shot=one_shot, + danswerbot_flow=danswerbot_flow, ) db_session.add(chat_session) @@ -89,7 +123,11 @@ def create_chat_session( def update_chat_session( - user_id: UUID | None, chat_session_id: int, description: str, db_session: Session + db_session: Session, + user_id: UUID | None, + chat_session_id: int, + description: str | None = None, + sharing_status: ChatSessionSharedStatus | None = None, ) -> ChatSession: chat_session = get_chat_session_by_id( chat_session_id=chat_session_id, user_id=user_id, db_session=db_session @@ -98,7 +136,10 @@ def update_chat_session( if chat_session.deleted: raise ValueError("Trying to rename a deleted chat session") - chat_session.description = description + if description is not None: + chat_session.description = description + if sharing_status is not None: + chat_session.shared_status = sharing_status db_session.commit() @@ -220,6 +261,7 @@ def create_new_chat_message( token_count: int, message_type: MessageType, db_session: Session, + files: list[FileDescriptor] | None = None, rephrased_query: str | None = None, error: str | None = None, reference_docs: list[DBSearchDoc] | None = None, @@ -237,6 +279,7 @@ def create_new_chat_message( token_count=token_count, message_type=message_type, 
citations=citations, + files=files, error=error, ) @@ -277,15 +320,29 @@ def set_as_latest_chat_message( db_session.commit() +def attach_files_to_chat_message( + chat_message: ChatMessage, + files: list[FileDescriptor], + db_session: Session, + commit: bool = True, +) -> None: + chat_message.files = files + if commit: + db_session.commit() + + def get_prompt_by_id( prompt_id: int, - user_id: UUID | None, + user: User | None, db_session: Session, include_deleted: bool = False, ) -> Prompt: - stmt = select(Prompt).where( - Prompt.id == prompt_id, or_(Prompt.user_id == user_id, Prompt.user_id.is_(None)) - ) + stmt = select(Prompt).where(Prompt.id == prompt_id) + + # if user is not specified OR they are an admin, they should + # have access to all prompts, so this where clause is not needed + if user and user.role != UserRole.ADMIN: + stmt = stmt.where(or_(Prompt.user_id == user.id, Prompt.user_id.is_(None))) if not include_deleted: stmt = stmt.where(Prompt.deleted.is_(False)) @@ -301,16 +358,32 @@ def get_prompt_by_id( return prompt +@lru_cache() +def get_default_prompt() -> Prompt: + with Session(get_sqlalchemy_engine()) as db_session: + stmt = select(Prompt).where(Prompt.id == 0) + + result = db_session.execute(stmt) + prompt = result.scalar_one_or_none() + + if prompt is None: + raise RuntimeError("Default Prompt not found") + + return prompt + + def get_persona_by_id( persona_id: int, - # if user_id is `None` assume the user is an admin or auth is disabled - user_id: UUID | None, + # if user is `None` assume the user is an admin or auth is disabled + user: User | None, db_session: Session, include_deleted: bool = False, ) -> Persona: stmt = select(Persona).where(Persona.id == persona_id) - if user_id is not None: - stmt = stmt.where(or_(Persona.user_id == user_id, Persona.user_id.is_(None))) + + # if user is an admin, they should have access to all Personas + if user is not None and user.role != UserRole.ADMIN: + stmt = stmt.where(or_(Persona.user_id == user.id, 
Persona.user_id.is_(None))) if not include_deleted: stmt = stmt.where(Persona.deleted.is_(False)) @@ -326,6 +399,23 @@ def get_persona_by_id( return persona +def check_user_can_edit_persona(user: User | None, persona: Persona) -> None: + # if user is None, assume that no-auth is turned on + if user is None: + return + + # admins can edit everything + if user.role == UserRole.ADMIN: + return + + # otherwise, make sure user owns persona + if persona.user_id != user.id: + raise HTTPException( + status_code=403, + detail=f"User not authorized to edit persona with ID {persona.id}", + ) + + def get_prompts_by_ids(prompt_ids: list[int], db_session: Session) -> Sequence[Prompt]: """Unsafe, can fetch prompts from all users""" if not prompt_ids: @@ -349,33 +439,33 @@ def get_personas_by_ids( def get_prompt_by_name( - prompt_name: str, user_id: UUID | None, shared: bool, db_session: Session + prompt_name: str, user: User | None, db_session: Session ) -> Prompt | None: - """Cannot do shared and user owned simultaneously as there may be two of those""" stmt = select(Prompt).where(Prompt.name == prompt_name) - if shared: - stmt = stmt.where(Prompt.user_id.is_(None)) - else: - stmt = stmt.where(Prompt.user_id == user_id) + + # if user is not specified OR they are an admin, they should + # have access to all prompts, so this where clause is not needed + if user and user.role != UserRole.ADMIN: + stmt = stmt.where(Prompt.user_id == user.id) + result = db_session.execute(stmt).scalar_one_or_none() return result def get_persona_by_name( - persona_name: str, user_id: UUID | None, shared: bool, db_session: Session + persona_name: str, user: User | None, db_session: Session ) -> Persona | None: - """Cannot do shared and user owned simultaneously as there may be two of those""" + """Admins can see all, regular users can only fetch their own. 
+ If user is None, assume the user is an admin or auth is disabled.""" stmt = select(Persona).where(Persona.name == persona_name) - if shared: - stmt = stmt.where(Persona.user_id.is_(None)) - else: - stmt = stmt.where(Persona.user_id == user_id) + if user and user.role != UserRole.ADMIN: + stmt = stmt.where(Persona.user_id == user.id) result = db_session.execute(stmt).scalar_one_or_none() return result def upsert_prompt( - user_id: UUID | None, + user: User | None, name: str, description: str, system_prompt: str, @@ -383,7 +473,6 @@ def upsert_prompt( include_citations: bool, datetime_aware: bool, personas: list[Persona] | None, - shared: bool, db_session: Session, prompt_id: int | None = None, default_prompt: bool = True, @@ -392,9 +481,7 @@ def upsert_prompt( if prompt_id is not None: prompt = db_session.query(Prompt).filter_by(id=prompt_id).first() else: - prompt = get_prompt_by_name( - prompt_name=name, user_id=user_id, shared=shared, db_session=db_session - ) + prompt = get_prompt_by_name(prompt_name=name, user=user, db_session=db_session) if prompt: if not default_prompt and prompt.default_prompt: @@ -415,7 +502,7 @@ def upsert_prompt( else: prompt = Prompt( id=prompt_id, - user_id=None if shared else user_id, + user_id=user.id if user else None, name=name, description=description, system_prompt=system_prompt, @@ -437,7 +524,7 @@ def upsert_prompt( def upsert_persona( - user_id: UUID | None, + user: User | None, name: str, description: str, num_chunks: float, @@ -446,9 +533,12 @@ def upsert_persona( recency_bias: RecencyBiasSetting, prompts: list[Prompt] | None, document_sets: list[DBDocumentSet] | None, + llm_model_provider_override: str | None, llm_model_version_override: str | None, - shared: bool, + starter_messages: list[StarterMessage] | None, + is_public: bool, db_session: Session, + tool_ids: list[int] | None = None, persona_id: int | None = None, default_persona: bool = False, commit: bool = True, @@ -457,13 +547,22 @@ def upsert_persona( persona = 
db_session.query(Persona).filter_by(id=persona_id).first() else: persona = get_persona_by_name( - persona_name=name, user_id=user_id, shared=shared, db_session=db_session + persona_name=name, user=user, db_session=db_session ) + # Fetch and attach tools by IDs + tools = None + if tool_ids is not None: + tools = db_session.query(Tool).filter(Tool.id.in_(tool_ids)).all() + if not tools and tool_ids: + raise ValueError("Tools not found") + if persona: if not default_persona and persona.default_persona: raise ValueError("Cannot update default persona with non-default.") + check_user_can_edit_persona(user=user, persona=persona) + persona.name = name persona.description = description persona.num_chunks = num_chunks @@ -471,8 +570,11 @@ def upsert_persona( persona.llm_filter_extraction = llm_filter_extraction persona.recency_bias = recency_bias persona.default_persona = default_persona + persona.llm_model_provider_override = llm_model_provider_override persona.llm_model_version_override = llm_model_version_override + persona.starter_messages = starter_messages persona.deleted = False # Un-delete if previously deleted + persona.is_public = is_public # Do not delete any associations manually added unless # a new updated list is provided @@ -484,10 +586,14 @@ def upsert_persona( persona.prompts.clear() persona.prompts = prompts + if tools is not None: + persona.tools = tools + else: persona = Persona( id=persona_id, - user_id=None if shared else user_id, + user_id=user.id if user else None, + is_public=is_public, name=name, description=description, num_chunks=num_chunks, @@ -497,7 +603,10 @@ def upsert_persona( default_persona=default_persona, prompts=prompts or [], document_sets=document_sets or [], + llm_model_provider_override=llm_model_provider_override, llm_model_version_override=llm_model_version_override, + starter_messages=starter_messages, + tools=tools or [], ) db_session.add(persona) @@ -512,25 +621,64 @@ def upsert_persona( def mark_prompt_as_deleted( prompt_id: 
int, - user_id: UUID | None, + user: User | None, db_session: Session, ) -> None: - prompt = get_prompt_by_id( - prompt_id=prompt_id, user_id=user_id, db_session=db_session - ) + prompt = get_prompt_by_id(prompt_id=prompt_id, user=user, db_session=db_session) prompt.deleted = True db_session.commit() def mark_persona_as_deleted( persona_id: int, - user_id: UUID | None, + user: User | None, + db_session: Session, +) -> None: + persona = get_persona_by_id(persona_id=persona_id, user=user, db_session=db_session) + persona.deleted = True + db_session.commit() + + +def mark_persona_as_not_deleted( + persona_id: int, + user: User | None, db_session: Session, ) -> None: persona = get_persona_by_id( - persona_id=persona_id, user_id=user_id, db_session=db_session + persona_id=persona_id, user=user, db_session=db_session, include_deleted=True ) - persona.deleted = True + if persona.deleted: + persona.deleted = False + db_session.commit() + else: + raise ValueError(f"Persona with ID {persona_id} is not deleted.") + + +def mark_delete_persona_by_name( + persona_name: str, db_session: Session, is_default: bool = True +) -> None: + stmt = ( + update(Persona) + .where(Persona.name == persona_name, Persona.default_persona == is_default) + .values(deleted=True) + ) + + db_session.execute(stmt) + db_session.commit() + + +def delete_old_default_personas( + db_session: Session, +) -> None: + """Note, this locks out the Summarize and Paraphrase personas for now + Need a more graceful fix later or those need to never have IDs""" + stmt = ( + update(Persona) + .where(Persona.default_persona, Persona.id > 0) + .values(deleted=True, name=func.concat(Persona.name, "_old")) + ) + + db_session.execute(stmt) db_session.commit() @@ -539,9 +687,7 @@ def update_persona_visibility( is_visible: bool, db_session: Session, ) -> None: - persona = get_persona_by_id( - persona_id=persona_id, user_id=None, db_session=db_session - ) + persona = get_persona_by_id(persona_id=persona_id, user=None, 
db_session=db_session) persona.is_visible = is_visible db_session.commit() @@ -588,9 +734,28 @@ def get_personas( include_slack_bot_personas: bool = False, include_deleted: bool = False, ) -> Sequence[Persona]: - stmt = select(Persona) + stmt = select(Persona).distinct() if user_id is not None: - stmt = stmt.where(or_(Persona.user_id == user_id, Persona.user_id.is_(None))) + # Subquery to find all groups the user belongs to + user_groups_subquery = ( + select(User__UserGroup.user_group_id) + .where(User__UserGroup.user_id == user_id) + .subquery() + ) + + # Include personas where the user is directly related or part of a user group that has access + access_conditions = or_( + Persona.is_public == True, # noqa: E712 + Persona.id.in_( # User has access through list of users with access + select(Persona__User.persona_id).where(Persona__User.user_id == user_id) + ), + Persona.id.in_( # User is part of a group that has access + select(Persona__UserGroup.persona_id).where( + Persona__UserGroup.user_group_id.in_(user_groups_subquery) # type: ignore + ) + ), + ) + stmt = stmt.where(access_conditions) if not include_default: stmt = stmt.where(Persona.default_persona.is_(False)) @@ -643,7 +808,8 @@ def create_db_search_doc( boost=server_search_doc.boost, hidden=server_search_doc.hidden, doc_metadata=server_search_doc.metadata, - score=server_search_doc.score, + # For docs further down that aren't reranked, we can't use the retrieval score + score=server_search_doc.score or 0.0, match_highlights=server_search_doc.match_highlights, updated_at=server_search_doc.updated_at, primary_owners=server_search_doc.primary_owners, @@ -664,6 +830,7 @@ def get_db_search_doc_by_id(doc_id: int, db_session: Session) -> DBSearchDoc | N def translate_db_search_doc_to_server_search_doc( db_search_doc: SearchDoc, + remove_doc_content: bool = False, ) -> SavedSearchDoc: return SavedSearchDoc( db_doc_id=db_search_doc.id, @@ -671,22 +838,30 @@ def translate_db_search_doc_to_server_search_doc( 
chunk_ind=db_search_doc.chunk_ind, semantic_identifier=db_search_doc.semantic_id, link=db_search_doc.link, - blurb=db_search_doc.blurb, + blurb=db_search_doc.blurb if not remove_doc_content else "", source_type=db_search_doc.source_type, boost=db_search_doc.boost, hidden=db_search_doc.hidden, - metadata=db_search_doc.doc_metadata, + metadata=db_search_doc.doc_metadata if not remove_doc_content else {}, score=db_search_doc.score, - match_highlights=db_search_doc.match_highlights, - updated_at=db_search_doc.updated_at, - primary_owners=db_search_doc.primary_owners, - secondary_owners=db_search_doc.secondary_owners, + match_highlights=db_search_doc.match_highlights + if not remove_doc_content + else [], + updated_at=db_search_doc.updated_at if not remove_doc_content else None, + primary_owners=db_search_doc.primary_owners if not remove_doc_content else [], + secondary_owners=db_search_doc.secondary_owners + if not remove_doc_content + else [], ) -def get_retrieval_docs_from_chat_message(chat_message: ChatMessage) -> RetrievalDocs: +def get_retrieval_docs_from_chat_message( + chat_message: ChatMessage, remove_doc_content: bool = False +) -> RetrievalDocs: top_documents = [ - translate_db_search_doc_to_server_search_doc(db_doc) + translate_db_search_doc_to_server_search_doc( + db_doc, remove_doc_content=remove_doc_content + ) for db_doc in chat_message.search_docs ] top_documents = sorted(top_documents, key=lambda doc: doc.score, reverse=True) # type: ignore @@ -694,7 +869,7 @@ def get_retrieval_docs_from_chat_message(chat_message: ChatMessage) -> Retrieval def translate_db_message_to_chat_message_detail( - chat_message: ChatMessage, + chat_message: ChatMessage, remove_doc_content: bool = False ) -> ChatMessageDetail: chat_msg_detail = ChatMessageDetail( message_id=chat_message.id, @@ -702,10 +877,25 @@ def translate_db_message_to_chat_message_detail( latest_child_message=chat_message.latest_child_message, message=chat_message.message, 
rephrased_query=chat_message.rephrased_query, - context_docs=get_retrieval_docs_from_chat_message(chat_message), + context_docs=get_retrieval_docs_from_chat_message( + chat_message, remove_doc_content=remove_doc_content + ), message_type=chat_message.message_type, time_sent=chat_message.time_sent, citations=chat_message.citations, + files=chat_message.files or [], ) return chat_msg_detail + + +def delete_persona_by_name( + persona_name: str, db_session: Session, is_default: bool = True +) -> None: + stmt = delete(Persona).where( + Persona.name == persona_name, Persona.default_persona == is_default + ) + + db_session.execute(stmt) + + db_session.commit() diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index 25c646072..31f2982db 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -4,7 +4,6 @@ from fastapi import HTTPException from sqlalchemy import delete from sqlalchemy import desc from sqlalchemy import select -from sqlalchemy import update from sqlalchemy.orm import Session from danswer.db.connector import fetch_connector_by_id @@ -96,7 +95,6 @@ def update_connector_credential_pair( db_session: Session, connector_id: int, credential_id: int, - attempt_status: IndexingStatus, net_docs: int | None = None, run_dt: datetime | None = None, ) -> None: @@ -107,13 +105,9 @@ def update_connector_credential_pair( f"and credential id {credential_id}" ) return - cc_pair.last_attempt_status = attempt_status # simply don't update last_successful_index_time if run_dt is not specified # at worst, this would result in re-indexing documents that were already indexed - if ( - attempt_status == IndexingStatus.SUCCESS - or attempt_status == IndexingStatus.IN_PROGRESS - ) and run_dt is not None: + if run_dt is not None: cc_pair.last_successful_index_time = run_dt if net_docs is not None: cc_pair.total_docs_indexed += net_docs @@ -132,20 +126,6 @@ def 
delete_connector_credential_pair__no_commit( db_session.execute(stmt) -def mark_all_in_progress_cc_pairs_failed( - db_session: Session, -) -> None: - stmt = ( - update(ConnectorCredentialPair) - .where( - ConnectorCredentialPair.last_attempt_status == IndexingStatus.IN_PROGRESS - ) - .values(last_attempt_status=IndexingStatus.FAILED) - ) - db_session.execute(stmt) - db_session.commit() - - def associate_default_cc_pair(db_session: Session) -> None: existing_association = ( db_session.query(ConnectorCredentialPair) @@ -297,12 +277,4 @@ def resync_cc_pair( last_success.time_started if last_success else None ) - last_run = find_latest_index_attempt( - connector_id=cc_pair.connector_id, - credential_id=cc_pair.credential_id, - only_include_success=False, - db_session=db_session, - ) - cc_pair.last_attempt_status = last_run.status if last_run else None - db_session.commit() diff --git a/backend/danswer/db/deletion_attempt.py b/backend/danswer/db/deletion_attempt.py index 00ae13d5b..b66e6f585 100644 --- a/backend/danswer/db/deletion_attempt.py +++ b/backend/danswer/db/deletion_attempt.py @@ -1,20 +1,52 @@ +from sqlalchemy.orm import Session + +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.index_attempt import get_last_attempt from danswer.db.models import ConnectorCredentialPair from danswer.db.models import IndexingStatus def check_deletion_attempt_is_allowed( connector_credential_pair: ConnectorCredentialPair, -) -> bool: + db_session: Session, + allow_scheduled: bool = False, +) -> str | None: """ To be deletable: (1) connector should be disabled (2) there should be no in-progress/planned index attempts + + Returns an error message if the deletion attempt is not allowed, otherwise None. 
""" - return bool( - connector_credential_pair.connector.disabled - and ( - connector_credential_pair.last_attempt_status != IndexingStatus.IN_PROGRESS - and connector_credential_pair.last_attempt_status - != IndexingStatus.NOT_STARTED - ) + base_error_msg = ( + f"Connector with ID '{connector_credential_pair.connector_id}' and credential ID " + f"'{connector_credential_pair.credential_id}' is not deletable." ) + + if not connector_credential_pair.connector.disabled: + return base_error_msg + " Connector must be paused." + + connector_id = connector_credential_pair.connector_id + credential_id = connector_credential_pair.credential_id + current_embedding_model = get_current_db_embedding_model(db_session) + + last_indexing = get_last_attempt( + connector_id=connector_id, + credential_id=credential_id, + embedding_model_id=current_embedding_model.id, + db_session=db_session, + ) + + if not last_indexing: + return None + + if last_indexing.status == IndexingStatus.IN_PROGRESS or ( + last_indexing.status == IndexingStatus.NOT_STARTED and not allow_scheduled + ): + return ( + base_error_msg + + " There is an ongoing / planned indexing attempt. " + + "The indexing attempt must be completed or cancelled before deletion." 
+ ) + + return None diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index b1620fb60..1ff3391b7 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -1,4 +1,6 @@ +import contextlib import time +from collections.abc import Generator from collections.abc import Sequence from datetime import datetime from uuid import UUID @@ -9,15 +11,17 @@ from sqlalchemy import func from sqlalchemy import or_ from sqlalchemy import select from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.engine.util import TransactionalContext +from sqlalchemy.exc import OperationalError from sqlalchemy.orm import Session from danswer.configs.constants import DEFAULT_BOOST -from danswer.db.feedback import delete_document_feedback_for_documents +from danswer.db.feedback import delete_document_feedback_for_documents__no_commit from danswer.db.models import ConnectorCredentialPair from danswer.db.models import Credential from danswer.db.models import Document as DbDocument from danswer.db.models import DocumentByConnectorCredentialPair -from danswer.db.tag import delete_document_tags_for_documents +from danswer.db.tag import delete_document_tags_for_documents__no_commit from danswer.db.utils import model_to_dict from danswer.document_index.interfaces import DocumentMetadata from danswer.server.documents.models import ConnectorCredentialPairIdentifier @@ -242,7 +246,7 @@ def upsert_documents_complete( ) -def delete_document_by_connector_credential_pair( +def delete_document_by_connector_credential_pair__no_commit( db_session: Session, document_ids: list[str], connector_credential_pair_identifier: ConnectorCredentialPairIdentifier @@ -263,19 +267,22 @@ def delete_document_by_connector_credential_pair( db_session.execute(stmt) -def delete_documents(db_session: Session, document_ids: list[str]) -> None: +def delete_documents__no_commit(db_session: Session, document_ids: list[str]) -> None: 
db_session.execute(delete(DbDocument).where(DbDocument.id.in_(document_ids))) -def delete_documents_complete(db_session: Session, document_ids: list[str]) -> None: +def delete_documents_complete__no_commit( + db_session: Session, document_ids: list[str] +) -> None: logger.info(f"Deleting {len(document_ids)} documents from the DB") - delete_document_by_connector_credential_pair(db_session, document_ids) - delete_document_feedback_for_documents( + delete_document_by_connector_credential_pair__no_commit(db_session, document_ids) + delete_document_feedback_for_documents__no_commit( document_ids=document_ids, db_session=db_session ) - delete_document_tags_for_documents(document_ids=document_ids, db_session=db_session) - delete_documents(db_session, document_ids) - db_session.commit() + delete_document_tags_for_documents__no_commit( + document_ids=document_ids, db_session=db_session + ) + delete_documents__no_commit(db_session, document_ids) def acquire_document_locks(db_session: Session, document_ids: list[str]) -> bool: @@ -288,12 +295,18 @@ def acquire_document_locks(db_session: Session, document_ids: list[str]) -> bool document IDs in a single call). """ stmt = ( - select(DbDocument) + select(DbDocument.id) .where(DbDocument.id.in_(document_ids)) .with_for_update(nowait=True) ) # will raise exception if any of the documents are already locked - db_session.execute(stmt) + documents = db_session.scalars(stmt).all() + + # make sure we found every document + if len(documents) != len(set(document_ids)): + logger.warning("Didn't find row for all specified document IDs. 
Aborting.") + return False + return True @@ -301,23 +314,71 @@ _NUM_LOCK_ATTEMPTS = 10 _LOCK_RETRY_DELAY = 30 -def prepare_to_modify_documents(db_session: Session, document_ids: list[str]) -> None: +@contextlib.contextmanager +def prepare_to_modify_documents( + db_session: Session, document_ids: list[str], retry_delay: int = _LOCK_RETRY_DELAY +) -> Generator[TransactionalContext, None, None]: """Try and acquire locks for the documents to prevent other jobs from modifying them at the same time (e.g. avoid race conditions). This should be called ahead of any modification to Vespa. Locks should be released by the - caller as soon as updates are complete by finishing the transaction.""" + caller as soon as updates are complete by finishing the transaction. + + NOTE: only one commit is allowed within the context manager returned by this funtion. + Multiple commits will result in a sqlalchemy.exc.InvalidRequestError. + NOTE: this function will commit any existing transaction. + """ + db_session.commit() # ensure that we're not in a transaction + lock_acquired = False for _ in range(_NUM_LOCK_ATTEMPTS): try: - lock_acquired = acquire_document_locks( - db_session=db_session, document_ids=document_ids - ) - except Exception as e: + with db_session.begin() as transaction: + lock_acquired = acquire_document_locks( + db_session=db_session, document_ids=document_ids + ) + if lock_acquired: + yield transaction + break + except OperationalError as e: logger.info(f"Failed to acquire locks for documents, retrying. 
Error: {e}") - time.sleep(_LOCK_RETRY_DELAY) + + time.sleep(retry_delay) if not lock_acquired: raise RuntimeError( f"Failed to acquire locks after {_NUM_LOCK_ATTEMPTS} attempts " f"for documents: {document_ids}" ) + + +def get_ingestion_documents( + db_session: Session, +) -> list[DbDocument]: + # TODO add the option to filter by DocumentSource + stmt = select(DbDocument).where(DbDocument.from_ingestion_api.is_(True)) + documents = db_session.execute(stmt).scalars().all() + return list(documents) + + +def get_documents_by_cc_pair( + cc_pair_id: int, + db_session: Session, +) -> list[DbDocument]: + return ( + db_session.query(DbDocument) + .join( + DocumentByConnectorCredentialPair, + DbDocument.id == DocumentByConnectorCredentialPair.id, + ) + .join( + ConnectorCredentialPair, + and_( + DocumentByConnectorCredentialPair.connector_id + == ConnectorCredentialPair.connector_id, + DocumentByConnectorCredentialPair.credential_id + == ConnectorCredentialPair.credential_id, + ), + ) + .filter(ConnectorCredentialPair.id == cc_pair_id) + .all() + ) diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 848f50883..51064f78e 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -16,6 +16,7 @@ from danswer.db.models import DocumentSet as DocumentSetDBModel from danswer.db.models import DocumentSet__ConnectorCredentialPair from danswer.server.features.document_set.models import DocumentSetCreationRequest from danswer.server.features.document_set.models import DocumentSetUpdateRequest +from danswer.utils.variable_functionality import fetch_versioned_implementation def _delete_document_set_cc_pairs__no_commit( @@ -41,6 +42,12 @@ def _mark_document_set_cc_pairs_as_outdated__no_commit( row.is_current = False +def delete_document_set_privacy__no_commit( + document_set_id: int, db_session: Session +) -> None: + """No private document sets in Danswer MIT""" + + def get_document_set_by_id( db_session: Session, 
document_set_id: int ) -> DocumentSetDBModel | None: @@ -67,13 +74,25 @@ def get_document_sets_by_ids( ).all() +def make_doc_set_private( + document_set_id: int, + user_ids: list[UUID] | None, + group_ids: list[int] | None, + db_session: Session, +) -> None: + # May cause error if someone switches down to MIT from EE + if user_ids or group_ids: + raise NotImplementedError("Danswer MIT does not support private Document Sets") + + def insert_document_set( document_set_creation_request: DocumentSetCreationRequest, user_id: UUID | None, db_session: Session, ) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]: if not document_set_creation_request.cc_pair_ids: - raise ValueError("Cannot create a document set with no CC pairs") + # It's cc-pairs in actuality but the UI displays this error + raise ValueError("Cannot create a document set with no Connectors") # start a transaction db_session.begin() @@ -83,6 +102,7 @@ def insert_document_set( name=document_set_creation_request.name, description=document_set_creation_request.description, user_id=user_id, + is_public=document_set_creation_request.is_public, ) db_session.add(new_document_set_row) db_session.flush() # ensure the new document set gets assigned an ID @@ -96,6 +116,19 @@ def insert_document_set( for cc_pair_id in document_set_creation_request.cc_pair_ids ] db_session.add_all(ds_cc_pairs) + + versioned_private_doc_set_fn = fetch_versioned_implementation( + "danswer.db.document_set", "make_doc_set_private" + ) + + # Private Document Sets + versioned_private_doc_set_fn( + document_set_id=new_document_set_row.id, + user_ids=document_set_creation_request.users, + group_ids=document_set_creation_request.groups, + db_session=db_session, + ) + db_session.commit() except: db_session.rollback() @@ -108,7 +141,8 @@ def update_document_set( document_set_update_request: DocumentSetUpdateRequest, db_session: Session ) -> tuple[DocumentSetDBModel, list[DocumentSet__ConnectorCredentialPair]]: if not 
document_set_update_request.cc_pair_ids: - raise ValueError("Cannot create a document set with no CC pairs") + # It's cc-pairs in actuality but the UI displays this error + raise ValueError("Cannot create a document set with no Connectors") # start a transaction db_session.begin() @@ -130,6 +164,19 @@ def update_document_set( document_set_row.description = document_set_update_request.description document_set_row.is_up_to_date = False + document_set_row.is_public = document_set_update_request.is_public + + versioned_private_doc_set_fn = fetch_versioned_implementation( + "danswer.db.document_set", "make_doc_set_private" + ) + + # Private Document Sets + versioned_private_doc_set_fn( + document_set_id=document_set_row.id, + user_ids=document_set_update_request.users, + group_ids=document_set_update_request.groups, + db_session=db_session, + ) # update the attached CC pairs # first, mark all existing CC pairs as not current @@ -205,6 +252,15 @@ def mark_document_set_as_to_be_deleted( _delete_document_set_cc_pairs__no_commit( db_session=db_session, document_set_id=document_set_id ) + + # delete all private document set information + versioned_delete_private_fn = fetch_versioned_implementation( + "danswer.db.document_set", "delete_document_set_privacy__no_commit" + ) + versioned_delete_private_fn( + document_set_id=document_set_id, db_session=db_session + ) + # mark the row as needing a sync, it will be deleted there since there # are no more relationships to cc pairs document_set_row.is_up_to_date = False @@ -248,7 +304,7 @@ def mark_cc_pair__document_set_relationships_to_be_deleted__no_commit( def fetch_document_sets( - db_session: Session, include_outdated: bool = False + user_id: UUID | None, db_session: Session, include_outdated: bool = False ) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]: """Return is a list where each element contains a tuple of: 1. 
The document set itself @@ -301,9 +357,38 @@ def fetch_document_sets( ] -def fetch_documents_for_document_set( - document_set_id: int, db_session: Session, current_only: bool = True -) -> Sequence[Document]: +def fetch_all_document_sets(db_session: Session) -> Sequence[DocumentSetDBModel]: + """Used for Admin UI where they should have visibility into all document sets""" + return db_session.scalars(select(DocumentSetDBModel)).all() + + +def fetch_user_document_sets( + user_id: UUID | None, db_session: Session +) -> list[tuple[DocumentSetDBModel, list[ConnectorCredentialPair]]]: + # If Auth is turned off, all document sets become visible + # document sets are not permission enforced, only for organizational purposes + # the documents themselves are permission enforced + if user_id is None: + return fetch_document_sets( + user_id=user_id, db_session=db_session, include_outdated=True + ) + + versioned_fetch_doc_sets_fn = fetch_versioned_implementation( + "danswer.db.document_set", "fetch_document_sets" + ) + + return versioned_fetch_doc_sets_fn( + user_id=user_id, db_session=db_session, include_outdated=True + ) + + +def fetch_documents_for_document_set_paginated( + document_set_id: int, + db_session: Session, + current_only: bool = True, + last_document_id: str | None = None, + limit: int = 100, +) -> tuple[Sequence[Document], str | None]: stmt = ( select(Document) .join( @@ -330,14 +415,19 @@ def fetch_documents_for_document_set( == DocumentSet__ConnectorCredentialPair.document_set_id, ) .where(DocumentSetDBModel.id == document_set_id) + .order_by(Document.id) + .limit(limit) ) + if last_document_id is not None: + stmt = stmt.where(Document.id > last_document_id) if current_only: stmt = stmt.where( DocumentSet__ConnectorCredentialPair.is_current == True # noqa: E712 ) stmt = stmt.distinct() - return db_session.scalars(stmt).all() + documents = db_session.scalars(stmt).all() + return documents, documents[-1].id if documents else None def 
fetch_document_sets_for_documents( @@ -404,6 +494,8 @@ def check_document_sets_are_public( db_session: Session, document_set_ids: list[int], ) -> bool: + """Checks if any of the CC-Pairs are Non Public (meaning that some documents in this document + set is not Public""" connector_credential_pair_ids = ( db_session.query( DocumentSet__ConnectorCredentialPair.connector_credential_pair_id diff --git a/backend/danswer/db/embedding_model.py b/backend/danswer/db/embedding_model.py index 790763310..ae2b98d51 100644 --- a/backend/danswer/db/embedding_model.py +++ b/backend/danswer/db/embedding_model.py @@ -1,6 +1,15 @@ from sqlalchemy import select from sqlalchemy.orm import Session +from danswer.configs.model_configs import ASYM_PASSAGE_PREFIX +from danswer.configs.model_configs import ASYM_QUERY_PREFIX +from danswer.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import DOC_EMBEDDING_DIM +from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import NORMALIZE_EMBEDDINGS +from danswer.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL +from danswer.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM +from danswer.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS from danswer.db.models import EmbeddingModel from danswer.db.models import IndexModelStatus from danswer.indexing.models import EmbeddingModelDetail @@ -65,3 +74,42 @@ def update_embedding_model_status( ) -> None: embedding_model.status = new_status db_session.commit() + + +def user_has_overridden_embedding_model() -> bool: + return DOCUMENT_ENCODER_MODEL != DEFAULT_DOCUMENT_ENCODER_MODEL + + +def get_old_default_embedding_model() -> EmbeddingModel: + is_overridden = user_has_overridden_embedding_model() + return EmbeddingModel( + model_name=( + DOCUMENT_ENCODER_MODEL + if is_overridden + else OLD_DEFAULT_DOCUMENT_ENCODER_MODEL + ), + model_dim=( + DOC_EMBEDDING_DIM if 
is_overridden else OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM + ), + normalize=( + NORMALIZE_EMBEDDINGS + if is_overridden + else OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS + ), + query_prefix=(ASYM_QUERY_PREFIX if is_overridden else ""), + passage_prefix=(ASYM_PASSAGE_PREFIX if is_overridden else ""), + status=IndexModelStatus.PRESENT, + index_name="danswer_chunk", + ) + + +def get_new_default_embedding_model(is_present: bool) -> EmbeddingModel: + return EmbeddingModel( + model_name=DOCUMENT_ENCODER_MODEL, + model_dim=DOC_EMBEDDING_DIM, + normalize=NORMALIZE_EMBEDDINGS, + query_prefix=ASYM_QUERY_PREFIX, + passage_prefix=ASYM_PASSAGE_PREFIX, + status=IndexModelStatus.PRESENT if is_present else IndexModelStatus.FUTURE, + index_name=f"danswer_chunk_{clean_model_name(DOCUMENT_ENCODER_MODEL)}", + ) diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index b28647f6d..4171cf944 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -1,6 +1,8 @@ +import contextlib from collections.abc import AsyncGenerator from collections.abc import Generator from datetime import datetime +from typing import ContextManager from sqlalchemy import text from sqlalchemy.engine import create_engine @@ -9,6 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncEngine from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.orm import Session +from sqlalchemy.orm import sessionmaker from danswer.configs.app_configs import POSTGRES_DB from danswer.configs.app_configs import POSTGRES_HOST @@ -56,7 +59,7 @@ def get_sqlalchemy_engine() -> Engine: global _SYNC_ENGINE if _SYNC_ENGINE is None: connection_string = build_connection_string(db_api=SYNC_DB_API) - _SYNC_ENGINE = create_engine(connection_string) + _SYNC_ENGINE = create_engine(connection_string, pool_size=40, max_overflow=10) return _SYNC_ENGINE @@ -64,11 +67,20 @@ def get_sqlalchemy_async_engine() -> AsyncEngine: global _ASYNC_ENGINE if _ASYNC_ENGINE 
is None: connection_string = build_connection_string() - _ASYNC_ENGINE = create_async_engine(connection_string) + _ASYNC_ENGINE = create_async_engine( + connection_string, pool_size=40, max_overflow=10 + ) return _ASYNC_ENGINE +def get_session_context_manager() -> ContextManager[Session]: + return contextlib.contextmanager(get_session)() + + def get_session() -> Generator[Session, None, None]: + # The line below was added to monitor the latency caused by Postgres connections + # during API calls. + # with tracer.trace("db.get_session"): with Session(get_sqlalchemy_engine(), expire_on_commit=False) as session: yield session @@ -78,3 +90,29 @@ async def get_async_session() -> AsyncGenerator[AsyncSession, None]: get_sqlalchemy_async_engine(), expire_on_commit=False ) as async_session: yield async_session + + +async def warm_up_connections( + sync_connections_to_warm_up: int = 10, async_connections_to_warm_up: int = 10 +) -> None: + sync_postgres_engine = get_sqlalchemy_engine() + connections = [ + sync_postgres_engine.connect() for _ in range(sync_connections_to_warm_up) + ] + for conn in connections: + conn.execute(text("SELECT 1")) + for conn in connections: + conn.close() + + async_postgres_engine = get_sqlalchemy_async_engine() + async_connections = [ + await async_postgres_engine.connect() + for _ in range(async_connections_to_warm_up) + ] + for async_conn in async_connections: + await async_conn.execute(text("SELECT 1")) + for async_conn in async_connections: + await async_conn.close() + + +SessionFactory = sessionmaker(bind=get_sqlalchemy_engine()) diff --git a/backend/danswer/db/enums.py b/backend/danswer/db/enums.py new file mode 100644 index 000000000..2a02e078c --- /dev/null +++ b/backend/danswer/db/enums.py @@ -0,0 +1,35 @@ +from enum import Enum as PyEnum + + +class IndexingStatus(str, PyEnum): + NOT_STARTED = "not_started" + IN_PROGRESS = "in_progress" + SUCCESS = "success" + FAILED = "failed" + + +# these may differ in the future, which is why we're 
okay with this duplication +class DeletionStatus(str, PyEnum): + NOT_STARTED = "not_started" + IN_PROGRESS = "in_progress" + SUCCESS = "success" + FAILED = "failed" + + +# Consistent with Celery task statuses +class TaskStatus(str, PyEnum): + PENDING = "PENDING" + STARTED = "STARTED" + SUCCESS = "SUCCESS" + FAILURE = "FAILURE" + + +class IndexModelStatus(str, PyEnum): + PAST = "PAST" + PRESENT = "PRESENT" + FUTURE = "FUTURE" + + +class ChatSessionSharedStatus(str, PyEnum): + PUBLIC = "public" + PRIVATE = "private" diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py index 474e46a3a..bb7da0864 100644 --- a/backend/danswer/db/feedback.py +++ b/backend/danswer/db/feedback.py @@ -129,7 +129,7 @@ def create_doc_retrieval_feedback( db_session.commit() -def delete_document_feedback_for_documents( +def delete_document_feedback_for_documents__no_commit( document_ids: list[str], db_session: Session ) -> None: """NOTE: does not commit transaction so that this can be used as part of a @@ -148,8 +148,14 @@ def create_chat_message_feedback( db_session: Session, # Slack user requested help from human required_followup: bool | None = None, + predefined_feedback: str | None = None, # Added predefined_feedback parameter ) -> None: - if is_positive is None and feedback_text is None and required_followup is None: + if ( + is_positive is None + and feedback_text is None + and required_followup is None + and predefined_feedback is None + ): raise ValueError("No feedback provided") chat_message = get_chat_message( @@ -164,6 +170,7 @@ def create_chat_message_feedback( is_positive=is_positive, feedback_text=feedback_text, required_followup=required_followup, + predefined_feedback=predefined_feedback, ) db_session.add(message_feedback) diff --git a/backend/danswer/db/folder.py b/backend/danswer/db/folder.py new file mode 100644 index 000000000..77e543a8d --- /dev/null +++ b/backend/danswer/db/folder.py @@ -0,0 +1,132 @@ +from uuid import UUID + +from sqlalchemy.orm 
import Session + +from danswer.db.chat import delete_chat_session +from danswer.db.models import ChatFolder +from danswer.db.models import ChatSession +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def get_user_folders( + user_id: UUID | None, + db_session: Session, +) -> list[ChatFolder]: + return db_session.query(ChatFolder).filter(ChatFolder.user_id == user_id).all() + + +def update_folder_display_priority( + user_id: UUID | None, + display_priority_map: dict[int, int], + db_session: Session, +) -> None: + folders = get_user_folders(user_id=user_id, db_session=db_session) + folder_ids = {folder.id for folder in folders} + if folder_ids != set(display_priority_map.keys()): + raise ValueError("Invalid Folder IDs provided") + + for folder in folders: + folder.display_priority = display_priority_map[folder.id] + + db_session.commit() + + +def get_folder_by_id( + user_id: UUID | None, + folder_id: int, + db_session: Session, +) -> ChatFolder: + folder = ( + db_session.query(ChatFolder).filter(ChatFolder.id == folder_id).one_or_none() + ) + if not folder: + raise ValueError("Folder by specified id does not exist") + + if folder.user_id != user_id: + raise PermissionError(f"Folder does not belong to user: {user_id}") + + return folder + + +def create_folder( + user_id: UUID | None, folder_name: str | None, db_session: Session +) -> int: + new_folder = ChatFolder( + user_id=user_id, + name=folder_name, + ) + db_session.add(new_folder) + db_session.commit() + + return new_folder.id + + +def rename_folder( + user_id: UUID | None, folder_id: int, folder_name: str | None, db_session: Session +) -> None: + folder = get_folder_by_id( + user_id=user_id, folder_id=folder_id, db_session=db_session + ) + + folder.name = folder_name + db_session.commit() + + +def add_chat_to_folder( + user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session +) -> None: + folder = get_folder_by_id( + user_id=user_id, folder_id=folder_id, 
db_session=db_session + ) + + chat_session.folder_id = folder.id + + db_session.commit() + + +def remove_chat_from_folder( + user_id: UUID | None, folder_id: int, chat_session: ChatSession, db_session: Session +) -> None: + folder = get_folder_by_id( + user_id=user_id, folder_id=folder_id, db_session=db_session + ) + + if chat_session.folder_id != folder.id: + raise ValueError("The chat session is not in the specified folder.") + + if folder.user_id != user_id: + raise ValueError( + f"Tried to remove a chat session from a folder that does not below to " + f"this user, user id: {user_id}" + ) + + chat_session.folder_id = None + if chat_session in folder.chat_sessions: + folder.chat_sessions.remove(chat_session) + + db_session.commit() + + +def delete_folder( + user_id: UUID | None, + folder_id: int, + including_chats: bool, + db_session: Session, +) -> None: + folder = get_folder_by_id( + user_id=user_id, folder_id=folder_id, db_session=db_session + ) + + # Assuming there will not be a massive number of chats in any given folder + if including_chats: + for chat_session in folder.chat_sessions: + delete_chat_session( + user_id=user_id, + chat_session_id=chat_session.id, + db_session=db_session, + ) + + db_session.delete(folder) + db_session.commit() diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 7e08c167b..51c41c719 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -75,14 +75,11 @@ def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]: return list(new_attempts.all()) -def mark_attempt_in_progress( +def mark_attempt_in_progress__no_commit( index_attempt: IndexAttempt, - db_session: Session, ) -> None: index_attempt.status = IndexingStatus.IN_PROGRESS index_attempt.time_started = index_attempt.time_started or func.now() # type: ignore - db_session.add(index_attempt) - db_session.commit() def mark_attempt_succeeded( @@ -115,9 +112,11 @@ def 
update_docs_indexed( index_attempt: IndexAttempt, total_docs_indexed: int, new_docs_indexed: int, + docs_removed_from_index: int, ) -> None: index_attempt.total_docs_indexed = total_docs_indexed index_attempt.new_docs_indexed = new_docs_indexed + index_attempt.docs_removed_from_index = docs_removed_from_index db_session.add(index_attempt) db_session.commit() @@ -229,13 +228,24 @@ def expire_index_attempts( embedding_model_id: int, db_session: Session, ) -> None: + delete_query = ( + delete(IndexAttempt) + .where(IndexAttempt.embedding_model_id == embedding_model_id) + .where(IndexAttempt.status == IndexingStatus.NOT_STARTED) + ) + db_session.execute(delete_query) + update_query = ( update(IndexAttempt) .where(IndexAttempt.embedding_model_id == embedding_model_id) .where(IndexAttempt.status != IndexingStatus.SUCCESS) - .values(status=IndexingStatus.FAILED, error_msg="Embedding model swapped") + .values( + status=IndexingStatus.FAILED, + error_msg="Canceled due to embedding model swap", + ) ) db_session.execute(update_query) + db_session.commit() @@ -244,20 +254,15 @@ def cancel_indexing_attempts_for_connector( db_session: Session, include_secondary_index: bool = False, ) -> None: - subquery = select(EmbeddingModel.id).where( - EmbeddingModel.status != IndexModelStatus.FUTURE - ) - - stmt = ( - update(IndexAttempt) - .where( - IndexAttempt.connector_id == connector_id, - IndexAttempt.status == IndexingStatus.NOT_STARTED, - ) - .values(status=IndexingStatus.FAILED) + stmt = delete(IndexAttempt).where( + IndexAttempt.connector_id == connector_id, + IndexAttempt.status == IndexingStatus.NOT_STARTED, ) if not include_secondary_index: + subquery = select(EmbeddingModel.id).where( + EmbeddingModel.status != IndexModelStatus.FUTURE + ) stmt = stmt.where(IndexAttempt.embedding_model_id.in_(subquery)) db_session.execute(stmt) @@ -283,20 +288,18 @@ def cancel_indexing_attempts_past_model( db_session.commit() -def count_unique_cc_pairs_with_index_attempts( +def 
count_unique_cc_pairs_with_successful_index_attempts( embedding_model_id: int | None, db_session: Session, ) -> int: + """Collect all of the Index Attempts that are successful and for the specified embedding model + Then do distinct by connector_id and credential_id which is equivalent to the cc-pair. Finally, + do a count to get the total number of unique cc-pairs with successful attempts""" unique_pairs_count = ( db_session.query(IndexAttempt.connector_id, IndexAttempt.credential_id) .filter( IndexAttempt.embedding_model_id == embedding_model_id, - # Should not be able to hang since indexing jobs expire after a limit - # It will then be marked failed, and the next cycle it will be in a completed state - or_( - IndexAttempt.status == IndexingStatus.SUCCESS, - IndexAttempt.status == IndexingStatus.FAILED, - ), + IndexAttempt.status == IndexingStatus.SUCCESS, ) .distinct() .count() diff --git a/backend/danswer/db/llm.py b/backend/danswer/db/llm.py new file mode 100644 index 000000000..f969dbf68 --- /dev/null +++ b/backend/danswer/db/llm.py @@ -0,0 +1,98 @@ +from sqlalchemy import delete +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.db.models import LLMProvider as LLMProviderModel +from danswer.server.manage.llm.models import FullLLMProvider +from danswer.server.manage.llm.models import LLMProviderUpsertRequest + + +def upsert_llm_provider( + db_session: Session, llm_provider: LLMProviderUpsertRequest +) -> FullLLMProvider: + existing_llm_provider = db_session.scalar( + select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name) + ) + if existing_llm_provider: + existing_llm_provider.provider = llm_provider.provider + existing_llm_provider.api_key = llm_provider.api_key + existing_llm_provider.api_base = llm_provider.api_base + existing_llm_provider.api_version = llm_provider.api_version + existing_llm_provider.custom_config = llm_provider.custom_config + existing_llm_provider.default_model_name = 
llm_provider.default_model_name + existing_llm_provider.fast_default_model_name = ( + llm_provider.fast_default_model_name + ) + existing_llm_provider.model_names = llm_provider.model_names + db_session.commit() + return FullLLMProvider.from_model(existing_llm_provider) + + # if it does not exist, create a new entry + llm_provider_model = LLMProviderModel( + name=llm_provider.name, + provider=llm_provider.provider, + api_key=llm_provider.api_key, + api_base=llm_provider.api_base, + api_version=llm_provider.api_version, + custom_config=llm_provider.custom_config, + default_model_name=llm_provider.default_model_name, + fast_default_model_name=llm_provider.fast_default_model_name, + model_names=llm_provider.model_names, + is_default_provider=None, + ) + db_session.add(llm_provider_model) + db_session.commit() + + return FullLLMProvider.from_model(llm_provider_model) + + +def fetch_existing_llm_providers(db_session: Session) -> list[LLMProviderModel]: + return list(db_session.scalars(select(LLMProviderModel)).all()) + + +def fetch_default_provider(db_session: Session) -> FullLLMProvider | None: + provider_model = db_session.scalar( + select(LLMProviderModel).where( + LLMProviderModel.is_default_provider == True # noqa: E712 + ) + ) + if not provider_model: + return None + return FullLLMProvider.from_model(provider_model) + + +def fetch_provider(db_session: Session, provider_name: str) -> FullLLMProvider | None: + provider_model = db_session.scalar( + select(LLMProviderModel).where(LLMProviderModel.name == provider_name) + ) + if not provider_model: + return None + return FullLLMProvider.from_model(provider_model) + + +def remove_llm_provider(db_session: Session, provider_id: int) -> None: + db_session.execute( + delete(LLMProviderModel).where(LLMProviderModel.id == provider_id) + ) + db_session.commit() + + +def update_default_provider(db_session: Session, provider_id: int) -> None: + new_default = db_session.scalar( + select(LLMProviderModel).where(LLMProviderModel.id 
== provider_id) + ) + if not new_default: + raise ValueError(f"LLM Provider with id {provider_id} does not exist") + + existing_default = db_session.scalar( + select(LLMProviderModel).where( + LLMProviderModel.is_default_provider == True # noqa: E712 + ) + ) + if existing_default: + existing_default.is_default_provider = None + # required to ensure that the below does not cause a unique constraint violation + db_session.flush() + + new_default.is_default_provider = True + db_session.commit() diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index ecd96103f..3b4b67f09 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -1,15 +1,15 @@ import datetime +import json from enum import Enum as PyEnum from typing import Any -from typing import List from typing import Literal from typing import NotRequired from typing import Optional from typing import TypedDict from uuid import UUID -from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID -from fastapi_users.db import SQLAlchemyBaseUserTableUUID +from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID +from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID from sqlalchemy import Boolean from sqlalchemy import DateTime @@ -24,54 +24,73 @@ from sqlalchemy import String from sqlalchemy import Text from sqlalchemy import UniqueConstraint from sqlalchemy.dialects import postgresql +from sqlalchemy.engine.interfaces import Dialect from sqlalchemy.orm import DeclarativeBase from sqlalchemy.orm import Mapped from sqlalchemy.orm import mapped_column from sqlalchemy.orm import relationship +from sqlalchemy.types import LargeBinary +from sqlalchemy.types import TypeDecorator from danswer.auth.schemas import UserRole from danswer.configs.constants import DEFAULT_BOOST from danswer.configs.constants import DocumentSource +from danswer.configs.constants import 
FileOrigin from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType +from danswer.configs.constants import TokenRateLimitScope from danswer.connectors.models import InputType -from danswer.search.models import RecencyBiasSetting -from danswer.search.models import SearchType - - -class IndexingStatus(str, PyEnum): - NOT_STARTED = "not_started" - IN_PROGRESS = "in_progress" - SUCCESS = "success" - FAILED = "failed" - - -# these may differ in the future, which is why we're okay with this duplication -class DeletionStatus(str, PyEnum): - NOT_STARTED = "not_started" - IN_PROGRESS = "in_progress" - SUCCESS = "success" - FAILED = "failed" - - -# Consistent with Celery task statuses -class TaskStatus(str, PyEnum): - PENDING = "PENDING" - STARTED = "STARTED" - SUCCESS = "SUCCESS" - FAILURE = "FAILURE" - - -class IndexModelStatus(str, PyEnum): - PAST = "PAST" - PRESENT = "PRESENT" - FUTURE = "FUTURE" +from danswer.db.enums import ChatSessionSharedStatus +from danswer.db.enums import IndexingStatus +from danswer.db.enums import IndexModelStatus +from danswer.db.enums import TaskStatus +from danswer.db.pydantic_type import PydanticType +from danswer.dynamic_configs.interface import JSON_ro +from danswer.file_store.models import FileDescriptor +from danswer.llm.override_models import LLMOverride +from danswer.llm.override_models import PromptOverride +from danswer.search.enums import RecencyBiasSetting +from danswer.search.enums import SearchType +from danswer.utils.encryption import decrypt_bytes_to_string +from danswer.utils.encryption import encrypt_string_to_bytes class Base(DeclarativeBase): pass +class EncryptedString(TypeDecorator): + impl = LargeBinary + + def process_bind_param(self, value: str | None, dialect: Dialect) -> bytes | None: + if value is not None: + return encrypt_string_to_bytes(value) + return value + + def process_result_value(self, value: bytes | None, dialect: Dialect) -> str | None: + if value is not 
None: + return decrypt_bytes_to_string(value) + return value + + +class EncryptedJson(TypeDecorator): + impl = LargeBinary + + def process_bind_param(self, value: dict | None, dialect: Dialect) -> bytes | None: + if value is not None: + json_str = json.dumps(value) + return encrypt_string_to_bytes(json_str) + return value + + def process_result_value( + self, value: bytes | None, dialect: Dialect + ) -> dict | None: + if value is not None: + json_str = decrypt_bytes_to_string(value) + return json.loads(json_str) + return value + + """ Auth/Authz (users, permissions, access) Tables """ @@ -83,20 +102,37 @@ class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base): class User(SQLAlchemyBaseUserTableUUID, Base): - oauth_accounts: Mapped[List[OAuthAccount]] = relationship( + oauth_accounts: Mapped[list[OAuthAccount]] = relationship( "OAuthAccount", lazy="joined" ) role: Mapped[UserRole] = mapped_column( Enum(UserRole, native_enum=False, default=UserRole.BASIC) ) - credentials: Mapped[List["Credential"]] = relationship( + + """ + Preferences probably should be in a separate table at some point, but for now + putting here for simpicity + """ + + # if specified, controls the assistants that are shown to the user + their order + # if not specified, all assistants are shown + chosen_assistants: Mapped[list[int]] = mapped_column( + postgresql.ARRAY(Integer), nullable=True + ) + + # relationships + credentials: Mapped[list["Credential"]] = relationship( "Credential", back_populates="user", lazy="joined" ) - chat_sessions: Mapped[List["ChatSession"]] = relationship( + chat_sessions: Mapped[list["ChatSession"]] = relationship( "ChatSession", back_populates="user" ) - prompts: Mapped[List["Prompt"]] = relationship("Prompt", back_populates="user") - personas: Mapped[List["Persona"]] = relationship("Persona", back_populates="user") + chat_folders: Mapped[list["ChatFolder"]] = relationship( + "ChatFolder", back_populates="user" + ) + prompts: Mapped[list["Prompt"]] = 
relationship("Prompt", back_populates="user") + # Personas owned by this user + personas: Mapped[list["Persona"]] = relationship("Persona", back_populates="user") class AccessToken(SQLAlchemyBaseAccessTokenTableUUID, Base): @@ -107,6 +143,7 @@ class ApiKey(Base): __tablename__ = "api_key" id: Mapped[int] = mapped_column(Integer, primary_key=True) + name: Mapped[str | None] = mapped_column(String, nullable=True) hashed_api_key: Mapped[str] = mapped_column(String, unique=True) api_key_display: Mapped[str] = mapped_column(String, unique=True) # the ID of the "user" who represents the access credentials for the API key @@ -140,6 +177,22 @@ class Persona__Prompt(Base): prompt_id: Mapped[int] = mapped_column(ForeignKey("prompt.id"), primary_key=True) +class Persona__User(Base): + __tablename__ = "persona__user" + + persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) + user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + + +class DocumentSet__User(Base): + __tablename__ = "document_set__user" + + document_set_id: Mapped[int] = mapped_column( + ForeignKey("document_set.id"), primary_key=True + ) + user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + + class DocumentSet__ConnectorCredentialPair(Base): __tablename__ = "document_set__connector_credential_pair" @@ -184,6 +237,13 @@ class Document__Tag(Base): tag_id: Mapped[int] = mapped_column(ForeignKey("tag.id"), primary_key=True) +class Persona__Tool(Base): + __tablename__ = "persona__tool" + + persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) + tool_id: Mapped[int] = mapped_column(ForeignKey("tool.id"), primary_key=True) + + """ Documents/Indexing Tables """ @@ -223,9 +283,6 @@ class ConnectorCredentialPair(Base): last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column( DateTime(timezone=True), default=None ) - last_attempt_status: Mapped[IndexingStatus | None] = mapped_column( - 
Enum(IndexingStatus) - ) total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0) connector: Mapped["Connector"] = relationship( @@ -234,9 +291,13 @@ class ConnectorCredentialPair(Base): credential: Mapped["Credential"] = relationship( "Credential", back_populates="connectors" ) - document_sets: Mapped[List["DocumentSet"]] = relationship( + document_sets: Mapped[list["DocumentSet"]] = relationship( "DocumentSet", secondary=DocumentSet__ConnectorCredentialPair.__table__, + primaryjoin=( + (DocumentSet__ConnectorCredentialPair.connector_credential_pair_id == id) + & (DocumentSet__ConnectorCredentialPair.is_current.is_(True)) + ), back_populates="connector_credential_pairs", overlaps="document_set", ) @@ -275,7 +336,7 @@ class Document(Base): ) # TODO if more sensitive data is added here for display, make sure to add user/group permission - retrieval_feedbacks: Mapped[List["DocumentRetrievalFeedback"]] = relationship( + retrieval_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship( "DocumentRetrievalFeedback", back_populates="document" ) tags = relationship( @@ -291,7 +352,9 @@ class Tag(Base): id: Mapped[int] = mapped_column(primary_key=True) tag_key: Mapped[str] = mapped_column(String) tag_value: Mapped[str] = mapped_column(String) - source: Mapped[DocumentSource] = mapped_column(Enum(DocumentSource)) + source: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) documents = relationship( "Document", @@ -327,15 +390,15 @@ class Connector(Base): ) disabled: Mapped[bool] = mapped_column(Boolean, default=False) - credentials: Mapped[List["ConnectorCredentialPair"]] = relationship( + credentials: Mapped[list["ConnectorCredentialPair"]] = relationship( "ConnectorCredentialPair", back_populates="connector", cascade="all, delete-orphan", ) documents_by_connector: Mapped[ - List["DocumentByConnectorCredentialPair"] + list["DocumentByConnectorCredentialPair"] ] = relationship("DocumentByConnectorCredentialPair", 
back_populates="connector") - index_attempts: Mapped[List["IndexAttempt"]] = relationship( + index_attempts: Mapped[list["IndexAttempt"]] = relationship( "IndexAttempt", back_populates="connector" ) @@ -344,7 +407,7 @@ class Credential(Base): __tablename__ = "credential" id: Mapped[int] = mapped_column(primary_key=True) - credential_json: Mapped[dict[str, Any]] = mapped_column(postgresql.JSONB()) + credential_json: Mapped[dict[str, Any]] = mapped_column(EncryptedJson()) user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) # if `true`, then all Admins will have access to the credential admin_public: Mapped[bool] = mapped_column(Boolean, default=True) @@ -355,15 +418,15 @@ class Credential(Base): DateTime(timezone=True), server_default=func.now(), onupdate=func.now() ) - connectors: Mapped[List["ConnectorCredentialPair"]] = relationship( + connectors: Mapped[list["ConnectorCredentialPair"]] = relationship( "ConnectorCredentialPair", back_populates="credential", cascade="all, delete-orphan", ) documents_by_credential: Mapped[ - List["DocumentByConnectorCredentialPair"] + list["DocumentByConnectorCredentialPair"] ] = relationship("DocumentByConnectorCredentialPair", back_populates="credential") - index_attempts: Mapped[List["IndexAttempt"]] = relationship( + index_attempts: Mapped[list["IndexAttempt"]] = relationship( "IndexAttempt", back_populates="credential" ) user: Mapped[User | None] = relationship("User", back_populates="credentials") @@ -378,10 +441,12 @@ class EmbeddingModel(Base): normalize: Mapped[bool] = mapped_column(Boolean) query_prefix: Mapped[str] = mapped_column(String) passage_prefix: Mapped[str] = mapped_column(String) - status: Mapped[IndexModelStatus] = mapped_column(Enum(IndexModelStatus)) + status: Mapped[IndexModelStatus] = mapped_column( + Enum(IndexModelStatus, native_enum=False) + ) index_name: Mapped[str] = mapped_column(String) - index_attempts: Mapped[List["IndexAttempt"]] = relationship( + index_attempts: 
Mapped[list["IndexAttempt"]] = relationship( "IndexAttempt", back_populates="embedding_model" ) @@ -423,10 +488,13 @@ class IndexAttempt(Base): # This is only for attempts that are explicitly marked as from the start via # the run once API from_beginning: Mapped[bool] = mapped_column(Boolean) - status: Mapped[IndexingStatus] = mapped_column(Enum(IndexingStatus)) + status: Mapped[IndexingStatus] = mapped_column( + Enum(IndexingStatus, native_enum=False) + ) # The two below may be slightly out of sync if user switches Embedding Model new_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0) total_docs_indexed: Mapped[int | None] = mapped_column(Integer, default=0) + docs_removed_from_index: Mapped[int | None] = mapped_column(Integer, default=0) # only filled if status = "failed" error_msg: Mapped[str | None] = mapped_column(Text, default=None) # only filled if status = "failed" AND an unhandled exception caused the failure @@ -525,7 +593,9 @@ class SearchDoc(Base): link: Mapped[str | None] = mapped_column(String, nullable=True) blurb: Mapped[str] = mapped_column(String) boost: Mapped[int] = mapped_column(Integer) - source_type: Mapped[DocumentSource] = mapped_column(Enum(DocumentSource)) + source_type: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) hidden: Mapped[bool] = mapped_column(Boolean) doc_metadata: Mapped[dict[str, str | list[str]]] = mapped_column(postgresql.JSONB()) score: Mapped[float] = mapped_column(Float) @@ -557,8 +627,31 @@ class ChatSession(Base): description: Mapped[str] = mapped_column(Text) # One-shot direct answering, currently the two types of chats are not mixed one_shot: Mapped[bool] = mapped_column(Boolean, default=False) + danswerbot_flow: Mapped[bool] = mapped_column(Boolean, default=False) # Only ever set to True if system is set to not hard-delete chats deleted: Mapped[bool] = mapped_column(Boolean, default=False) + # controls whether or not this conversation is viewable by others + 
shared_status: Mapped[ChatSessionSharedStatus] = mapped_column( + Enum(ChatSessionSharedStatus, native_enum=False), + default=ChatSessionSharedStatus.PRIVATE, + ) + folder_id: Mapped[int | None] = mapped_column( + ForeignKey("chat_folder.id"), nullable=True + ) + + # the latest "overrides" specified by the user. These take precedence over + # the attached persona. However, overrides specified directly in the + # `send-message` call will take precedence over these. + # NOTE: currently only used by the chat seeding flow, will be used in the + # future once we allow users to override default values via the Chat UI + # itself + llm_override: Mapped[LLMOverride | None] = mapped_column( + PydanticType(LLMOverride), nullable=True + ) + prompt_override: Mapped[PromptOverride | None] = mapped_column( + PydanticType(PromptOverride), nullable=True + ) + time_updated: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), @@ -569,7 +662,10 @@ class ChatSession(Base): ) user: Mapped[User] = relationship("User", back_populates="chat_sessions") - messages: Mapped[List["ChatMessage"]] = relationship( + folder: Mapped["ChatFolder"] = relationship( + "ChatFolder", back_populates="chat_sessions" + ) + messages: Mapped[list["ChatMessage"]] = relationship( "ChatMessage", back_populates="chat_session", cascade="delete" ) persona: Mapped["Persona"] = relationship("Persona") @@ -598,9 +694,16 @@ class ChatMessage(Base): # If prompt is None, then token_count is 0 as this message won't be passed into # the LLM's context (not included in the history of messages) token_count: Mapped[int] = mapped_column(Integer) - message_type: Mapped[MessageType] = mapped_column(Enum(MessageType)) + message_type: Mapped[MessageType] = mapped_column( + Enum(MessageType, native_enum=False) + ) # Maps the citation numbers to a SearchDoc id citations: Mapped[dict[int, int]] = mapped_column(postgresql.JSONB(), nullable=True) + # files associated with this message (e.g. 
images uploaded by the user that the + # user is asking a question of) + files: Mapped[list[FileDescriptor] | None] = mapped_column( + postgresql.JSONB(), nullable=True + ) # Only applies for LLM error: Mapped[str | None] = mapped_column(Text, nullable=True) time_sent: Mapped[datetime.datetime] = mapped_column( @@ -609,19 +712,44 @@ class ChatMessage(Base): chat_session: Mapped[ChatSession] = relationship("ChatSession") prompt: Mapped[Optional["Prompt"]] = relationship("Prompt") - chat_message_feedbacks: Mapped[List["ChatMessageFeedback"]] = relationship( + chat_message_feedbacks: Mapped[list["ChatMessageFeedback"]] = relationship( "ChatMessageFeedback", back_populates="chat_message" ) - document_feedbacks: Mapped[List["DocumentRetrievalFeedback"]] = relationship( + document_feedbacks: Mapped[list["DocumentRetrievalFeedback"]] = relationship( "DocumentRetrievalFeedback", back_populates="chat_message" ) - search_docs = relationship( + search_docs: Mapped[list["SearchDoc"]] = relationship( "SearchDoc", secondary="chat_message__search_doc", back_populates="chat_messages", ) +class ChatFolder(Base): + """For organizing chat sessions""" + + __tablename__ = "chat_folder" + + id: Mapped[int] = mapped_column(primary_key=True) + # Only null if auth is off + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + name: Mapped[str | None] = mapped_column(String, nullable=True) + display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=0) + + user: Mapped[User] = relationship("User", back_populates="chat_folders") + chat_sessions: Mapped[list["ChatSession"]] = relationship( + "ChatSession", back_populates="folder" + ) + + def __lt__(self, other: Any) -> bool: + if not isinstance(other, ChatFolder): + return NotImplemented + if self.display_priority == other.display_priority: + # Bigger ID (created later) show earlier + return self.id > other.id + return self.display_priority < other.display_priority + + """ Feedback, Logging, 
Metrics Tables """ @@ -637,7 +765,7 @@ class DocumentRetrievalFeedback(Base): document_rank: Mapped[int] = mapped_column(Integer) clicked: Mapped[bool] = mapped_column(Boolean, default=False) feedback: Mapped[SearchFeedbackType | None] = mapped_column( - Enum(SearchFeedbackType), nullable=True + Enum(SearchFeedbackType, native_enum=False), nullable=True ) chat_message: Mapped[ChatMessage] = relationship( @@ -656,6 +784,7 @@ class ChatMessageFeedback(Base): is_positive: Mapped[bool | None] = mapped_column(Boolean, nullable=True) required_followup: Mapped[bool | None] = mapped_column(Boolean, nullable=True) feedback_text: Mapped[str | None] = mapped_column(Text, nullable=True) + predefined_feedback: Mapped[str | None] = mapped_column(String, nullable=True) chat_message: Mapped[ChatMessage] = relationship( "ChatMessage", back_populates="chat_message_feedbacks" @@ -667,6 +796,34 @@ Structures, Organizational, Configurations Tables """ +class LLMProvider(Base): + __tablename__ = "llm_provider" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + name: Mapped[str] = mapped_column(String, unique=True) + provider: Mapped[str] = mapped_column(String) + api_key: Mapped[str | None] = mapped_column(EncryptedString(), nullable=True) + api_base: Mapped[str | None] = mapped_column(String, nullable=True) + api_version: Mapped[str | None] = mapped_column(String, nullable=True) + # custom configs that should be passed to the LLM provider at inference time + # (e.g. `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, etc. for bedrock) + custom_config: Mapped[dict[str, str] | None] = mapped_column( + postgresql.JSONB(), nullable=True + ) + default_model_name: Mapped[str] = mapped_column(String) + fast_default_model_name: Mapped[str | None] = mapped_column(String, nullable=True) + + # The LLMs that are available for this provider. Only required if not a default provider. + # If a default provider, then the LLM options are pulled from the `options.py` file. 
+ # If needed, can be pulled out as a separate table in the future. + model_names: Mapped[list[str] | None] = mapped_column( + postgresql.ARRAY(String), nullable=True + ) + + # should only be set for a single provider + is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True) + + class DocumentSet(Base): __tablename__ = "document_set" @@ -676,10 +833,21 @@ class DocumentSet(Base): user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) # Whether changes to the document set have been propagated is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + # If `False`, then the document set is not visible to users who are not explicitly + # given access to it either via the `users` or `groups` relationships + is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) connector_credential_pairs: Mapped[list[ConnectorCredentialPair]] = relationship( "ConnectorCredentialPair", secondary=DocumentSet__ConnectorCredentialPair.__table__, + primaryjoin=( + (DocumentSet__ConnectorCredentialPair.document_set_id == id) + & (DocumentSet__ConnectorCredentialPair.is_current.is_(True)) + ), + secondaryjoin=( + DocumentSet__ConnectorCredentialPair.connector_credential_pair_id + == ConnectorCredentialPair.id + ), back_populates="document_sets", overlaps="document_set", ) @@ -688,13 +856,24 @@ class DocumentSet(Base): secondary=Persona__DocumentSet.__table__, back_populates="document_sets", ) + # Other users with access + users: Mapped[list[User]] = relationship( + "User", + secondary=DocumentSet__User.__table__, + viewonly=True, + ) + # EE only + groups: Mapped[list["UserGroup"]] = relationship( + "UserGroup", + secondary="document_set__user_group", + viewonly=True, + ) class Prompt(Base): __tablename__ = "prompt" id: Mapped[int] = mapped_column(primary_key=True) - # If not belong to a user, then it's shared user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) 
name: Mapped[str] = mapped_column(String) description: Mapped[str] = mapped_column(String) @@ -715,17 +894,43 @@ class Prompt(Base): ) +class Tool(Base): + __tablename__ = "tool" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + name: Mapped[str] = mapped_column(String, nullable=False) + description: Mapped[str] = mapped_column(Text, nullable=True) + # ID of the tool in the codebase, only applies for in-code tools. + # tools defiend via the UI will have this as None + in_code_tool_id: Mapped[str | None] = mapped_column(String, nullable=True) + + # Relationship to Persona through the association table + personas: Mapped[list["Persona"]] = relationship( + "Persona", + secondary=Persona__Tool.__table__, + back_populates="tools", + ) + + +class StarterMessage(TypedDict): + """NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column + in Postgres""" + + name: str + description: str + message: str + + class Persona(Base): __tablename__ = "persona" id: Mapped[int] = mapped_column(primary_key=True) - # If not belong to a user, then it's shared user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) name: Mapped[str] = mapped_column(String) description: Mapped[str] = mapped_column(String) # Currently stored but unused, all flows use hybrid search_type: Mapped[SearchType] = mapped_column( - Enum(SearchType), default=SearchType.HYBRID + Enum(SearchType, native_enum=False), default=SearchType.HYBRID ) # Number of chunks to pass to the LLM for generation. 
num_chunks: Mapped[float | None] = mapped_column(Float, nullable=True) @@ -735,14 +940,22 @@ class Persona(Base): # Enables using LLM to extract time and source type filters # Can also be admin disabled globally llm_filter_extraction: Mapped[bool] = mapped_column(Boolean) - recency_bias: Mapped[RecencyBiasSetting] = mapped_column(Enum(RecencyBiasSetting)) + recency_bias: Mapped[RecencyBiasSetting] = mapped_column( + Enum(RecencyBiasSetting, native_enum=False) + ) # Allows the Persona to specify a different LLM version than is controlled # globablly via env variables. For flexibility, validity is not currently enforced # NOTE: only is applied on the actual response generation - is not used for things like # auto-detected time filters, relevance filters, etc. + llm_model_provider_override: Mapped[str | None] = mapped_column( + String, nullable=True + ) llm_model_version_override: Mapped[str | None] = mapped_column( String, nullable=True ) + starter_messages: Mapped[list[StarterMessage] | None] = mapped_column( + postgresql.JSONB(), nullable=True + ) # Default personas are configured via backend during deployment # Treated specially (cannot be user edited etc.) default_persona: Mapped[bool] = mapped_column(Boolean, default=False) @@ -753,6 +966,7 @@ class Persona(Base): # where lower value IDs (e.g. 
created earlier) are displayed first display_priority: Mapped[int] = mapped_column(Integer, nullable=True, default=None) deleted: Mapped[bool] = mapped_column(Boolean, default=False) + is_public: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) # These are only defaults, users can select from all if desired prompts: Mapped[list[Prompt]] = relationship( @@ -766,7 +980,25 @@ class Persona(Base): secondary=Persona__DocumentSet.__table__, back_populates="personas", ) - user: Mapped[User] = relationship("User", back_populates="personas") + tools: Mapped[list[Tool]] = relationship( + "Tool", + secondary=Persona__Tool.__table__, + back_populates="personas", + ) + # Owner + user: Mapped[User | None] = relationship("User", back_populates="personas") + # Other users with access + users: Mapped[list[User]] = relationship( + "User", + secondary=Persona__User.__table__, + viewonly=True, + ) + # EE only + groups: Mapped[list["UserGroup"]] = relationship( + "UserGroup", + secondary="persona__user_group", + viewonly=True, + ) # Default personas loaded via yaml cannot have the same name __table_args__ = ( @@ -798,6 +1030,11 @@ class ChannelConfig(TypedDict): follow_up_tags: NotRequired[list[str]] +class SlackBotResponseType(str, PyEnum): + QUOTES = "quotes" + CITATIONS = "citations" + + class SlackBotConfig(Base): __tablename__ = "slack_bot_config" @@ -809,6 +1046,9 @@ class SlackBotConfig(Base): channel_config: Mapped[ChannelConfig] = mapped_column( postgresql.JSONB(), nullable=False ) + response_type: Mapped[SlackBotResponseType] = mapped_column( + Enum(SlackBotResponseType, native_enum=False), nullable=False + ) persona: Mapped[Persona | None] = relationship("Persona") @@ -823,10 +1063,270 @@ class TaskQueueState(Base): # For any job type, this would be the same task_name: Mapped[str] = mapped_column(String) # Note that if the task dies, this won't necessarily be marked FAILED correctly - status: Mapped[TaskStatus] = mapped_column(Enum(TaskStatus)) + status: 
Mapped[TaskStatus] = mapped_column(Enum(TaskStatus, native_enum=False)) start_time: Mapped[datetime.datetime | None] = mapped_column( DateTime(timezone=True) ) register_time: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now() ) + + +class KVStore(Base): + __tablename__ = "key_value_store" + + key: Mapped[str] = mapped_column(String, primary_key=True) + value: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True) + encrypted_value: Mapped[JSON_ro] = mapped_column(EncryptedJson(), nullable=True) + + +class PGFileStore(Base): + __tablename__ = "file_store" + + file_name: Mapped[str] = mapped_column(String, primary_key=True) + display_name: Mapped[str] = mapped_column(String, nullable=True) + file_origin: Mapped[FileOrigin] = mapped_column(Enum(FileOrigin, native_enum=False)) + file_type: Mapped[str] = mapped_column(String, default="text/plain") + file_metadata: Mapped[JSON_ro] = mapped_column(postgresql.JSONB(), nullable=True) + lobj_oid: Mapped[int] = mapped_column(Integer, nullable=False) + + +""" +************************************************************************ +Enterprise Edition Models +************************************************************************ + +These models are only used in Enterprise Edition only features in Danswer. +They are kept here to simplify the codebase and avoid having different assumptions +on the shape of data being passed around between the MIT and EE versions of Danswer. + +In the MIT version of Danswer, assume these tables are always empty. 
+""" + + +class SamlAccount(Base): + __tablename__ = "saml" + + id: Mapped[int] = mapped_column(primary_key=True) + user_id: Mapped[int] = mapped_column(ForeignKey("user.id"), unique=True) + encrypted_cookie: Mapped[str] = mapped_column(Text, unique=True) + expires_at: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) + updated_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + user: Mapped[User] = relationship("User") + + +class User__UserGroup(Base): + __tablename__ = "user__user_group" + + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + user_id: Mapped[UUID] = mapped_column(ForeignKey("user.id"), primary_key=True) + + +class UserGroup__ConnectorCredentialPair(Base): + __tablename__ = "user_group__connector_credential_pair" + + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + cc_pair_id: Mapped[int] = mapped_column( + ForeignKey("connector_credential_pair.id"), primary_key=True + ) + # if `True`, then is part of the current state of the UserGroup + # if `False`, then is a part of the prior state of the UserGroup + # rows with `is_current=False` should be deleted when the UserGroup + # is updated and should not exist for a given UserGroup if + # `UserGroup.is_up_to_date == True` + is_current: Mapped[bool] = mapped_column( + Boolean, + default=True, + primary_key=True, + ) + + cc_pair: Mapped[ConnectorCredentialPair] = relationship( + "ConnectorCredentialPair", + ) + + +class Persona__UserGroup(Base): + __tablename__ = "persona__user_group" + + persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + + +class DocumentSet__UserGroup(Base): + __tablename__ = "document_set__user_group" + + document_set_id: Mapped[int] = mapped_column( + 
ForeignKey("document_set.id"), primary_key=True + ) + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + + +class UserGroup(Base): + __tablename__ = "user_group" + + id: Mapped[int] = mapped_column(primary_key=True) + name: Mapped[str] = mapped_column(String, unique=True) + # whether or not changes to the UserGroup have been propagated to Vespa + is_up_to_date: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + # tell the sync job to clean up the group + is_up_for_deletion: Mapped[bool] = mapped_column( + Boolean, nullable=False, default=False + ) + + users: Mapped[list[User]] = relationship( + "User", + secondary=User__UserGroup.__table__, + ) + cc_pairs: Mapped[list[ConnectorCredentialPair]] = relationship( + "ConnectorCredentialPair", + secondary=UserGroup__ConnectorCredentialPair.__table__, + viewonly=True, + ) + cc_pair_relationships: Mapped[ + list[UserGroup__ConnectorCredentialPair] + ] = relationship( + "UserGroup__ConnectorCredentialPair", + viewonly=True, + ) + personas: Mapped[list[Persona]] = relationship( + "Persona", + secondary=Persona__UserGroup.__table__, + viewonly=True, + ) + document_sets: Mapped[list[DocumentSet]] = relationship( + "DocumentSet", + secondary=DocumentSet__UserGroup.__table__, + viewonly=True, + ) + + +"""Tables related to Token Rate Limiting +NOTE: `TokenRateLimit` is partially an MIT feature (global rate limit) +""" + + +class TokenRateLimit(Base): + __tablename__ = "token_rate_limit" + + id: Mapped[int] = mapped_column(primary_key=True) + enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) + token_budget: Mapped[int] = mapped_column(Integer, nullable=False) + period_hours: Mapped[int] = mapped_column(Integer, nullable=False) + scope: Mapped[TokenRateLimitScope] = mapped_column( + Enum(TokenRateLimitScope, native_enum=False) + ) + created_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now() 
+ ) + + +class TokenRateLimit__UserGroup(Base): + __tablename__ = "token_rate_limit__user_group" + + rate_limit_id: Mapped[int] = mapped_column( + ForeignKey("token_rate_limit.id"), primary_key=True + ) + user_group_id: Mapped[int] = mapped_column( + ForeignKey("user_group.id"), primary_key=True + ) + + +"""Tables related to Permission Sync""" + + +class PermissionSyncStatus(str, PyEnum): + IN_PROGRESS = "in_progress" + SUCCESS = "success" + FAILED = "failed" + + +class PermissionSyncJobType(str, PyEnum): + USER_LEVEL = "user_level" + GROUP_LEVEL = "group_level" + + +class PermissionSyncRun(Base): + """Represents one run of a permission sync job. For some given cc_pair, it is either sync-ing + the users or it is sync-ing the groups""" + + __tablename__ = "permission_sync_run" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + # Not strictly needed but makes it easy to use without fetching from cc_pair + source_type: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) + # Currently all sync jobs are handled as a group permission sync or a user permission sync + update_type: Mapped[PermissionSyncJobType] = mapped_column( + Enum(PermissionSyncJobType) + ) + cc_pair_id: Mapped[int | None] = mapped_column( + ForeignKey("connector_credential_pair.id"), nullable=True + ) + status: Mapped[PermissionSyncStatus] = mapped_column(Enum(PermissionSyncStatus)) + error_msg: Mapped[str | None] = mapped_column(Text, default=None) + updated_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now() + ) + + cc_pair: Mapped[ConnectorCredentialPair] = relationship("ConnectorCredentialPair") + + +class ExternalPermission(Base): + """Maps user info both internal and external to the name of the external group + This maps the user to all of their external groups so that the external group name can be + attached to the ACL list matching during query time. 
User level permissions can be handled by + directly adding the Danswer user to the doc ACL list""" + + __tablename__ = "external_permission" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + # Email is needed because we want to keep track of users not in Danswer to simplify process + # when the user joins + user_email: Mapped[str] = mapped_column(String) + source_type: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) + external_permission_group: Mapped[str] = mapped_column(String) + user = relationship("User") + + +class EmailToExternalUserCache(Base): + """A way to map user IDs in the external tool to a user in Danswer or at least an email for + when the user joins. Used as a cache for when fetching external groups which have their own + user ids, this can easily be mapped back to users already known in Danswer without needing + to call external APIs to get the user emails. + + This way when groups are updated in the external tool and we need to update the mapping of + internal users to the groups, we can sync the internal users to the external groups they are + part of using this. + + Ie. User Chris is part of groups alpha, beta, and we can update this if Chris is no longer + part of alpha in some external tool. 
+ """ + + __tablename__ = "email_to_external_user_cache" + + id: Mapped[int] = mapped_column(Integer, primary_key=True) + external_user_id: Mapped[str] = mapped_column(String) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=True) + # Email is needed because we want to keep track of users not in Danswer to simplify process + # when the user joins + user_email: Mapped[str] = mapped_column(String) + source_type: Mapped[DocumentSource] = mapped_column( + Enum(DocumentSource, native_enum=False) + ) + + user = relationship("User") diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py new file mode 100644 index 000000000..db543e0b9 --- /dev/null +++ b/backend/danswer/db/persona.py @@ -0,0 +1,135 @@ +from uuid import UUID + +from fastapi import HTTPException +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.db.chat import check_user_can_edit_persona +from danswer.db.chat import get_prompts_by_ids +from danswer.db.chat import upsert_persona +from danswer.db.document_set import get_document_sets_by_ids +from danswer.db.models import Persona +from danswer.db.models import Persona__User +from danswer.db.models import User +from danswer.server.features.persona.models import CreatePersonaRequest +from danswer.server.features.persona.models import PersonaSnapshot +from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import fetch_versioned_implementation + +logger = setup_logger() + + +def make_persona_private( + persona_id: int, + user_ids: list[UUID] | None, + group_ids: list[int] | None, + db_session: Session, +) -> None: + if user_ids is not None: + db_session.query(Persona__User).filter( + Persona__User.persona_id == persona_id + ).delete(synchronize_session="fetch") + + for user_uuid in user_ids: + db_session.add(Persona__User(persona_id=persona_id, user_id=user_uuid)) + + db_session.commit() + + # May cause error if someone switches down to MIT from EE + 
if group_ids: + raise NotImplementedError("Danswer MIT does not support private Personas") + + +def create_update_persona( + persona_id: int | None, + create_persona_request: CreatePersonaRequest, + user: User | None, + db_session: Session, +) -> PersonaSnapshot: + # Permission to actually use these is checked later + document_sets = list( + get_document_sets_by_ids( + document_set_ids=create_persona_request.document_set_ids, + db_session=db_session, + ) + ) + prompts = list( + get_prompts_by_ids( + prompt_ids=create_persona_request.prompt_ids, + db_session=db_session, + ) + ) + + try: + persona = upsert_persona( + persona_id=persona_id, + user=user, + name=create_persona_request.name, + description=create_persona_request.description, + num_chunks=create_persona_request.num_chunks, + llm_relevance_filter=create_persona_request.llm_relevance_filter, + llm_filter_extraction=create_persona_request.llm_filter_extraction, + recency_bias=create_persona_request.recency_bias, + prompts=prompts, + tool_ids=create_persona_request.tool_ids, + document_sets=document_sets, + llm_model_provider_override=create_persona_request.llm_model_provider_override, + llm_model_version_override=create_persona_request.llm_model_version_override, + starter_messages=create_persona_request.starter_messages, + is_public=create_persona_request.is_public, + db_session=db_session, + ) + + versioned_make_persona_private = fetch_versioned_implementation( + "danswer.db.persona", "make_persona_private" + ) + + # Privatize Persona + versioned_make_persona_private( + persona_id=persona.id, + user_ids=create_persona_request.users, + group_ids=create_persona_request.groups, + db_session=db_session, + ) + + except ValueError as e: + logger.exception("Failed to create persona") + raise HTTPException(status_code=400, detail=str(e)) + return PersonaSnapshot.from_model(persona) + + +def update_persona_shared_users( + persona_id: int, + user_ids: list[UUID], + user: User | None, + db_session: Session, +) -> 
None: + """Simplified version of `create_update_persona` which only touches the + accessibility rather than any of the logic (e.g. prompt, connected data sources, + etc.).""" + persona = fetch_persona_by_id(db_session=db_session, persona_id=persona_id) + if not persona: + raise HTTPException( + status_code=404, detail=f"Persona with ID {persona_id} not found" + ) + + check_user_can_edit_persona(user=user, persona=persona) + + if persona.is_public: + raise HTTPException(status_code=400, detail="Cannot share public persona") + + versioned_make_persona_private = fetch_versioned_implementation( + "danswer.db.persona", "make_persona_private" + ) + + # Privatize Persona + versioned_make_persona_private( + persona_id=persona_id, + user_ids=user_ids, + group_ids=None, + db_session=db_session, + ) + + +def fetch_persona_by_id(db_session: Session, persona_id: int) -> Persona | None: + return db_session.scalar(select(Persona).where(Persona.id == persona_id)) diff --git a/backend/danswer/db/pg_file_store.py b/backend/danswer/db/pg_file_store.py new file mode 100644 index 000000000..7146fc75b --- /dev/null +++ b/backend/danswer/db/pg_file_store.py @@ -0,0 +1,108 @@ +from io import BytesIO +from typing import IO + +from psycopg2.extensions import connection +from sqlalchemy.orm import Session + +from danswer.configs.constants import FileOrigin +from danswer.db.models import PGFileStore +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def get_pg_conn_from_session(db_session: Session) -> connection: + return db_session.connection().connection.connection # type: ignore + + +def create_populate_lobj( + content: IO, + db_session: Session, +) -> int: + """Note, this does not commit the changes to the DB + This is because the commit should happen with the PGFileStore row creation + That step finalizes both the Large Object and the table tracking it + """ + pg_conn = get_pg_conn_from_session(db_session) + large_object = pg_conn.lobject() + + 
large_object.write(content.read()) + large_object.close() + + return large_object.oid + + +def read_lobj(lobj_oid: int, db_session: Session, mode: str | None = None) -> IO: + pg_conn = get_pg_conn_from_session(db_session) + large_object = ( + pg_conn.lobject(lobj_oid, mode=mode) if mode else pg_conn.lobject(lobj_oid) + ) + return BytesIO(large_object.read()) + + +def delete_lobj_by_id( + lobj_oid: int, + db_session: Session, +) -> None: + pg_conn = get_pg_conn_from_session(db_session) + pg_conn.lobject(lobj_oid).unlink() + + +def upsert_pgfilestore( + file_name: str, + display_name: str | None, + file_origin: FileOrigin, + file_type: str, + lobj_oid: int, + db_session: Session, + commit: bool = False, + file_metadata: dict | None = None, +) -> PGFileStore: + pgfilestore = db_session.query(PGFileStore).filter_by(file_name=file_name).first() + + if pgfilestore: + try: + # This should not happen in normal execution + delete_lobj_by_id(lobj_oid=pgfilestore.lobj_oid, db_session=db_session) + except Exception: + # If the delete fails as well, the large object doesn't exist anyway and even if it + # fails to delete, it's not too terrible as most files sizes are insignificant + logger.error( + f"Failed to delete large object with oid {pgfilestore.lobj_oid}" + ) + + pgfilestore.lobj_oid = lobj_oid + else: + pgfilestore = PGFileStore( + file_name=file_name, + display_name=display_name, + file_origin=file_origin, + file_type=file_type, + file_metadata=file_metadata, + lobj_oid=lobj_oid, + ) + db_session.add(pgfilestore) + + if commit: + db_session.commit() + + return pgfilestore + + +def get_pgfilestore_by_file_name( + file_name: str, + db_session: Session, +) -> PGFileStore: + pgfilestore = db_session.query(PGFileStore).filter_by(file_name=file_name).first() + + if not pgfilestore: + raise RuntimeError(f"File by name {file_name} does not exist or was deleted") + + return pgfilestore + + +def delete_pgfilestore_by_file_name( + file_name: str, + db_session: Session, +) -> 
None: + db_session.query(PGFileStore).filter_by(file_name=file_name).delete() diff --git a/backend/danswer/db/pydantic_type.py b/backend/danswer/db/pydantic_type.py new file mode 100644 index 000000000..1f37152a8 --- /dev/null +++ b/backend/danswer/db/pydantic_type.py @@ -0,0 +1,32 @@ +import json +from typing import Any +from typing import Optional +from typing import Type + +from pydantic import BaseModel +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.types import TypeDecorator + + +class PydanticType(TypeDecorator): + impl = JSONB + + def __init__( + self, pydantic_model: Type[BaseModel], *args: Any, **kwargs: Any + ) -> None: + super().__init__(*args, **kwargs) + self.pydantic_model = pydantic_model + + def process_bind_param( + self, value: Optional[BaseModel], dialect: Any + ) -> Optional[dict]: + if value is not None: + return json.loads(value.json()) + return None + + def process_result_value( + self, value: Optional[dict], dialect: Any + ) -> Optional[BaseModel]: + if value is not None: + return self.pydantic_model.parse_obj(value) + return None diff --git a/backend/danswer/db/slack_bot_config.py b/backend/danswer/db/slack_bot_config.py index bbf4ff0b6..973d76244 100644 --- a/backend/danswer/db/slack_bot_config.py +++ b/backend/danswer/db/slack_bot_config.py @@ -11,7 +11,8 @@ from danswer.db.models import ChannelConfig from danswer.db.models import Persona from danswer.db.models import Persona__DocumentSet from danswer.db.models import SlackBotConfig -from danswer.search.models import RecencyBiasSetting +from danswer.db.models import SlackBotResponseType +from danswer.search.enums import RecencyBiasSetting def _build_persona_name(channel_names: list[str]) -> str: @@ -48,7 +49,7 @@ def create_slack_bot_persona( # create/update persona associated with the slack bot persona_name = _build_persona_name(channel_names) persona = upsert_persona( - user_id=None, # Slack Bot Personas are not attached to users + user=None, # Slack Bot Personas are 
not attached to users persona_id=existing_persona_id, name=persona_name, description="", @@ -58,8 +59,10 @@ def create_slack_bot_persona( recency_bias=RecencyBiasSetting.AUTO, prompts=None, document_sets=document_sets, + llm_model_provider_override=None, llm_model_version_override=None, - shared=True, + starter_messages=None, + is_public=True, default_persona=False, db_session=db_session, commit=False, @@ -71,11 +74,13 @@ def create_slack_bot_persona( def insert_slack_bot_config( persona_id: int | None, channel_config: ChannelConfig, + response_type: SlackBotResponseType, db_session: Session, ) -> SlackBotConfig: slack_bot_config = SlackBotConfig( persona_id=persona_id, channel_config=channel_config, + response_type=response_type, ) db_session.add(slack_bot_config) db_session.commit() @@ -87,6 +92,7 @@ def update_slack_bot_config( slack_bot_config_id: int, persona_id: int | None, channel_config: ChannelConfig, + response_type: SlackBotResponseType, db_session: Session, ) -> SlackBotConfig: slack_bot_config = db_session.scalar( @@ -104,6 +110,7 @@ def update_slack_bot_config( # will encounter `violates foreign key constraint` errors slack_bot_config.persona_id = persona_id slack_bot_config.channel_config = channel_config + slack_bot_config.response_type = response_type # if the persona has changed, then clean up the old persona if persona_id != existing_persona_id and existing_persona_id: diff --git a/backend/danswer/db/swap_index.py b/backend/danswer/db/swap_index.py new file mode 100644 index 000000000..f14a45f29 --- /dev/null +++ b/backend/danswer/db/swap_index.py @@ -0,0 +1,60 @@ +from sqlalchemy.orm import Session + +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.connector_credential_pair import resync_cc_pair +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.embedding_model import get_secondary_db_embedding_model +from danswer.db.embedding_model import 
update_embedding_model_status +from danswer.db.enums import IndexModelStatus +from danswer.db.index_attempt import cancel_indexing_attempts_past_model +from danswer.db.index_attempt import ( + count_unique_cc_pairs_with_successful_index_attempts, +) +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def check_index_swap(db_session: Session) -> None: + """Get count of cc-pairs and count of successful index_attempts for the + new model grouped by connector + credential, if it's the same, then assume + new index is done building. If so, swap the indices and expire the old one.""" + # Default CC-pair created for Ingestion API unused here + all_cc_pairs = get_connector_credential_pairs(db_session) + cc_pair_count = max(len(all_cc_pairs) - 1, 0) + embedding_model = get_secondary_db_embedding_model(db_session) + + if not embedding_model: + return + + unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts( + embedding_model_id=embedding_model.id, db_session=db_session + ) + + # Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this + # function is correct. 
The unique_cc_indexings are specifically for the existing cc-pairs + if unique_cc_indexings > cc_pair_count: + logger.error("More unique indexings than cc pairs, should not occur") + + if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings: + # Swap indices + now_old_embedding_model = get_current_db_embedding_model(db_session) + update_embedding_model_status( + embedding_model=now_old_embedding_model, + new_status=IndexModelStatus.PAST, + db_session=db_session, + ) + + update_embedding_model_status( + embedding_model=embedding_model, + new_status=IndexModelStatus.PRESENT, + db_session=db_session, + ) + + if cc_pair_count > 0: + # Expire jobs for the now past index/embedding model + cancel_indexing_attempts_past_model(db_session) + + # Recount aggregates + for cc_pair in all_cc_pairs: + resync_cc_pair(cc_pair, db_session=db_session) diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py index bf70f7308..240065a17 100644 --- a/backend/danswer/db/tag.py +++ b/backend/danswer/db/tag.py @@ -12,13 +12,30 @@ from danswer.utils.logger import setup_logger logger = setup_logger() +def check_tag_validity(tag_key: str, tag_value: str) -> bool: + """If a tag is too long, it should not be used (it will cause an error in Postgres + as the unique constraint can only apply to entries that are less than 2704 bytes). 
+ + Additionally, extremely long tags are not really usable / useful.""" + if len(tag_key) + len(tag_value) > 255: + logger.error( + f"Tag with key '{tag_key}' and value '{tag_value}' is too long, cannot be used" + ) + return False + + return True + + def create_or_add_document_tag( tag_key: str, tag_value: str, source: DocumentSource, document_id: str, db_session: Session, -) -> Tag: +) -> Tag | None: + if not check_tag_validity(tag_key, tag_value): + return None + document = db_session.get(Document, document_id) if not document: raise ValueError("Invalid Document, cannot attach Tags") @@ -48,22 +65,35 @@ def create_or_add_document_tag_list( document_id: str, db_session: Session, ) -> list[Tag]: + valid_tag_values = [ + tag_value for tag_value in tag_values if check_tag_validity(tag_key, tag_value) + ] + if not valid_tag_values: + return [] + document = db_session.get(Document, document_id) if not document: raise ValueError("Invalid Document, cannot attach Tags") existing_tags_stmt = select(Tag).where( - Tag.tag_key == tag_key, Tag.tag_value.in_(tag_values), Tag.source == source + Tag.tag_key == tag_key, + Tag.tag_value.in_(valid_tag_values), + Tag.source == source, ) existing_tags = list(db_session.execute(existing_tags_stmt).scalars().all()) existing_tag_values = {tag.tag_value for tag in existing_tags} new_tags = [] - for tag_value in tag_values: + for tag_value in valid_tag_values: if tag_value not in existing_tag_values: new_tag = Tag(tag_key=tag_key, tag_value=tag_value, source=source) db_session.add(new_tag) new_tags.append(new_tag) + existing_tag_values.add(tag_value) + + logger.debug( + f"Created new tags: {', '.join([f'{tag.tag_key}:{tag.tag_value}' for tag in new_tags])}" + ) all_tags = existing_tags + new_tags @@ -94,12 +124,11 @@ def get_tags_by_value_prefix_for_source_types( return list(tags) -def delete_document_tags_for_documents( +def delete_document_tags_for_documents__no_commit( document_ids: list[str], db_session: Session ) -> None: stmt = 
delete(Document__Tag).where(Document__Tag.document_id.in_(document_ids)) db_session.execute(stmt) - db_session.commit() orphan_tags_query = ( select(Tag.id) @@ -113,4 +142,3 @@ def delete_document_tags_for_documents( if orphan_tags: delete_orphan_tags_stmt = delete(Tag).where(Tag.id.in_(orphan_tags)) db_session.execute(delete_orphan_tags_stmt) - db_session.commit() diff --git a/backend/danswer/document_index/document_index_utils.py b/backend/danswer/document_index/document_index_utils.py index 51e6433cb..271fd0cc2 100644 --- a/backend/danswer/document_index/document_index_utils.py +++ b/backend/danswer/document_index/document_index_utils.py @@ -6,7 +6,7 @@ from sqlalchemy.orm import Session from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_secondary_db_embedding_model from danswer.indexing.models import IndexChunk -from danswer.indexing.models import InferenceChunk +from danswer.search.models import InferenceChunk DEFAULT_BATCH_SIZE = 30 diff --git a/backend/danswer/document_index/interfaces.py b/backend/danswer/document_index/interfaces.py index e528504aa..6adedd452 100644 --- a/backend/danswer/document_index/interfaces.py +++ b/backend/danswer/document_index/interfaces.py @@ -5,8 +5,8 @@ from typing import Any from danswer.access.models import DocumentAccess from danswer.indexing.models import DocMetadataAwareIndexChunk -from danswer.indexing.models import InferenceChunk from danswer.search.models import IndexFilters +from danswer.search.models import InferenceChunk @dataclass(frozen=True) @@ -17,6 +17,11 @@ class DocumentInsertionRecord: @dataclass class DocumentMetadata: + """ + Document information that needs to be inserted into Postgres on first time encountering this + document during indexing across any of the connectors. 
+ """ + connector_id: int credential_id: int document_id: str @@ -32,11 +37,13 @@ class DocumentMetadata: @dataclass class UpdateRequest: - """For all document_ids, update the allowed_users and the boost to the new value - ignore if None""" + """ + For all document_ids, update the allowed_users and the boost to the new values + Does not update any of the None fields + """ document_ids: list[str] - # all other fields will be left alone + # all other fields except these 4 will always be left alone by the update request access: DocumentAccess | None = None document_sets: set[str] | None = None boost: float | None = None @@ -44,6 +51,18 @@ class UpdateRequest: class Verifiable(abc.ABC): + """ + Class must implement document index schema verification. For example, verify that all of the + necessary attributes for indexing, querying, filtering, and fields to return from search are + all valid in the schema. + + Parameters: + - index_name: The name of the primary index currently used for querying + - secondary_index_name: The name of the secondary index being built in the background, if it + currently exists. Some functions on the document index act on both the primary and + secondary index, some act on just one. + """ + @abc.abstractmethod def __init__( self, @@ -62,45 +81,139 @@ class Verifiable(abc.ABC): index_embedding_dim: int, secondary_index_embedding_dim: int | None, ) -> None: + """ + Verify that the document index exists and is consistent with the expectations in the code. + + Parameters: + - index_embedding_dim: Vector dimensionality for the vector similarity part of the search + - secondary_index_embedding_dim: Vector dimensionality of the secondary index being built + behind the scenes. The secondary index should only be built when switching + embedding models therefore this dim should be different from the primary index. 
+ """ raise NotImplementedError class Indexable(abc.ABC): + """ + Class must implement the ability to index document chunks + """ + @abc.abstractmethod def index( self, chunks: list[DocMetadataAwareIndexChunk], ) -> set[DocumentInsertionRecord]: - """Indexes document chunks into the Document Index and return the IDs of all the documents indexed""" + """ + Takes a list of document chunks and indexes them in the document index + + NOTE: When a document is reindexed/updated here, it must clear all of the existing document + chunks before reindexing. This is because the document may have gotten shorter since the + last run. Therefore, upserting the first 0 through n chunks may leave some old chunks that + have not been written over. + + NOTE: The chunks of a document are never separated into separate index() calls. So there is + no worry of receiving the first 0 through n chunks in one index call and the next n through + m chunks of a docu in the next index call. + + NOTE: Due to some asymmetry between the primary and secondary indexing logic, this function + only needs to index chunks into the PRIMARY index. Do not update the secondary index here, + it is done automatically outside of this code. + + Parameters: + - chunks: Document chunks with all of the information needed for indexing to the document + index. + + Returns: + List of document ids which map to unique documents and are used for deduping chunks + when updating, as well as if the document is newly indexed or already existed and + just updated + """ raise NotImplementedError class Deletable(abc.ABC): + """ + Class must implement the ability to delete document by their unique document ids. 
+ """ + @abc.abstractmethod def delete(self, doc_ids: list[str]) -> None: - """Removes the specified documents from the Index""" + """ + Given a list of document ids, hard delete them from the document index + + Parameters: + - doc_ids: list of document ids as specified by the connector + """ raise NotImplementedError class Updatable(abc.ABC): + """ + Class must implement the ability to update certain attributes of a document without needing to + update all of the fields. Specifically, needs to be able to update: + - Access Control List + - Document-set membership + - Boost value (learning from feedback mechanism) + - Whether the document is hidden or not, hidden documents are not returned from search + """ + @abc.abstractmethod def update(self, update_requests: list[UpdateRequest]) -> None: - """Updates metadata for the specified documents sets in the Index""" + """ + Updates some set of chunks. The document and fields to update are specified in the update + requests. Each update request in the list applies its changes to a list of document ids. + None values mean that the field does not need an update. + + Parameters: + - update_requests: for a list of document ids in the update request, apply the same updates + to all of the documents with those ids. This is for bulk handling efficiency. Many + updates are done at the connector level which have many documents for the connector + """ raise NotImplementedError class IdRetrievalCapable(abc.ABC): + """ + Class must implement the ability to retrieve either: + - all of the chunks of a document IN ORDER given a document id. 
+ - a specific chunk given a document id and a chunk index (0 based) + """ + @abc.abstractmethod def id_based_retrieval( self, document_id: str, - chunk_ind: int | None, - filters: IndexFilters, + min_chunk_ind: int | None, + max_chunk_ind: int | None, + user_access_control_list: list[str] | None = None, ) -> list[InferenceChunk]: + """ + Fetch chunk(s) based on document id + + NOTE: This is used to reconstruct a full document or an extended (multi-chunk) section + of a document. Downstream currently assumes that the chunking does not introduce overlaps + between the chunks. If there are overlaps for the chunks, then the reconstructed document + or extended section will have duplicate segments. + + Parameters: + - document_id: document id for which to retrieve the chunk(s) + - min_chunk_ind: if None then fetch from the start of doc + - max_chunk_ind: + - filters: standard filters object, in this case only the access filter is applied as a + permission check + + Returns: + list of chunks for the document id or the specific chunk by the specified chunk index + and document id + """ raise NotImplementedError class KeywordCapable(abc.ABC): + """ + Class must implement the keyword search functionality + """ + @abc.abstractmethod def keyword_retrieval( self, @@ -110,10 +223,36 @@ class KeywordCapable(abc.ABC): num_to_retrieve: int, offset: int = 0, ) -> list[InferenceChunk]: + """ + Run keyword search and return a list of chunks. Inference chunks are chunks with all of the + information required for query time purposes. For example, some details of the document + required at indexing time are no longer needed past this point. At the same time, the + matching keywords need to be highlighted. + + NOTE: the query passed in here is the unprocessed plain text query. Preprocessing is + expected to be handled by this function as it may depend on the index implementation. + Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are + done here. 
+ + Parameters: + - query: unmodified user query + - filters: standard filter object + - time_decay_multiplier: how much to decay the document scores as they age. Some queries + based on the persona settings, will have this be a 2x or 3x of the default + - num_to_retrieve: number of highest matching chunks to return + - offset: number of highest matching chunks to skip (kind of like pagination) + + Returns: + best matching chunks based on keyword matching (should be BM25 algorithm ideally) + """ raise NotImplementedError class VectorCapable(abc.ABC): + """ + Class must implement the vector/semantic search functionality + """ + @abc.abstractmethod def semantic_retrieval( self, @@ -124,10 +263,31 @@ class VectorCapable(abc.ABC): num_to_retrieve: int, offset: int = 0, ) -> list[InferenceChunk]: + """ + Run vector/semantic search and return a list of inference chunks. + + Parameters: + - query: unmodified user query. This is needed for getting the matching highlighted + keywords + - query_embedding: vector representation of the query, must be of the correct + dimensionality for the primary index + - filters: standard filter object + - time_decay_multiplier: how much to decay the document scores as they age. Some queries + based on the persona settings, will have this be a 2x or 3x of the default + - num_to_retrieve: number of highest matching chunks to return + - offset: number of highest matching chunks to skip (kind of like pagination) + + Returns: + best matching chunks based on vector similarity + """ raise NotImplementedError class HybridCapable(abc.ABC): + """ + Class must implement hybrid (keyword + vector) search functionality + """ + @abc.abstractmethod def hybrid_retrieval( self, @@ -139,10 +299,48 @@ class HybridCapable(abc.ABC): offset: int = 0, hybrid_alpha: float | None = None, ) -> list[InferenceChunk]: + """ + Run hybrid search and return a list of inference chunks. + + NOTE: the query passed in here is the unprocessed plain text query. 
Preprocessing is + expected to be handled by this function as it may depend on the index implementation. + Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are + done here. + + Parameters: + - query: unmodified user query. This is needed for getting the matching highlighted + keywords + - query_embedding: vector representation of the query, must be of the correct + dimensionality for the primary index + - filters: standard filter object + - time_decay_multiplier: how much to decay the document scores as they age. Some queries + based on the persona settings, will have this be a 2x or 3x of the default + - num_to_retrieve: number of highest matching chunks to return + - offset: number of highest matching chunks to skip (kind of like pagination) + - hybrid_alpha: weighting between the keyword and vector search results. It is important + that the two scores are normalized to the same range so that a meaningful + comparison can be made. 1 for 100% weighting on vector score, 0 for 100% weighting + on keyword score. + + Returns: + best matching chunks based on weighted sum of keyword and vector/semantic search scores + """ raise NotImplementedError class AdminCapable(abc.ABC): + """ + Class must implement a search for the admin "Explorer" page. The assumption here is that the + admin is not "searching" for knowledge but has some document already in mind. They are either + looking to positively boost it because they know it's a good reference document, looking to + negatively boost it as a way of "deprecating", or hiding the document. + + Assuming the admin knows the document name, this search has high emphasis on the title match. 
+ + Suggested implementation: + Keyword only, BM25 search with 5x weighting on the title field compared to the contents + """ + @abc.abstractmethod def admin_retrieval( self, @@ -151,34 +349,46 @@ class AdminCapable(abc.ABC): num_to_retrieve: int, offset: int = 0, ) -> list[InferenceChunk]: + """ + Run the special search for the admin document explorer page + + Parameters: + - query: unmodified user query. Though in this flow probably unmodified is best + - filters: standard filter object + - num_to_retrieve: number of highest matching chunks to return + - offset: number of highest matching chunks to skip (kind of like pagination) + + Returns: + list of best matching chunks for the explorer page query + """ raise NotImplementedError class BaseIndex( Verifiable, - AdminCapable, - IdRetrievalCapable, Indexable, Updatable, Deletable, + AdminCapable, + IdRetrievalCapable, abc.ABC, ): - """All basic functionalities excluding a specific retrieval approach - Indices need to be able to - - Check that the index exists with a schema definition - - Can index documents - - Can delete documents - - Can update document metadata (such as access permissions and document specific boost) + """ + All basic document index functionalities excluding the actual querying approach. 
+ + As a summary, document indices need to be able to + - Verify the schema definition is valid + - Index new documents + - Update specific attributes of existing documents + - Delete documents + - Provide a search for the admin document explorer page + - Retrieve documents based on document id """ -class KeywordIndex(KeywordCapable, BaseIndex, abc.ABC): - pass - - -class VectorIndex(VectorCapable, BaseIndex, abc.ABC): - pass - - class DocumentIndex(KeywordCapable, VectorCapable, HybridCapable, BaseIndex, abc.ABC): - pass + """ + A valid document index that can plug into all Danswer flows must implement all of these + functionalities, though "technically" it does not need to be keyword or vector capable as + currently all default search flows use Hybrid Search. + """ diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index 4d58bee22..1ec5fd787 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -5,7 +5,6 @@ import os import string import time import zipfile -from collections.abc import Callable from collections.abc import Mapping from dataclasses import dataclass from datetime import datetime @@ -20,6 +19,7 @@ import requests from retry import retry from danswer.configs.app_configs import LOG_VESPA_TIMING_INFORMATION +from danswer.configs.app_configs import VESPA_CONFIG_SERVER_HOST from danswer.configs.app_configs import VESPA_HOST from danswer.configs.app_configs import VESPA_PORT from danswer.configs.app_configs import VESPA_TENANT_PORT @@ -62,13 +62,12 @@ from danswer.document_index.interfaces import DocumentInsertionRecord from danswer.document_index.interfaces import UpdateRequest from danswer.document_index.vespa.utils import remove_invalid_unicode_chars from danswer.indexing.models import DocMetadataAwareIndexChunk -from danswer.indexing.models import InferenceChunk from danswer.search.models import IndexFilters -from 
danswer.search.search_runner import query_processing -from danswer.search.search_runner import remove_stop_words_and_punctuation +from danswer.search.models import InferenceChunk +from danswer.search.retrieval.search_runner import query_processing +from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger -from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel logger = setup_logger() @@ -76,17 +75,22 @@ VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM" DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME" DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT" DATE_REPLACEMENT = "DATE_REPLACEMENT" -VESPA_CONFIG_SERVER_URL = f"http://{VESPA_HOST}:{VESPA_TENANT_PORT}" -VESPA_APP_CONTAINER_URL = f"http://{VESPA_HOST}:{VESPA_PORT}" + +# config server +VESPA_CONFIG_SERVER_URL = f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}" VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2" + +# main search application +VESPA_APP_CONTAINER_URL = f"http://{VESPA_HOST}:{VESPA_PORT}" # danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd DOCUMENT_ID_ENDPOINT = ( f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid" ) SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/" -_BATCH_SIZE = 100 # Specific to Vespa + +_BATCH_SIZE = 128 # Specific to Vespa _NUM_THREADS = ( - 16 # since Vespa doesn't allow batching of inserts / updates, we use threads + 32 # since Vespa doesn't allow batching of inserts / updates, we use threads ) # up from 500ms for now, since we've seen quite a few timeouts # in the long term, we are looking to improve the performance of Vespa @@ -112,13 +116,13 @@ def _does_document_exist( """Returns whether the document already exists and the users/group whitelists Specifically in this case, document refers to a vespa document which is equivalent to a Danswer chunk. 
This checks for whether the chunk exists already in the index""" - doc_fetch_response = http_client.get( - f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" - ) + doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" + doc_fetch_response = http_client.get(doc_url) if doc_fetch_response.status_code == 404: return False if doc_fetch_response.status_code != 200: + logger.debug(f"Failed to check for document with URL {doc_url}") raise RuntimeError( f"Unexpected fetch document by ID value from Vespa " f"with error {doc_fetch_response.status_code}" @@ -136,38 +140,100 @@ def _vespa_get_updated_at_attribute(t: datetime | None) -> int | None: return int(t.timestamp()) -def _get_vespa_chunk_ids_by_document_id( +def _get_vespa_chunks_by_document_id( document_id: str, index_name: str, - hits_per_page: int = _BATCH_SIZE, - index_filters: IndexFilters | None = None, -) -> list[str]: - filters_str = ( - _build_vespa_filters(filters=index_filters, include_hidden=True) - if index_filters is not None - else "" + user_access_control_list: list[str] | None = None, + min_chunk_ind: int | None = None, + max_chunk_ind: int | None = None, + field_names: list[str] | None = None, +) -> list[dict]: + # Constructing the URL for the Visit API + # NOTE: visit API uses the same URL as the document API, but with different params + url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name) + + # build the list of fields to retrieve + field_set_list = ( + None + if not field_names + else [f"{index_name}:{field_name}" for field_name in field_names] ) + acl_fieldset_entry = f"{index_name}:{ACCESS_CONTROL_LIST}" + if ( + field_set_list + and user_access_control_list + and acl_fieldset_entry not in field_set_list + ): + field_set_list.append(acl_fieldset_entry) + field_set = ",".join(field_set_list) if field_set_list else None - offset = 0 - doc_chunk_ids = [] - params: dict[str, int | str] = { - "yql": f"select documentid from {index_name} where 
{filters_str}document_id contains '{document_id}'", - "timeout": "10s", - "offset": offset, - "hits": hits_per_page, + # build filters + selection = f"{index_name}.document_id=='{document_id}'" + if min_chunk_ind is not None: + selection += f" and {index_name}.chunk_id>={min_chunk_ind}" + if max_chunk_ind is not None: + selection += f" and {index_name}.chunk_id<={max_chunk_ind}" + + # Setting up the selection criteria in the query parameters + params = { + # NOTE: Document Selector Language doesn't allow `contains`, so we can't check + # for the ACL in the selection. Instead, we have to check as a postfilter + "selection": selection, + "continuation": None, + "wantedDocumentCount": 1_000, + "fieldSet": field_set, } + + document_chunks: list[dict] = [] while True: - results = requests.post(SEARCH_ENDPOINT, json=params).json() - hits = results["root"].get("children", []) + response = requests.get(url, params=params) + try: + response.raise_for_status() + except requests.HTTPError as e: + request_info = f"Headers: {response.request.headers}\nPayload: {params}" + response_info = f"Status Code: {response.status_code}\nResponse Content: {response.text}" + error_base = f"Error occurred getting chunk by Document ID {document_id}" + logger.error( + f"{error_base}:\n" + f"{request_info}\n" + f"{response_info}\n" + f"Exception: {e}" + ) + raise requests.HTTPError(error_base) from e - doc_chunk_ids.extend( - [hit["fields"]["documentid"].split("::", 1)[-1] for hit in hits] - ) - params["offset"] += hits_per_page # type: ignore + # Check if the response contains any documents + response_data = response.json() + if "documents" in response_data: + for document in response_data["documents"]: + if user_access_control_list: + document_acl = document["fields"].get(ACCESS_CONTROL_LIST) + if not document_acl or not any( + user_acl_entry in document_acl + for user_acl_entry in user_access_control_list + ): + continue + document_chunks.append(document) + 
document_chunks.extend(response_data["documents"]) - if len(hits) < hits_per_page: - break - return doc_chunk_ids + # Check for continuation token to handle pagination + if "continuation" in response_data and response_data["continuation"]: + params["continuation"] = response_data["continuation"] + else: + break # Exit loop if no continuation token + + return document_chunks + + +def _get_vespa_chunk_ids_by_document_id( + document_id: str, index_name: str, user_access_control_list: list[str] | None = None +) -> list[str]: + document_chunks = _get_vespa_chunks_by_document_id( + document_id=document_id, + index_name=index_name, + user_access_control_list=user_access_control_list, + field_names=[DOCUMENT_ID], + ) + return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks] @retry(tries=3, delay=1, backoff=2) @@ -179,10 +245,14 @@ def _delete_vespa_doc_chunks( ) for chunk_id in doc_chunk_ids: - res = http_client.delete( - f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" - ) - res.raise_for_status() + try: + res = http_client.delete( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}" + ) + res.raise_for_status() + except httpx.HTTPStatusError as e: + logger.error(f"Failed to delete chunk, details: {e.response.text}") + raise def _delete_vespa_docs( @@ -559,18 +629,35 @@ def _query_vespa(query_params: Mapping[str, str | int | float]) -> list[Inferenc if "query" in query_params and not cast(str, query_params["query"]).strip(): raise ValueError("No/empty query received") + params = dict( + **query_params, + **{ + "presentation.timing": True, + } + if LOG_VESPA_TIMING_INFORMATION + else {}, + ) + response = requests.post( SEARCH_ENDPOINT, - json=dict( - **query_params, - **{ - "presentation.timing": True, - } - if LOG_VESPA_TIMING_INFORMATION - else {}, - ), + json=params, ) - response.raise_for_status() + try: + response.raise_for_status() + except requests.HTTPError as e: + request_info = f"Headers: 
{response.request.headers}\nPayload: {params}" + response_info = ( + f"Status Code: {response.status_code}\n" + f"Response Content: {response.text}" + ) + error_base = "Failed to query Vespa" + logger.error( + f"{error_base}:\n" + f"{request_info}\n" + f"{response_info}\n" + f"Exception: {e}" + ) + raise requests.HTTPError(error_base) from e response_json: dict[str, Any] = response.json() if LOG_VESPA_TIMING_INFORMATION: @@ -760,9 +847,46 @@ class VespaIndex(DocumentIndex): def update(self, update_requests: list[UpdateRequest]) -> None: logger.info(f"Updating {len(update_requests)} documents in Vespa") - start = time.time() + update_start = time.monotonic() processed_updates_requests: list[_VespaUpdateRequest] = [] + all_doc_chunk_ids: dict[str, list[str]] = {} + + # Fetch all chunks for each document ahead of time + index_names = [self.index_name] + if self.secondary_index_name: + index_names.append(self.secondary_index_name) + + chunk_id_start_time = time.monotonic() + with concurrent.futures.ThreadPoolExecutor( + max_workers=_NUM_THREADS + ) as executor: + future_to_doc_chunk_ids = { + executor.submit( + _get_vespa_chunk_ids_by_document_id, + document_id=document_id, + index_name=index_name, + ): (document_id, index_name) + for index_name in index_names + for update_request in update_requests + for document_id in update_request.document_ids + } + for future in concurrent.futures.as_completed(future_to_doc_chunk_ids): + document_id, index_name = future_to_doc_chunk_ids[future] + try: + doc_chunk_ids = future.result() + if document_id not in all_doc_chunk_ids: + all_doc_chunk_ids[document_id] = [] + all_doc_chunk_ids[document_id].extend(doc_chunk_ids) + except Exception as e: + logger.error( + f"Error retrieving chunk IDs for document {document_id} in index {index_name}: {e}" + ) + logger.debug( + f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs" + ) + + # Build the _VespaUpdateRequest objects for update_request in 
update_requests: update_dict: dict[str, dict] = {"fields": {}} if update_request.boost is not None: @@ -786,32 +910,26 @@ class VespaIndex(DocumentIndex): logger.error("Update request received but nothing to update") continue - index_names = [self.index_name] - if self.secondary_index_name: - index_names.append(self.secondary_index_name) - - for index_name in index_names: - for document_id in update_request.document_ids: - for doc_chunk_id in _get_vespa_chunk_ids_by_document_id( - document_id=document_id, index_name=index_name - ): - processed_updates_requests.append( - _VespaUpdateRequest( - document_id=document_id, - url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}", - update_request=update_dict, - ) + for document_id in update_request.document_ids: + for doc_chunk_id in all_doc_chunk_ids[document_id]: + processed_updates_requests.append( + _VespaUpdateRequest( + document_id=document_id, + url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}", + update_request=update_dict, ) + ) self._apply_updates_batched(processed_updates_requests) logger.info( - "Finished updating Vespa documents in %s seconds", time.time() - start + "Finished updating Vespa documents in %.2f seconds", + time.monotonic() - update_start, ) def delete(self, doc_ids: list[str]) -> None: logger.info(f"Deleting {len(doc_ids)} documents from Vespa") - # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for + # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for # indexing / updates / deletes since we have to make a large volume of requests. 
with httpx.Client(http2=True) as http_client: index_names = [self.index_name] @@ -826,41 +944,26 @@ class VespaIndex(DocumentIndex): def id_based_retrieval( self, document_id: str, - chunk_ind: int | None, - filters: IndexFilters, + min_chunk_ind: int | None, + max_chunk_ind: int | None, + user_access_control_list: list[str] | None = None, ) -> list[InferenceChunk]: - if chunk_ind is None: - vespa_chunk_ids = _get_vespa_chunk_ids_by_document_id( - document_id=document_id, - index_name=self.index_name, - index_filters=filters, - ) + vespa_chunks = _get_vespa_chunks_by_document_id( + document_id=document_id, + index_name=self.index_name, + user_access_control_list=user_access_control_list, + min_chunk_ind=min_chunk_ind, + max_chunk_ind=max_chunk_ind, + ) - if not vespa_chunk_ids: - return [] + if not vespa_chunks: + return [] - functions_with_args: list[tuple[Callable, tuple]] = [ - (_inference_chunk_by_vespa_id, (vespa_chunk_id, self.index_name)) - for vespa_chunk_id in vespa_chunk_ids - ] - - logger.debug( - "Running LLM usefulness eval in parallel (following logging may be out of order)" - ) - inference_chunks = run_functions_tuples_in_parallel( - functions_with_args, allow_failures=True - ) - inference_chunks.sort(key=lambda chunk: chunk.chunk_id) - return inference_chunks - - else: - filters_str = _build_vespa_filters(filters=filters, include_hidden=True) - yql = ( - VespaIndex.yql_base.format(index_name=self.index_name) - + filters_str - + f"({DOCUMENT_ID} contains '{document_id}' and {CHUNK_ID} contains '{chunk_ind}')" - ) - return _query_vespa({"yql": yql}) + inference_chunks = [ + _vespa_hit_to_inference_chunk(chunk) for chunk in vespa_chunks + ] + inference_chunks.sort(key=lambda chunk: chunk.chunk_id) + return inference_chunks def keyword_retrieval( self, diff --git a/backend/danswer/dynamic_configs/__init__.py b/backend/danswer/dynamic_configs/__init__.py index 0fc2233fa..e69de29bb 100644 --- a/backend/danswer/dynamic_configs/__init__.py +++ 
b/backend/danswer/dynamic_configs/__init__.py @@ -1,13 +0,0 @@ -from danswer.configs.app_configs import DYNAMIC_CONFIG_DIR_PATH -from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE -from danswer.dynamic_configs.file_system.store import FileSystemBackedDynamicConfigStore -from danswer.dynamic_configs.interface import DynamicConfigStore - - -def get_dynamic_config_store() -> DynamicConfigStore: - dynamic_config_store_type = DYNAMIC_CONFIG_STORE - if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__: - return FileSystemBackedDynamicConfigStore(DYNAMIC_CONFIG_DIR_PATH) - - # TODO: change exception type - raise Exception("Unknown dynamic config store type") diff --git a/backend/danswer/dynamic_configs/factory.py b/backend/danswer/dynamic_configs/factory.py new file mode 100644 index 000000000..44b6e096b --- /dev/null +++ b/backend/danswer/dynamic_configs/factory.py @@ -0,0 +1,15 @@ +from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE +from danswer.dynamic_configs.interface import DynamicConfigStore +from danswer.dynamic_configs.store import FileSystemBackedDynamicConfigStore +from danswer.dynamic_configs.store import PostgresBackedDynamicConfigStore + + +def get_dynamic_config_store() -> DynamicConfigStore: + dynamic_config_store_type = DYNAMIC_CONFIG_STORE + if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__: + raise NotImplementedError("File based config store no longer supported") + if dynamic_config_store_type == PostgresBackedDynamicConfigStore.__name__: + return PostgresBackedDynamicConfigStore() + + # TODO: change exception type + raise Exception("Unknown dynamic config store type") diff --git a/backend/danswer/dynamic_configs/file_system/store.py b/backend/danswer/dynamic_configs/file_system/store.py deleted file mode 100644 index 75cc0d740..000000000 --- a/backend/danswer/dynamic_configs/file_system/store.py +++ /dev/null @@ -1,48 +0,0 @@ -import json -import os -from pathlib import Path -from 
typing import cast - -from filelock import FileLock - -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.dynamic_configs.interface import DynamicConfigStore -from danswer.dynamic_configs.interface import JSON_ro - - -FILE_LOCK_TIMEOUT = 10 - - -def _get_file_lock(file_name: Path) -> FileLock: - return FileLock(file_name.with_suffix(".lock")) - - -class FileSystemBackedDynamicConfigStore(DynamicConfigStore): - def __init__(self, dir_path: str) -> None: - # TODO (chris): maybe require all possible keys to be passed in - # at app start somehow to prevent key overlaps - self.dir_path = Path(dir_path) - - def store(self, key: str, val: JSON_ro) -> None: - file_path = self.dir_path / key - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - with open(file_path, "w+") as f: - json.dump(val, f) - - def load(self, key: str) -> JSON_ro: - file_path = self.dir_path / key - if not file_path.exists(): - raise ConfigNotFoundError - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - with open(self.dir_path / key) as f: - return cast(JSON_ro, json.load(f)) - - def delete(self, key: str) -> None: - file_path = self.dir_path / key - if not file_path.exists(): - raise ConfigNotFoundError - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - os.remove(file_path) diff --git a/backend/danswer/dynamic_configs/interface.py b/backend/danswer/dynamic_configs/interface.py index da5adfa1a..999ad9396 100644 --- a/backend/danswer/dynamic_configs/interface.py +++ b/backend/danswer/dynamic_configs/interface.py @@ -15,7 +15,7 @@ class ConfigNotFoundError(Exception): class DynamicConfigStore: @abc.abstractmethod - def store(self, key: str, val: JSON_ro) -> None: + def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: raise NotImplementedError @abc.abstractmethod diff --git a/backend/danswer/dynamic_configs/port_configs.py 
b/backend/danswer/dynamic_configs/port_configs.py new file mode 100644 index 000000000..809c06cbf --- /dev/null +++ b/backend/danswer/dynamic_configs/port_configs.py @@ -0,0 +1,115 @@ +import json +from pathlib import Path +from typing import cast + +from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY +from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION +from danswer.configs.model_configs import GEN_AI_API_ENDPOINT +from danswer.configs.model_configs import GEN_AI_API_KEY +from danswer.configs.model_configs import GEN_AI_API_VERSION +from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER +from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.db.engine import get_session_context_manager +from danswer.db.llm import fetch_existing_llm_providers +from danswer.db.llm import update_default_provider +from danswer.db.llm import upsert_llm_provider +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.factory import PostgresBackedDynamicConfigStore +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +def read_file_system_store(directory_path: str) -> dict: + store = {} + base_path = Path(directory_path) + for file_path in base_path.iterdir(): + if file_path.is_file() and "." 
not in file_path.name: + with open(file_path, "r") as file: + key = file_path.stem + value = json.load(file) + + if value: + store[key] = value + return store + + +def insert_into_postgres(store_data: dict) -> None: + port_once_key = "file_store_ported" + config_store = PostgresBackedDynamicConfigStore() + try: + config_store.load(port_once_key) + return + except ConfigNotFoundError: + pass + + for key, value in store_data.items(): + config_store.store(key, value) + + config_store.store(port_once_key, True) + + +def port_filesystem_to_postgres(directory_path: str) -> None: + store_data = read_file_system_store(directory_path) + insert_into_postgres(store_data) + + +def port_api_key_to_postgres() -> None: + # can't port over custom, no longer supported + if GEN_AI_MODEL_PROVIDER == "custom": + return + + with get_session_context_manager() as db_session: + # if we already have ported things over / setup providers in the db, don't do anything + if len(fetch_existing_llm_providers(db_session)) > 0: + return + + api_key = GEN_AI_API_KEY + try: + api_key = cast( + str, get_dynamic_config_store().load(GEN_AI_API_KEY_STORAGE_KEY) + ) + except ConfigNotFoundError: + pass + + # if no API key set, don't port anything over + if not api_key: + return + + default_model_name = GEN_AI_MODEL_VERSION + if GEN_AI_MODEL_PROVIDER == "openai" and not default_model_name: + default_model_name = "gpt-4" + + # if no default model name found, don't port anything over + if not default_model_name: + return + + default_fast_model_name = FAST_GEN_AI_MODEL_VERSION + if GEN_AI_MODEL_PROVIDER == "openai" and not default_fast_model_name: + default_fast_model_name = "gpt-3.5-turbo" + + llm_provider_upsert = LLMProviderUpsertRequest( + name=GEN_AI_MODEL_PROVIDER, + provider=GEN_AI_MODEL_PROVIDER, + api_key=api_key, + api_base=GEN_AI_API_ENDPOINT, + api_version=GEN_AI_API_VERSION, + # can't port over any custom configs, since we don't know + # all the possible keys and values that could be in there + 
custom_config=None, + default_model_name=default_model_name, + fast_default_model_name=default_fast_model_name, + model_names=None, + ) + llm_provider = upsert_llm_provider(db_session, llm_provider_upsert) + update_default_provider(db_session, llm_provider.id) + logger.info(f"Ported over LLM provider:\n\n{llm_provider}") + + # delete the old API key + try: + get_dynamic_config_store().delete(GEN_AI_API_KEY_STORAGE_KEY) + except ConfigNotFoundError: + pass diff --git a/backend/danswer/dynamic_configs/store.py b/backend/danswer/dynamic_configs/store.py new file mode 100644 index 000000000..ee4ac3d09 --- /dev/null +++ b/backend/danswer/dynamic_configs/store.py @@ -0,0 +1,101 @@ +import json +import os +from collections.abc import Iterator +from contextlib import contextmanager +from pathlib import Path +from typing import cast + +from filelock import FileLock +from sqlalchemy.orm import Session + +from danswer.db.engine import SessionFactory +from danswer.db.models import KVStore +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.dynamic_configs.interface import DynamicConfigStore +from danswer.dynamic_configs.interface import JSON_ro + + +FILE_LOCK_TIMEOUT = 10 + + +def _get_file_lock(file_name: Path) -> FileLock: + return FileLock(file_name.with_suffix(".lock")) + + +class FileSystemBackedDynamicConfigStore(DynamicConfigStore): + def __init__(self, dir_path: str) -> None: + # TODO (chris): maybe require all possible keys to be passed in + # at app start somehow to prevent key overlaps + self.dir_path = Path(dir_path) + + def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: + file_path = self.dir_path / key + lock = _get_file_lock(file_path) + with lock.acquire(timeout=FILE_LOCK_TIMEOUT): + with open(file_path, "w+") as f: + json.dump(val, f) + + def load(self, key: str) -> JSON_ro: + file_path = self.dir_path / key + if not file_path.exists(): + raise ConfigNotFoundError + lock = _get_file_lock(file_path) + with 
lock.acquire(timeout=FILE_LOCK_TIMEOUT): + with open(self.dir_path / key) as f: + return cast(JSON_ro, json.load(f)) + + def delete(self, key: str) -> None: + file_path = self.dir_path / key + if not file_path.exists(): + raise ConfigNotFoundError + lock = _get_file_lock(file_path) + with lock.acquire(timeout=FILE_LOCK_TIMEOUT): + os.remove(file_path) + + +class PostgresBackedDynamicConfigStore(DynamicConfigStore): + @contextmanager + def get_session(self) -> Iterator[Session]: + session: Session = SessionFactory() + try: + yield session + finally: + session.close() + + def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: + # The actual encryption/decryption is done in Postgres, we just need to choose + # which field to set + encrypted_val = val if encrypt else None + plain_val = val if not encrypt else None + with self.get_session() as session: + obj = session.query(KVStore).filter_by(key=key).first() + if obj: + obj.value = plain_val + obj.encrypted_value = encrypted_val + else: + obj = KVStore( + key=key, value=plain_val, encrypted_value=encrypted_val + ) # type: ignore + session.query(KVStore).filter_by(key=key).delete() # just in case + session.add(obj) + session.commit() + + def load(self, key: str) -> JSON_ro: + with self.get_session() as session: + obj = session.query(KVStore).filter_by(key=key).first() + if not obj: + raise ConfigNotFoundError + + if obj.value is not None: + return cast(JSON_ro, obj.value) + if obj.encrypted_value is not None: + return cast(JSON_ro, obj.encrypted_value) + + return None + + def delete(self, key: str) -> None: + with self.get_session() as session: + result = session.query(KVStore).filter_by(key=key).delete() # type: ignore + if result == 0: + raise ConfigNotFoundError + session.commit() diff --git a/backend/danswer/file_processing/__init__.py b/backend/danswer/file_processing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/file_processing/extract_file_text.py 
b/backend/danswer/file_processing/extract_file_text.py new file mode 100644 index 000000000..710771eef --- /dev/null +++ b/backend/danswer/file_processing/extract_file_text.py @@ -0,0 +1,286 @@ +import io +import json +import os +import re +import zipfile +from collections.abc import Iterator +from email.parser import Parser as EmailParser +from pathlib import Path +from typing import Any +from typing import IO + +import chardet +import docx # type: ignore +import openpyxl # type: ignore +import pptx # type: ignore +from pypdf import PdfReader +from pypdf.errors import PdfStreamError + +from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +TEXT_SECTION_SEPARATOR = "\n\n" + + +PLAIN_TEXT_FILE_EXTENSIONS = [ + ".txt", + ".md", + ".mdx", + ".conf", + ".log", + ".json", + ".csv", + ".tsv", + ".xml", + ".yml", + ".yaml", +] + + +VALID_FILE_EXTENSIONS = PLAIN_TEXT_FILE_EXTENSIONS + [ + ".pdf", + ".docx", + ".pptx", + ".xlsx", + ".eml", + ".epub", +] + + +def is_text_file_extension(file_name: str) -> bool: + return any(file_name.endswith(ext) for ext in PLAIN_TEXT_FILE_EXTENSIONS) + + +def get_file_ext(file_path_or_name: str | Path) -> str: + _, extension = os.path.splitext(file_path_or_name) + return extension + + +def check_file_ext_is_valid(ext: str) -> bool: + return ext in VALID_FILE_EXTENSIONS + + +def detect_encoding(file: IO[bytes]) -> str: + raw_data = file.read(50000) + encoding = chardet.detect(raw_data)["encoding"] or "utf-8" + file.seek(0) + return encoding + + +def is_macos_resource_fork_file(file_name: str) -> bool: + return os.path.basename(file_name).startswith("._") and file_name.startswith( + "__MACOSX" + ) + + +# To include additional metadata in the search index, add a .danswer_metadata.json file +# to the zip file. 
This file should contain a list of objects with the following format: +# [{ "filename": "file1.txt", "link": "https://example.com/file1.txt" }] +def load_files_from_zip( + zip_file_io: IO, + ignore_macos_resource_fork_files: bool = True, + ignore_dirs: bool = True, +) -> Iterator[tuple[zipfile.ZipInfo, IO[Any], dict[str, Any]]]: + with zipfile.ZipFile(zip_file_io, "r") as zip_file: + zip_metadata = {} + try: + metadata_file_info = zip_file.getinfo(".danswer_metadata.json") + with zip_file.open(metadata_file_info, "r") as metadata_file: + try: + zip_metadata = json.load(metadata_file) + if isinstance(zip_metadata, list): + # convert list of dicts to dict of dicts + zip_metadata = {d["filename"]: d for d in zip_metadata} + except json.JSONDecodeError: + logger.warn("Unable to load .danswer_metadata.json") + except KeyError: + logger.info("No .danswer_metadata.json file") + + for file_info in zip_file.infolist(): + with zip_file.open(file_info.filename, "r") as file: + if ignore_dirs and file_info.is_dir(): + continue + + if ignore_macos_resource_fork_files and is_macos_resource_fork_file( + file_info.filename + ): + continue + yield file_info, file, zip_metadata.get(file_info.filename, {}) + + +def _extract_danswer_metadata(line: str) -> dict | None: + html_comment_pattern = r"<!--\s*DANSWER_METADATA=\{(.*?)\}\s*-->" + hashtag_pattern = r"#DANSWER_METADATA=\{(.*?)\}" + + html_comment_match = re.search(html_comment_pattern, line) + hashtag_match = re.search(hashtag_pattern, line) + + if html_comment_match: + json_str = html_comment_match.group(1) + elif hashtag_match: + json_str = hashtag_match.group(1) + else: + return None + + try: + return json.loads("{" + json_str + "}") + except json.JSONDecodeError: + return None + + +def read_text_file( + file: IO, + encoding: str = "utf-8", + errors: str = "replace", + ignore_danswer_metadata: bool = True, +) -> tuple[str, dict]: + metadata = {} + file_content_raw = "" + for ind, line in enumerate(file): + try: + line = line.decode(encoding) if isinstance(line,
bytes) else line + except UnicodeDecodeError: + line = ( + line.decode(encoding, errors=errors) + if isinstance(line, bytes) + else line + ) + + if ind == 0: + metadata_or_none = ( + None if ignore_danswer_metadata else _extract_danswer_metadata(line) + ) + if metadata_or_none is not None: + metadata = metadata_or_none + else: + file_content_raw += line + else: + file_content_raw += line + + return file_content_raw, metadata + + +def pdf_to_text(file: IO[Any], pdf_pass: str | None = None) -> str: + try: + pdf_reader = PdfReader(file) + + # If marked as encrypted and a password is provided, try to decrypt + if pdf_reader.is_encrypted and pdf_pass is not None: + decrypt_success = False + if pdf_pass is not None: + try: + decrypt_success = pdf_reader.decrypt(pdf_pass) != 0 + except Exception: + logger.error("Unable to decrypt pdf") + else: + logger.info("No Password available to to decrypt pdf") + + if not decrypt_success: + # By user request, keep files that are unreadable just so they + # can be discoverable by title. 
+ return "" + + return TEXT_SECTION_SEPARATOR.join( + page.extract_text() for page in pdf_reader.pages + ) + except PdfStreamError: + logger.exception("PDF file is not a valid PDF") + except Exception: + logger.exception("Failed to read PDF") + + # File is still discoverable by title + # but the contents are not included as they cannot be parsed + return "" + + +def docx_to_text(file: IO[Any]) -> str: + doc = docx.Document(file) + full_text = [para.text for para in doc.paragraphs] + return TEXT_SECTION_SEPARATOR.join(full_text) + + +def pptx_to_text(file: IO[Any]) -> str: + presentation = pptx.Presentation(file) + text_content = [] + for slide_number, slide in enumerate(presentation.slides, start=1): + extracted_text = f"\nSlide {slide_number}:\n" + for shape in slide.shapes: + if hasattr(shape, "text"): + extracted_text += shape.text + "\n" + text_content.append(extracted_text) + return TEXT_SECTION_SEPARATOR.join(text_content) + + +def xlsx_to_text(file: IO[Any]) -> str: + workbook = openpyxl.load_workbook(file) + text_content = [] + for sheet in workbook.worksheets: + sheet_string = "\n".join( + ",".join(map(str, row)) + for row in sheet.iter_rows(min_row=1, values_only=True) + ) + text_content.append(sheet_string) + return TEXT_SECTION_SEPARATOR.join(text_content) + + +def eml_to_text(file: IO[Any]) -> str: + text_file = io.TextIOWrapper(file, encoding=detect_encoding(file)) + parser = EmailParser() + message = parser.parse(text_file) + text_content = [] + for part in message.walk(): + if part.get_content_type().startswith("text/plain"): + text_content.append(part.get_payload()) + return TEXT_SECTION_SEPARATOR.join(text_content) + + +def epub_to_text(file: IO[Any]) -> str: + with zipfile.ZipFile(file) as epub: + text_content = [] + for item in epub.infolist(): + if item.filename.endswith(".xhtml") or item.filename.endswith(".html"): + with epub.open(item) as html_file: + text_content.append(parse_html_page_basic(html_file)) + return 
TEXT_SECTION_SEPARATOR.join(text_content) + + +def file_io_to_text(file: IO[Any]) -> str: + encoding = detect_encoding(file) + file_content_raw, _ = read_text_file(file, encoding=encoding) + return file_content_raw + + +def extract_file_text( + file_name: str | None, + file: IO[Any], +) -> str: + if not file_name: + return file_io_to_text(file) + + extension = get_file_ext(file_name) + if not check_file_ext_is_valid(extension): + raise RuntimeError("Unprocessable file type") + + if extension == ".pdf": + return pdf_to_text(file=file) + + elif extension == ".docx": + return docx_to_text(file) + + elif extension == ".pptx": + return pptx_to_text(file) + + elif extension == ".xlsx": + return xlsx_to_text(file) + + elif extension == ".eml": + return eml_to_text(file) + + elif extension == ".epub": + return epub_to_text(file) + + else: + return file_io_to_text(file) diff --git a/backend/danswer/connectors/cross_connector_utils/html_utils.py b/backend/danswer/file_processing/html_utils.py similarity index 98% rename from backend/danswer/connectors/cross_connector_utils/html_utils.py rename to backend/danswer/file_processing/html_utils.py index 0b4e9fade..9b5875227 100644 --- a/backend/danswer/connectors/cross_connector_utils/html_utils.py +++ b/backend/danswer/file_processing/html_utils.py @@ -1,6 +1,7 @@ import re from copy import copy from dataclasses import dataclass +from typing import IO import bs4 @@ -118,7 +119,7 @@ def format_document_soup( return strip_excessive_newlines_and_spaces(text) -def parse_html_page_basic(text: str) -> str: +def parse_html_page_basic(text: str | IO[bytes]) -> str: soup = bs4.BeautifulSoup(text, "html.parser") return format_document_soup(soup) diff --git a/backend/danswer/file_store/file_store.py b/backend/danswer/file_store/file_store.py new file mode 100644 index 000000000..9e131d38c --- /dev/null +++ b/backend/danswer/file_store/file_store.py @@ -0,0 +1,119 @@ +from abc import ABC +from abc import abstractmethod +from typing import IO 
+ +from sqlalchemy.orm import Session + +from danswer.configs.constants import FileOrigin +from danswer.db.pg_file_store import create_populate_lobj +from danswer.db.pg_file_store import delete_lobj_by_id +from danswer.db.pg_file_store import delete_pgfilestore_by_file_name +from danswer.db.pg_file_store import get_pgfilestore_by_file_name +from danswer.db.pg_file_store import read_lobj +from danswer.db.pg_file_store import upsert_pgfilestore + + +class FileStore(ABC): + """ + An abstraction for storing files and large binary objects. + """ + + @abstractmethod + def save_file( + self, + file_name: str, + content: IO, + display_name: str | None, + file_origin: FileOrigin, + file_type: str, + ) -> None: + """ + Save a file to the blob store + + Parameters: + - connector_name: Name of the CC-Pair (as specified by the user in the UI) + - file_name: Name of the file to save + - content: Contents of the file + - display_name: Display name of the file + - file_origin: Origin of the file + - file_type: Type of the file + """ + raise NotImplementedError + + @abstractmethod + def read_file(self, file_name: str, mode: str | None) -> IO: + """ + Read the content of a given file by the name + + Parameters: + - file_name: Name of file to read + + Returns: + Contents of the file and metadata dict + """ + + @abstractmethod + def delete_file(self, file_name: str) -> None: + """ + Delete a file by its name. 
+ + Parameters: + - file_name: Name of file to delete + """ + + +class PostgresBackedFileStore(FileStore): + def __init__(self, db_session: Session): + self.db_session = db_session + + def save_file( + self, + file_name: str, + content: IO, + display_name: str | None, + file_origin: FileOrigin, + file_type: str, + ) -> None: + try: + # The large objects in postgres are saved as special objects can be listed with + # SELECT * FROM pg_largeobject_metadata; + obj_id = create_populate_lobj(content=content, db_session=self.db_session) + upsert_pgfilestore( + file_name=file_name, + display_name=display_name or file_name, + file_origin=file_origin, + file_type=file_type, + lobj_oid=obj_id, + db_session=self.db_session, + ) + self.db_session.commit() + except Exception: + self.db_session.rollback() + raise + + def read_file(self, file_name: str, mode: str | None = None) -> IO: + file_record = get_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + return read_lobj( + lobj_oid=file_record.lobj_oid, db_session=self.db_session, mode=mode + ) + + def delete_file(self, file_name: str) -> None: + try: + file_record = get_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + delete_lobj_by_id(file_record.lobj_oid, db_session=self.db_session) + delete_pgfilestore_by_file_name( + file_name=file_name, db_session=self.db_session + ) + self.db_session.commit() + except Exception: + self.db_session.rollback() + raise + + +def get_default_file_store(db_session: Session) -> FileStore: + # The only supported file store now is the Postgres File Store + return PostgresBackedFileStore(db_session=db_session) diff --git a/backend/danswer/file_store/models.py b/backend/danswer/file_store/models.py new file mode 100644 index 000000000..f26fa4ca5 --- /dev/null +++ b/backend/danswer/file_store/models.py @@ -0,0 +1,46 @@ +import base64 +from enum import Enum +from typing import NotRequired +from typing import TypedDict + +from pydantic 
import BaseModel + + +class ChatFileType(str, Enum): + # Image types only contain the binary data + IMAGE = "image" + # Doc types are saved as both the binary, and the parsed text + DOC = "document" + # Plain text only contain the text + PLAIN_TEXT = "plain_text" + + +class FileDescriptor(TypedDict): + """NOTE: is a `TypedDict` so it can be used as a type hint for a JSONB column + in Postgres""" + + id: str + type: ChatFileType + name: NotRequired[str | None] + + +class InMemoryChatFile(BaseModel): + file_id: str + content: bytes + file_type: ChatFileType + filename: str | None = None + + def to_base64(self) -> str: + if self.file_type == ChatFileType.IMAGE: + return base64.b64encode(self.content).decode() + else: + raise RuntimeError( + "Should not be trying to convert a non-image file to base64" + ) + + def to_file_descriptor(self) -> FileDescriptor: + return { + "id": str(self.file_id), + "type": self.file_type, + "name": self.filename, + } diff --git a/backend/danswer/file_store/utils.py b/backend/danswer/file_store/utils.py new file mode 100644 index 000000000..82c027304 --- /dev/null +++ b/backend/danswer/file_store/utils.py @@ -0,0 +1,77 @@ +from io import BytesIO +from typing import cast +from uuid import uuid4 + +import requests +from sqlalchemy.orm import Session + +from danswer.configs.constants import FileOrigin +from danswer.db.engine import get_session_context_manager +from danswer.db.models import ChatMessage +from danswer.file_store.file_store import get_default_file_store +from danswer.file_store.models import FileDescriptor +from danswer.file_store.models import InMemoryChatFile +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + + +def load_chat_file( + file_descriptor: FileDescriptor, db_session: Session +) -> InMemoryChatFile: + file_io = get_default_file_store(db_session).read_file( + file_descriptor["id"], mode="b" + ) + return InMemoryChatFile( + file_id=file_descriptor["id"], + content=file_io.read(), + 
file_type=file_descriptor["type"], + filename=file_descriptor["name"], + ) + + +def load_all_chat_files( + chat_messages: list[ChatMessage], + file_descriptors: list[FileDescriptor], + db_session: Session, +) -> list[InMemoryChatFile]: + file_descriptors_for_history: list[FileDescriptor] = [] + for chat_message in chat_messages: + if chat_message.files: + file_descriptors_for_history.extend(chat_message.files) + + files = cast( + list[InMemoryChatFile], + run_functions_tuples_in_parallel( + [ + (load_chat_file, (file, db_session)) + for file in file_descriptors + file_descriptors_for_history + ] + ), + ) + return files + + +def save_file_from_url(url: str) -> str: + """NOTE: using multiple sessions here, since this is often called + using multithreading. In practice, sharing a session has resulted in + weird errors.""" + with get_session_context_manager() as db_session: + response = requests.get(url) + response.raise_for_status() + + unique_id = str(uuid4()) + + file_io = BytesIO(response.content) + file_store = get_default_file_store(db_session) + file_store.save_file( + file_name=unique_id, + content=file_io, + display_name="GeneratedImage", + file_origin=FileOrigin.CHAT_IMAGE_GEN, + file_type="image/png;base64", + ) + return unique_id + + +def save_files_from_urls(urls: list[str]) -> list[str]: + funcs = [(save_file_from_url, (url,)) for url in urls] + return run_functions_tuples_in_parallel(funcs) diff --git a/backend/danswer/indexing/chunker.py b/backend/danswer/indexing/chunker.py index 1166c93ff..b6f59d189 100644 --- a/backend/danswer/indexing/chunker.py +++ b/backend/danswer/indexing/chunker.py @@ -1,25 +1,32 @@ import abc from collections.abc import Callable - -from llama_index.text_splitter import SentenceSplitter -from transformers import AutoTokenizer # type:ignore +from typing import TYPE_CHECKING from danswer.configs.app_configs import BLURB_SIZE from danswer.configs.app_configs import CHUNK_OVERLAP from danswer.configs.app_configs import 
MINI_CHUNK_SIZE +from danswer.configs.constants import DocumentSource from danswer.configs.constants import SECTION_SEPARATOR from danswer.configs.constants import TITLE_SEPARATOR -from danswer.configs.model_configs import CHUNK_SIZE +from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.connectors.models import Document from danswer.indexing.models import DocAwareChunk from danswer.search.search_nlp_models import get_default_tokenizer +from danswer.utils.logger import setup_logger from danswer.utils.text_processing import shared_precompare_cleanup +if TYPE_CHECKING: + from transformers import AutoTokenizer # type:ignore + + +logger = setup_logger() ChunkFunc = Callable[[Document], list[DocAwareChunk]] def extract_blurb(text: str, blurb_size: int) -> str: + from llama_index.text_splitter import SentenceSplitter + token_count_func = get_default_tokenizer().tokenize blurb_splitter = SentenceSplitter( tokenizer=token_count_func, chunk_size=blurb_size, chunk_overlap=0 @@ -33,11 +40,13 @@ def chunk_large_section( section_link_text: str, document: Document, start_chunk_id: int, - tokenizer: AutoTokenizer, - chunk_size: int = CHUNK_SIZE, + tokenizer: "AutoTokenizer", + chunk_size: int = DOC_EMBEDDING_CONTEXT_SIZE, chunk_overlap: int = CHUNK_OVERLAP, blurb_size: int = BLURB_SIZE, ) -> list[DocAwareChunk]: + from llama_index.text_splitter import SentenceSplitter + blurb = extract_blurb(section_text, blurb_size) sentence_aware_splitter = SentenceSplitter( @@ -62,7 +71,7 @@ def chunk_large_section( def chunk_document( document: Document, - chunk_tok_size: int = CHUNK_SIZE, + chunk_tok_size: int = DOC_EMBEDDING_CONTEXT_SIZE, subsection_overlap: int = CHUNK_OVERLAP, blurb_size: int = BLURB_SIZE, ) -> list[DocAwareChunk]: @@ -155,6 +164,8 @@ def chunk_document( def split_chunk_text_into_mini_chunks( chunk_text: str, mini_chunk_size: int = MINI_CHUNK_SIZE ) -> list[str]: + from llama_index.text_splitter import SentenceSplitter + token_count_func = 
get_default_tokenizer().tokenize sentence_aware_splitter = SentenceSplitter( tokenizer=token_count_func, chunk_size=mini_chunk_size, chunk_overlap=0 @@ -171,4 +182,7 @@ class Chunker: class DefaultChunker(Chunker): def chunk(self, document: Document) -> list[DocAwareChunk]: + # Specifically for reproducing an issue with gmail + if document.source == DocumentSource.GMAIL: + logger.debug(f"Chunking {document.semantic_identifier}") return chunk_document(document) diff --git a/backend/danswer/indexing/embedder.py b/backend/danswer/indexing/embedder.py index 3be10f5b4..0b542067a 100644 --- a/backend/danswer/indexing/embedder.py +++ b/backend/danswer/indexing/embedder.py @@ -4,8 +4,6 @@ from abc import abstractmethod from sqlalchemy.orm import Session from danswer.configs.app_configs import ENABLE_MINI_CHUNK -from danswer.configs.app_configs import INDEXING_MODEL_SERVER_HOST -from danswer.configs.app_configs import MODEL_SERVER_PORT from danswer.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.db.embedding_model import get_current_db_embedding_model @@ -16,9 +14,12 @@ from danswer.indexing.chunker import split_chunk_text_into_mini_chunks from danswer.indexing.models import ChunkEmbedding from danswer.indexing.models import DocAwareChunk from danswer.indexing.models import IndexChunk +from danswer.search.enums import EmbedTextType from danswer.search.search_nlp_models import EmbeddingModel -from danswer.search.search_nlp_models import EmbedTextType +from danswer.utils.batching import batch_list from danswer.utils.logger import setup_logger +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import INDEXING_MODEL_SERVER_PORT logger = setup_logger() @@ -60,7 +61,7 @@ class DefaultIndexingEmbedder(IndexingEmbedder): normalize=normalize, # The below are globally set, this flow always uses the indexing one server_host=INDEXING_MODEL_SERVER_HOST, - 
server_port=MODEL_SERVER_PORT, + server_port=INDEXING_MODEL_SERVER_PORT, ) def embed_chunks( @@ -73,6 +74,8 @@ class DefaultIndexingEmbedder(IndexingEmbedder): title_embed_dict: dict[str, list[float]] = {} embedded_chunks: list[IndexChunk] = [] + # Create Mini Chunks for more precise matching of details + # Off by default with unedited settings chunk_texts = [] chunk_mini_chunks_count = {} for chunk_ind, chunk in enumerate(chunks): @@ -85,23 +88,43 @@ class DefaultIndexingEmbedder(IndexingEmbedder): chunk_texts.extend(mini_chunk_texts) chunk_mini_chunks_count[chunk_ind] = 1 + len(mini_chunk_texts) - text_batches = [ - chunk_texts[i : i + batch_size] - for i in range(0, len(chunk_texts), batch_size) - ] + # Batching for embedding + text_batches = batch_list(chunk_texts, batch_size) embeddings: list[list[float]] = [] len_text_batches = len(text_batches) for idx, text_batch in enumerate(text_batches, start=1): - logger.debug(f"Embedding text batch {idx} of {len_text_batches}") - # Normalize embeddings is only configured via model_configs.py, be sure to use right value for the set loss + logger.debug(f"Embedding Content Texts batch {idx} of {len_text_batches}") + # Normalize embeddings is only configured via model_configs.py, be sure to use right + # value for the set loss embeddings.extend( self.embedding_model.encode(text_batch, text_type=EmbedTextType.PASSAGE) ) - # Replace line above with the line below for easy debugging of indexing flow, skipping the actual model + # Replace line above with the line below for easy debugging of indexing flow + # skipping the actual model # embeddings.extend([[0.0] * 384 for _ in range(len(text_batch))]) + chunk_titles = { + chunk.source_document.get_title_for_document_index() for chunk in chunks + } + + # Drop any None or empty strings + chunk_titles_list = [title for title in chunk_titles if title] + + # Embed Titles in batches + title_batches = batch_list(chunk_titles_list, batch_size) + len_title_batches = len(title_batches) + 
for ind_batch, title_batch in enumerate(title_batches, start=1): + logger.debug(f"Embedding Titles batch {ind_batch} of {len_title_batches}") + title_embeddings = self.embedding_model.encode( + title_batch, text_type=EmbedTextType.PASSAGE + ) + title_embed_dict.update( + {title: vector for title, vector in zip(title_batch, title_embeddings)} + ) + + # Mapping embeddings to chunks embedding_ind_start = 0 for chunk_ind, chunk in enumerate(chunks): num_embeddings = chunk_mini_chunks_count[chunk_ind] @@ -114,16 +137,19 @@ class DefaultIndexingEmbedder(IndexingEmbedder): title_embedding = None if title: if title in title_embed_dict: - # Using cached value for speedup + # Using cached value to avoid recalculating for every chunk title_embedding = title_embed_dict[title] else: + logger.error( + "Title had to be embedded separately, this should not happen!" + ) title_embedding = self.embedding_model.encode( [title], text_type=EmbedTextType.PASSAGE )[0] title_embed_dict[title] = title_embedding new_embedded_chunk = IndexChunk( - **{k: getattr(chunk, k) for k in chunk.__dataclass_fields__}, + **chunk.dict(), embeddings=ChunkEmbedding( full_embedding=chunk_embeddings[0], mini_chunk_embeddings=chunk_embeddings[1:], diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index fa5199563..2506b4715 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -16,7 +16,7 @@ from danswer.db.document import prepare_to_modify_documents from danswer.db.document import update_docs_updated_at from danswer.db.document import upsert_documents_complete from danswer.db.document_set import fetch_document_sets_for_documents -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.models import Document as DBDocument from danswer.db.tag import create_or_add_document_tag from danswer.db.tag import create_or_add_document_tag_list from danswer.document_index.interfaces import 
DocumentIndex @@ -88,6 +88,28 @@ def upsert_documents_in_db( ) +def get_doc_ids_to_update( + documents: list[Document], db_docs: list[DBDocument] +) -> list[Document]: + """Figures out which documents actually need to be updated. If a document is already present + and the `updated_at` hasn't changed, we shouldn't need to do anything with it.""" + id_update_time_map = { + doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at + } + + updatable_docs: list[Document] = [] + for doc in documents: + if ( + doc.id in id_update_time_map + and doc.doc_updated_at + and doc.doc_updated_at <= id_update_time_map[doc.id] + ): + continue + updatable_docs.append(doc) + + return updatable_docs + + @log_function_time() def index_doc_batch( *, @@ -96,61 +118,50 @@ def index_doc_batch( document_index: DocumentIndex, documents: list[Document], index_attempt_metadata: IndexAttemptMetadata, + db_session: Session, ignore_time_skip: bool = False, ) -> tuple[int, int]: """Takes different pieces of the indexing pipeline and applies it to a batch of documents Note that the documents should already be batched at this point so that it does not inflate the memory requirements""" - with Session(get_sqlalchemy_engine()) as db_session: - document_ids = [document.id for document in documents] + document_ids = [document.id for document in documents] + db_docs = get_documents_by_ids( + document_ids=document_ids, + db_session=db_session, + ) + id_to_db_doc_map = {doc.id: doc for doc in db_docs} - # Skip indexing docs that don't have a newer updated at - # Shortcuts the time-consuming flow on connector index retries - db_docs = get_documents_by_ids( - document_ids=document_ids, - db_session=db_session, - ) - id_to_db_doc_map = {doc.id: doc for doc in db_docs} - id_update_time_map = { - doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at - } + # Skip indexing docs that don't have a newer updated at + # Shortcuts the time-consuming flow on connector index retries + 
updatable_docs = ( + get_doc_ids_to_update(documents=documents, db_docs=db_docs) + if not ignore_time_skip + else documents + ) + updatable_ids = [doc.id for doc in updatable_docs] - updatable_docs: list[Document] = [] - if ignore_time_skip: - updatable_docs = documents - else: - for doc in documents: - if ( - doc.id in id_update_time_map - and doc.doc_updated_at - and doc.doc_updated_at <= id_update_time_map[doc.id] - ): - continue - updatable_docs.append(doc) + # Create records in the source of truth about these documents, + # does not include doc_updated_at which is also used to indicate a successful update + upsert_documents_in_db( + documents=documents, + index_attempt_metadata=index_attempt_metadata, + db_session=db_session, + ) - updatable_ids = [doc.id for doc in updatable_docs] + logger.debug("Starting chunking") - # Acquires a lock on the documents so that no other process can modify them - prepare_to_modify_documents(db_session=db_session, document_ids=updatable_ids) + # The first chunk additionally contains the Title of the Document + chunks: list[DocAwareChunk] = list( + chain(*[chunker.chunk(document=document) for document in updatable_docs]) + ) - # Create records in the source of truth about these documents, - # does not include doc_updated_at which is also used to indicate a successful update - upsert_documents_in_db( - documents=updatable_docs, - index_attempt_metadata=index_attempt_metadata, - db_session=db_session, - ) - - logger.debug("Starting chunking") - - # The first chunk additionally contains the Title of the Document - chunks: list[DocAwareChunk] = list( - chain(*[chunker.chunk(document=document) for document in updatable_docs]) - ) - - logger.debug("Starting embedding") - chunks_with_embeddings = embedder.embed_chunks(chunks=chunks) + logger.debug("Starting embedding") + chunks_with_embeddings = embedder.embed_chunks(chunks=chunks) + # Acquires a lock on the documents so that no other process can modify them + # NOTE: don't need to 
acquire till here, since this is when the actual race condition + # with Vespa can occur. + with prepare_to_modify_documents(db_session=db_session, document_ids=updatable_ids): # Attach the latest status from Postgres (source of truth for access) to each # chunk. This access status will be attached to each chunk in the document index # TODO: attach document sets to the chunk based on the status of Postgres as well @@ -212,6 +223,7 @@ def build_indexing_pipeline( *, embedder: IndexingEmbedder, document_index: DocumentIndex, + db_session: Session, chunker: Chunker | None = None, ignore_time_skip: bool = False, ) -> IndexingPipelineProtocol: @@ -224,4 +236,5 @@ def build_indexing_pipeline( embedder=embedder, document_index=document_index, ignore_time_skip=ignore_time_skip, + db_session=db_session, ) diff --git a/backend/danswer/indexing/models.py b/backend/danswer/indexing/models.py index c875c88bd..5fc32cd9a 100644 --- a/backend/danswer/indexing/models.py +++ b/backend/danswer/indexing/models.py @@ -1,14 +1,14 @@ -from dataclasses import dataclass -from dataclasses import fields -from datetime import datetime +from typing import TYPE_CHECKING from pydantic import BaseModel from danswer.access.models import DocumentAccess -from danswer.configs.constants import DocumentSource from danswer.connectors.models import Document from danswer.utils.logger import setup_logger +if TYPE_CHECKING: + from danswer.db.models import EmbeddingModel + logger = setup_logger() @@ -16,14 +16,12 @@ logger = setup_logger() Embedding = list[float] -@dataclass -class ChunkEmbedding: +class ChunkEmbedding(BaseModel): full_embedding: Embedding mini_chunk_embeddings: list[Embedding] -@dataclass -class BaseChunk: +class BaseChunk(BaseModel): chunk_id: int blurb: str # The first sentence(s) of the first Section of the chunk content: str @@ -33,7 +31,6 @@ class BaseChunk: section_continuation: bool # True if this Chunk's start is not at the start of a Section -@dataclass class 
DocAwareChunk(BaseChunk): # During indexing flow, we have access to a complete "Document" # During inference we only have access to the document id and do not reconstruct the Document @@ -46,13 +43,11 @@ class DocAwareChunk(BaseChunk): ) -@dataclass class IndexChunk(DocAwareChunk): embeddings: ChunkEmbedding title_embedding: Embedding | None -@dataclass class DocMetadataAwareIndexChunk(IndexChunk): """An `IndexChunk` that contains all necessary metadata to be indexed. This includes the following: @@ -77,56 +72,28 @@ class DocMetadataAwareIndexChunk(IndexChunk): document_sets: set[str], boost: int, ) -> "DocMetadataAwareIndexChunk": + index_chunk_data = index_chunk.dict() return cls( - **{ - field.name: getattr(index_chunk, field.name) - for field in fields(index_chunk) - }, + **index_chunk_data, access=access, document_sets=document_sets, boost=boost, ) -@dataclass -class InferenceChunk(BaseChunk): - document_id: str - source_type: DocumentSource - semantic_identifier: str - boost: int - recency_bias: float - score: float | None - hidden: bool - metadata: dict[str, str | list[str]] - # Matched sections in the chunk. Uses Vespa syntax e.g. TEXT - # to specify that a set of words should be highlighted. For example: - # ["the answer is 42", "he couldn't find an answer"] - match_highlights: list[str] - # when the doc was last updated - updated_at: datetime | None - primary_owners: list[str] | None = None - secondary_owners: list[str] | None = None - - @property - def unique_id(self) -> str: - return f"{self.document_id}__{self.chunk_id}" - - def __repr__(self) -> str: - blurb_words = self.blurb.split() - short_blurb = "" - for word in blurb_words: - if not short_blurb: - short_blurb = word - continue - if len(short_blurb) > 25: - break - short_blurb += " " + word - return f"Inference Chunk: {self.document_id} - {short_blurb}..." 
- - class EmbeddingModelDetail(BaseModel): model_name: str model_dim: int normalize: bool query_prefix: str | None passage_prefix: str | None + + @classmethod + def from_model(cls, embedding_model: "EmbeddingModel") -> "EmbeddingModelDetail": + return cls( + model_name=embedding_model.model_name, + model_dim=embedding_model.model_dim, + normalize=embedding_model.normalize, + query_prefix=embedding_model.query_prefix, + passage_prefix=embedding_model.passage_prefix, + ) diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py new file mode 100644 index 000000000..41f8e1090 --- /dev/null +++ b/backend/danswer/llm/answering/answer.py @@ -0,0 +1,445 @@ +from collections.abc import Iterator +from typing import cast +from uuid import uuid4 + +from langchain.schema.messages import BaseMessage +from langchain_core.messages import AIMessageChunk + +from danswer.chat.chat_utils import llm_doc_from_inference_section +from danswer.chat.models import AnswerQuestionPossibleReturn +from danswer.chat.models import CitationInfo +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE +from danswer.file_store.utils import InMemoryChatFile +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.models import StreamProcessor +from danswer.llm.answering.prompts.build import AnswerPromptBuilder +from danswer.llm.answering.prompts.build import default_build_system_message +from danswer.llm.answering.prompts.build import default_build_user_message +from danswer.llm.answering.prompts.citations_prompt import ( + build_citations_system_message, +) +from danswer.llm.answering.prompts.citations_prompt import build_citations_user_message +from danswer.llm.answering.prompts.quotes_prompt import build_quotes_user_message 
+from danswer.llm.answering.stream_processing.citation_processing import ( + build_citation_processor, +) +from danswer.llm.answering.stream_processing.quotes_processing import ( + build_quotes_processor, +) +from danswer.llm.interfaces import LLM +from danswer.llm.utils import get_default_llm_tokenizer +from danswer.llm.utils import message_generator_to_string_generator +from danswer.tools.force import filter_tools_for_force_tool_use +from danswer.tools.force import ForceUseTool +from danswer.tools.images.image_generation_tool import IMAGE_GENERATION_RESPONSE_ID +from danswer.tools.images.image_generation_tool import ImageGenerationResponse +from danswer.tools.images.image_generation_tool import ImageGenerationTool +from danswer.tools.images.prompt import build_image_generation_user_prompt +from danswer.tools.message import build_tool_message +from danswer.tools.message import ToolCallSummary +from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS +from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID +from danswer.tools.search.search_tool import SearchResponseSummary +from danswer.tools.search.search_tool import SearchTool +from danswer.tools.tool import Tool +from danswer.tools.tool import ToolResponse +from danswer.tools.tool_runner import ( + check_which_tools_should_run_for_non_tool_calling_llm, +) +from danswer.tools.tool_runner import ToolRunKickoff +from danswer.tools.tool_runner import ToolRunner +from danswer.tools.utils import explicit_tool_calling_supported + + +def _get_answer_stream_processor( + context_docs: list[LlmDoc], + search_order_docs: list[LlmDoc], + answer_style_configs: AnswerStyleConfig, +) -> StreamProcessor: + if answer_style_configs.citation_config: + return build_citation_processor( + context_docs=context_docs, search_order_docs=search_order_docs + ) + if answer_style_configs.quotes_config: + return build_quotes_processor( + context_docs=context_docs, is_json_prompt=not (QA_PROMPT_OVERRIDE == "weak") + 
) + + raise RuntimeError("Not implemented yet") + + +AnswerStream = Iterator[AnswerQuestionPossibleReturn | ToolRunKickoff | ToolResponse] + + +class Answer: + def __init__( + self, + question: str, + answer_style_config: AnswerStyleConfig, + llm: LLM, + prompt_config: PromptConfig, + # must be the same length as `docs`. If None, all docs are considered "relevant" + message_history: list[PreviousMessage] | None = None, + single_message_history: str | None = None, + # newly passed in files to include as part of this question + # TODO THIS NEEDS TO BE HANDLED + latest_query_files: list[InMemoryChatFile] | None = None, + files: list[InMemoryChatFile] | None = None, + tools: list[Tool] | None = None, + # if specified, tells the LLM to always this tool + # NOTE: for native tool-calling, this is only supported by OpenAI atm, + # but we only support them anyways + force_use_tool: ForceUseTool | None = None, + # if set to True, then never use the LLMs provided tool-calling functonality + skip_explicit_tool_calling: bool = False, + ) -> None: + if single_message_history and message_history: + raise ValueError( + "Cannot provide both `message_history` and `single_message_history`" + ) + + self.question = question + + self.latest_query_files = latest_query_files or [] + self.file_id_to_file = {file.file_id: file for file in (files or [])} + + self.tools = tools or [] + self.force_use_tool = force_use_tool + self.skip_explicit_tool_calling = skip_explicit_tool_calling + + self.message_history = message_history or [] + # used for QA flow where we only want to send a single message + self.single_message_history = single_message_history + + self.answer_style_config = answer_style_config + self.prompt_config = prompt_config + + self.llm = llm + self.llm_tokenizer = get_default_llm_tokenizer() + + self._final_prompt: list[BaseMessage] | None = None + + self._streamed_output: list[str] | None = None + self._processed_stream: list[ + AnswerQuestionPossibleReturn | ToolResponse | 
ToolRunKickoff + ] | None = None + + def _update_prompt_builder_for_search_tool( + self, prompt_builder: AnswerPromptBuilder, final_context_documents: list[LlmDoc] + ) -> None: + if self.answer_style_config.citation_config: + prompt_builder.update_system_prompt( + build_citations_system_message(self.prompt_config) + ) + prompt_builder.update_user_prompt( + build_citations_user_message( + question=self.question, + prompt_config=self.prompt_config, + context_docs=final_context_documents, + files=self.latest_query_files, + all_doc_useful=( + self.answer_style_config.citation_config.all_docs_useful + if self.answer_style_config.citation_config + else False + ), + ) + ) + elif self.answer_style_config.quotes_config: + prompt_builder.update_user_prompt( + build_quotes_user_message( + question=self.question, + context_docs=final_context_documents, + history_str=self.single_message_history or "", + prompt=self.prompt_config, + ) + ) + + def _raw_output_for_explicit_tool_calling_llms( + self, + ) -> Iterator[str | ToolRunKickoff | ToolResponse]: + prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) + + tool_call_chunk: AIMessageChunk | None = None + if self.force_use_tool and self.force_use_tool.args is not None: + # if we are forcing a tool WITH args specified, we don't need to check which tools to run + # / need to generate the args + tool_call_chunk = AIMessageChunk( + content="", + ) + tool_call_chunk.tool_calls = [ + { + "name": self.force_use_tool.tool_name, + "args": self.force_use_tool.args, + "id": str(uuid4()), + } + ] + else: + # if tool calling is supported, first try the raw message + # to see if we don't need to use any tools + prompt_builder.update_system_prompt( + default_build_system_message(self.prompt_config) + ) + prompt_builder.update_user_prompt( + default_build_user_message( + self.question, self.prompt_config, self.latest_query_files + ) + ) + prompt = prompt_builder.build() + final_tool_definitions = [ + 
tool.tool_definition() + for tool in filter_tools_for_force_tool_use( + self.tools, self.force_use_tool + ) + ] + for message in self.llm.stream( + prompt=prompt, + tools=final_tool_definitions if final_tool_definitions else None, + tool_choice="required" if self.force_use_tool else None, + ): + if isinstance(message, AIMessageChunk) and ( + message.tool_call_chunks or message.tool_calls + ): + if tool_call_chunk is None: + tool_call_chunk = message + else: + tool_call_chunk += message # type: ignore + else: + if message.content: + yield cast(str, message.content) + + if not tool_call_chunk: + return # no tool call needed + + # if we have a tool call, we need to call the tool + tool_call_requests = tool_call_chunk.tool_calls + for tool_call_request in tool_call_requests: + tool = [ + tool for tool in self.tools if tool.name() == tool_call_request["name"] + ][0] + tool_args = ( + self.force_use_tool.args + if self.force_use_tool and self.force_use_tool.args + else tool_call_request["args"] + ) + + tool_runner = ToolRunner(tool, tool_args) + yield tool_runner.kickoff() + yield from tool_runner.tool_responses() + + tool_call_summary = ToolCallSummary( + tool_call_request=tool_call_chunk, + tool_call_result=build_tool_message( + tool_call_request, tool_runner.tool_message_content() + ), + ) + + if tool.name() == SearchTool.name(): + self._update_prompt_builder_for_search_tool(prompt_builder, []) + elif tool.name() == ImageGenerationTool.name(): + prompt_builder.update_user_prompt( + build_image_generation_user_prompt( + query=self.question, + ) + ) + prompt = prompt_builder.build(tool_call_summary=tool_call_summary) + + yield from message_generator_to_string_generator( + self.llm.stream( + prompt=prompt, + tools=[tool.tool_definition() for tool in self.tools], + ) + ) + + return + + def _raw_output_for_non_explicit_tool_calling_llms( + self, + ) -> Iterator[str | ToolRunKickoff | ToolResponse]: + prompt_builder = AnswerPromptBuilder(self.message_history, 
self.llm.config) + chosen_tool_and_args: tuple[Tool, dict] | None = None + + if self.force_use_tool: + # if we are forcing a tool, we don't need to check which tools to run + tool = next( + iter( + [ + tool + for tool in self.tools + if tool.name() == self.force_use_tool.tool_name + ] + ), + None, + ) + if not tool: + raise RuntimeError(f"Tool '{self.force_use_tool.tool_name}' not found") + + tool_args = ( + self.force_use_tool.args + if self.force_use_tool.args + else tool.get_args_for_non_tool_calling_llm( + query=self.question, + history=self.message_history, + llm=self.llm, + force_run=True, + ) + ) + + if tool_args is None: + raise RuntimeError(f"Tool '{tool.name()}' did not return args") + + chosen_tool_and_args = (tool, tool_args) + else: + all_tool_args = check_which_tools_should_run_for_non_tool_calling_llm( + tools=self.tools, + query=self.question, + history=self.message_history, + llm=self.llm, + ) + for ind, args in enumerate(all_tool_args): + if args is not None: + chosen_tool_and_args = (self.tools[ind], args) + # for now, just pick the first tool selected + break + + if not chosen_tool_and_args: + prompt_builder.update_system_prompt( + default_build_system_message(self.prompt_config) + ) + prompt_builder.update_user_prompt( + default_build_user_message( + self.question, self.prompt_config, self.latest_query_files + ) + ) + prompt = prompt_builder.build() + yield from message_generator_to_string_generator( + self.llm.stream(prompt=prompt) + ) + return + + tool, tool_args = chosen_tool_and_args + tool_runner = ToolRunner(tool, tool_args) + yield tool_runner.kickoff() + + if tool.name() == SearchTool.name(): + final_context_documents = None + for response in tool_runner.tool_responses(): + if response.id == FINAL_CONTEXT_DOCUMENTS: + final_context_documents = cast(list[LlmDoc], response.response) + yield response + + if final_context_documents is None: + raise RuntimeError("SearchTool did not return final context documents") + + 
self._update_prompt_builder_for_search_tool( + prompt_builder, final_context_documents + ) + elif tool.name() == ImageGenerationTool.name(): + img_urls = [] + for response in tool_runner.tool_responses(): + if response.id == IMAGE_GENERATION_RESPONSE_ID: + img_generation_response = cast( + list[ImageGenerationResponse], response.response + ) + img_urls = [img.url for img in img_generation_response] + break + yield response + + prompt_builder.update_user_prompt( + build_image_generation_user_prompt( + query=self.question, + img_urls=img_urls, + ) + ) + + prompt = prompt_builder.build() + yield from message_generator_to_string_generator(self.llm.stream(prompt=prompt)) + + @property + def processed_streamed_output(self) -> AnswerStream: + if self._processed_stream is not None: + yield from self._processed_stream + return + + output_generator = ( + self._raw_output_for_explicit_tool_calling_llms() + if explicit_tool_calling_supported( + self.llm.config.model_provider, self.llm.config.model_name + ) + and not self.skip_explicit_tool_calling + else self._raw_output_for_non_explicit_tool_calling_llms() + ) + + def _process_stream( + stream: Iterator[ToolRunKickoff | ToolResponse | str], + ) -> AnswerStream: + message = None + + # special things we need to keep track of for the SearchTool + search_results: list[ + LlmDoc + ] | None = None # raw results that will be displayed to the user + final_context_docs: list[ + LlmDoc + ] | None = None # processed docs to feed into the LLM + + for message in stream: + if isinstance(message, ToolRunKickoff): + yield message + elif isinstance(message, ToolResponse): + if message.id == SEARCH_RESPONSE_SUMMARY_ID: + search_results = [ + llm_doc_from_inference_section(section) + for section in cast( + SearchResponseSummary, message.response + ).top_sections + ] + elif message.id == FINAL_CONTEXT_DOCUMENTS: + final_context_docs = cast(list[LlmDoc], message.response) + yield message + else: + # assumes all tool responses will come first, 
then the final answer + break + + process_answer_stream_fn = _get_answer_stream_processor( + context_docs=final_context_docs or [], + # if doc selection is enabled, then search_results will be None, + # so we need to use the final_context_docs + search_order_docs=search_results or final_context_docs or [], + answer_style_configs=self.answer_style_config, + ) + + def _stream() -> Iterator[str]: + if message: + yield cast(str, message) + yield from cast(Iterator[str], stream) + + yield from process_answer_stream_fn(_stream()) + + processed_stream = [] + for processed_packet in _process_stream(output_generator): + processed_stream.append(processed_packet) + yield processed_packet + + self._processed_stream = processed_stream + + @property + def llm_answer(self) -> str: + answer = "" + for packet in self.processed_streamed_output: + if isinstance(packet, DanswerAnswerPiece) and packet.answer_piece: + answer += packet.answer_piece + + return answer + + @property + def citations(self) -> list[CitationInfo]: + citations: list[CitationInfo] = [] + for packet in self.processed_streamed_output: + if isinstance(packet, CitationInfo): + citations.append(packet) + + return citations diff --git a/backend/danswer/llm/answering/doc_pruning.py b/backend/danswer/llm/answering/doc_pruning.py new file mode 100644 index 000000000..5a43ab3c6 --- /dev/null +++ b/backend/danswer/llm/answering/doc_pruning.py @@ -0,0 +1,230 @@ +import json +from copy import deepcopy +from typing import TypeVar + +from danswer.chat.models import ( + LlmDoc, +) +from danswer.configs.constants import IGNORE_FOR_QA +from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE +from danswer.llm.answering.models import DocumentPruningConfig +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.citations_prompt import compute_max_document_tokens +from danswer.llm.interfaces import LLMConfig +from danswer.llm.utils import get_default_llm_tokenizer +from danswer.llm.utils 
import tokenizer_trim_content +from danswer.prompts.prompt_utils import build_doc_context_str +from danswer.search.models import InferenceChunk +from danswer.tools.search.search_utils import llm_doc_to_dict +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +T = TypeVar("T", bound=LlmDoc | InferenceChunk) + +_METADATA_TOKEN_ESTIMATE = 75 + + +class PruningError(Exception): + pass + + +def _compute_limit( + prompt_config: PromptConfig, + llm_config: LLMConfig, + question: str, + max_chunks: int | None, + max_window_percentage: float | None, + max_tokens: int | None, + tool_token_count: int, +) -> int: + llm_max_document_tokens = compute_max_document_tokens( + prompt_config=prompt_config, + llm_config=llm_config, + tool_token_count=tool_token_count, + actual_user_input=question, + ) + + window_percentage_based_limit = ( + max_window_percentage * llm_max_document_tokens + if max_window_percentage + else None + ) + chunk_count_based_limit = ( + max_chunks * DOC_EMBEDDING_CONTEXT_SIZE if max_chunks else None + ) + + limit_options = [ + lim + for lim in [ + window_percentage_based_limit, + chunk_count_based_limit, + max_tokens, + llm_max_document_tokens, + ] + if lim + ] + return int(min(limit_options)) + + +def reorder_docs( + docs: list[T], + doc_relevance_list: list[bool] | None, +) -> list[T]: + if doc_relevance_list is None: + return docs + + reordered_docs: list[T] = [] + if doc_relevance_list is not None: + for selection_target in [True, False]: + for doc, is_relevant in zip(docs, doc_relevance_list): + if is_relevant == selection_target: + reordered_docs.append(doc) + return reordered_docs + + +def _remove_docs_to_ignore(docs: list[LlmDoc]) -> list[LlmDoc]: + return [doc for doc in docs if not doc.metadata.get(IGNORE_FOR_QA)] + + +def _apply_pruning( + docs: list[LlmDoc], + doc_relevance_list: list[bool] | None, + token_limit: int, + is_manually_selected_docs: bool, + use_sections: bool, + using_tool_message: bool, +) -> list[LlmDoc]: + 
llm_tokenizer = get_default_llm_tokenizer() + docs = deepcopy(docs) # don't modify in place + + # re-order docs with all the "relevant" docs at the front + docs = reorder_docs(docs=docs, doc_relevance_list=doc_relevance_list) + # remove docs that are explicitly marked as not for QA + docs = _remove_docs_to_ignore(docs=docs) + + tokens_per_doc: list[int] = [] + final_doc_ind = None + total_tokens = 0 + for ind, llm_doc in enumerate(docs): + doc_str = ( + json.dumps(llm_doc_to_dict(llm_doc, ind)) + if using_tool_message + else build_doc_context_str( + semantic_identifier=llm_doc.semantic_identifier, + source_type=llm_doc.source_type, + content=llm_doc.content, + metadata_dict=llm_doc.metadata, + updated_at=llm_doc.updated_at, + ind=ind, + ) + ) + + doc_tokens = len(llm_tokenizer.encode(doc_str)) + # if chunks, truncate chunks that are way too long + # this can happen if the embedding model tokenizer is different + # than the LLM tokenizer + if ( + not is_manually_selected_docs + and not use_sections + and doc_tokens > DOC_EMBEDDING_CONTEXT_SIZE + _METADATA_TOKEN_ESTIMATE + ): + logger.warning( + "Found more tokens in chunk than expected, " + "likely mismatch between embedding and LLM tokenizers. Trimming content..." + ) + llm_doc.content = tokenizer_trim_content( + content=llm_doc.content, + desired_length=DOC_EMBEDDING_CONTEXT_SIZE, + tokenizer=llm_tokenizer, + ) + doc_tokens = DOC_EMBEDDING_CONTEXT_SIZE + tokens_per_doc.append(doc_tokens) + total_tokens += doc_tokens + if total_tokens > token_limit: + final_doc_ind = ind + break + + if final_doc_ind is not None: + if is_manually_selected_docs or use_sections: + # for document selection, only allow the final document to get truncated + # if more than that, then the user message is too long + if final_doc_ind != len(docs) - 1: + if use_sections: + # Truncate the rest of the list since we're over the token limit + # for the last one, trim it. 
In this case, the Sections can be rather long + # so better to trim the back than throw away the whole thing. + docs = docs[: final_doc_ind + 1] + else: + raise PruningError( + "LLM context window exceeded. Please de-select some documents or shorten your query." + ) + + amount_to_truncate = total_tokens - token_limit + # NOTE: need to recalculate the length here, since the previous calculation included + # overhead from JSON-fying the doc / the metadata + final_doc_content_length = len( + llm_tokenizer.encode(docs[final_doc_ind].content) + ) - (amount_to_truncate) + # this could occur if we only have space for the title / metadata + # not ideal, but it's the most reasonable thing to do + # NOTE: the frontend prevents documents from being selected if + # less than 75 tokens are available to try and avoid this situation + # from occurring in the first place + if final_doc_content_length <= 0: + logger.error( + f"Final doc ({docs[final_doc_ind].semantic_identifier}) content " + "length is less than 0. Removing this doc from the final prompt." 
+ ) + docs.pop() + else: + docs[final_doc_ind].content = tokenizer_trim_content( + content=docs[final_doc_ind].content, + desired_length=final_doc_content_length, + tokenizer=llm_tokenizer, + ) + else: + # For regular search, don't truncate the final document unless it's the only one + # If it's not the only one, we can throw it away, if it's the only one, we have to truncate + if final_doc_ind != 0: + docs = docs[:final_doc_ind] + else: + docs[0].content = tokenizer_trim_content( + content=docs[0].content, + desired_length=token_limit - _METADATA_TOKEN_ESTIMATE, + tokenizer=llm_tokenizer, + ) + docs = [docs[0]] + + return docs + + +def prune_documents( + docs: list[LlmDoc], + doc_relevance_list: list[bool] | None, + prompt_config: PromptConfig, + llm_config: LLMConfig, + question: str, + document_pruning_config: DocumentPruningConfig, +) -> list[LlmDoc]: + if doc_relevance_list is not None: + assert len(docs) == len(doc_relevance_list) + + doc_token_limit = _compute_limit( + prompt_config=prompt_config, + llm_config=llm_config, + question=question, + max_chunks=document_pruning_config.max_chunks, + max_window_percentage=document_pruning_config.max_window_percentage, + max_tokens=document_pruning_config.max_tokens, + tool_token_count=document_pruning_config.tool_num_tokens, + ) + return _apply_pruning( + docs=docs, + doc_relevance_list=doc_relevance_list, + token_limit=doc_token_limit, + is_manually_selected_docs=document_pruning_config.is_manually_selected_docs, + use_sections=document_pruning_config.use_sections, + using_tool_message=document_pruning_config.using_tool_message, + ) diff --git a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py new file mode 100644 index 000000000..a5248fac2 --- /dev/null +++ b/backend/danswer/llm/answering/models.py @@ -0,0 +1,143 @@ +from collections.abc import Callable +from collections.abc import Iterator +from typing import Any +from typing import TYPE_CHECKING + +from langchain.schema.messages 
import AIMessage +from langchain.schema.messages import BaseMessage +from langchain.schema.messages import HumanMessage +from langchain.schema.messages import SystemMessage +from pydantic import BaseModel +from pydantic import Field +from pydantic import root_validator + +from danswer.chat.models import AnswerQuestionStreamReturn +from danswer.configs.constants import MessageType +from danswer.file_store.models import InMemoryChatFile +from danswer.llm.override_models import PromptOverride +from danswer.llm.utils import build_content_with_imgs + +if TYPE_CHECKING: + from danswer.db.models import ChatMessage + from danswer.db.models import Prompt + + +StreamProcessor = Callable[[Iterator[str]], AnswerQuestionStreamReturn] + + +class PreviousMessage(BaseModel): + """Simplified version of `ChatMessage`""" + + message: str + token_count: int + message_type: MessageType + files: list[InMemoryChatFile] + + @classmethod + def from_chat_message( + cls, chat_message: "ChatMessage", available_files: list[InMemoryChatFile] + ) -> "PreviousMessage": + message_file_ids = ( + [file["id"] for file in chat_message.files] if chat_message.files else [] + ) + return cls( + message=chat_message.message, + token_count=chat_message.token_count, + message_type=chat_message.message_type, + files=[ + file + for file in available_files + if str(file.file_id) in message_file_ids + ], + ) + + def to_langchain_msg(self) -> BaseMessage: + content = build_content_with_imgs(self.message, self.files) + if self.message_type == MessageType.USER: + return HumanMessage(content=content) + elif self.message_type == MessageType.ASSISTANT: + return AIMessage(content=content) + else: + return SystemMessage(content=content) + + +class DocumentPruningConfig(BaseModel): + max_chunks: int | None = None + max_window_percentage: float | None = None + max_tokens: int | None = None + # different pruning behavior is expected when the + # user manually selects documents they want to chat with + # e.g. 
we don't want to truncate each document to be no more + # than one chunk long + is_manually_selected_docs: bool = False + # If user specifies to include additional context chunks for each match, then different pruning + # is used. As many Sections as possible are included, and the last Section is truncated + use_sections: bool = False + # If using tools, then we need to consider the tool length + tool_num_tokens: int = 0 + # If using a tool message to represent the docs, then we have to JSON serialize + # the document content, which adds to the token count. + using_tool_message: bool = False + + +class CitationConfig(BaseModel): + all_docs_useful: bool = False + + +class QuotesConfig(BaseModel): + pass + + +class AnswerStyleConfig(BaseModel): + citation_config: CitationConfig | None = None + quotes_config: QuotesConfig | None = None + document_pruning_config: DocumentPruningConfig = Field( + default_factory=DocumentPruningConfig + ) + + @root_validator + def check_quotes_and_citation(cls, values: dict[str, Any]) -> dict[str, Any]: + citation_config = values.get("citation_config") + quotes_config = values.get("quotes_config") + + if citation_config is None and quotes_config is None: + raise ValueError( + "One of `citation_config` or `quotes_config` must be provided" + ) + + if citation_config is not None and quotes_config is not None: + raise ValueError( + "Only one of `citation_config` or `quotes_config` must be provided" + ) + + return values + + +class PromptConfig(BaseModel): + """Final representation of the Prompt configuration passed + into the `Answer` object.""" + + system_prompt: str + task_prompt: str + datetime_aware: bool + include_citations: bool + + @classmethod + def from_model( + cls, model: "Prompt", prompt_override: PromptOverride | None = None + ) -> "PromptConfig": + override_system_prompt = ( + prompt_override.system_prompt if prompt_override else None + ) + override_task_prompt = prompt_override.task_prompt if prompt_override else None + + 
return cls( + system_prompt=override_system_prompt or model.system_prompt, + task_prompt=override_task_prompt or model.task_prompt, + datetime_aware=model.datetime_aware, + include_citations=model.include_citations, + ) + + # needed so that this can be passed into lru_cache funcs + class Config: + frozen = True diff --git a/backend/danswer/llm/answering/prompts/build.py b/backend/danswer/llm/answering/prompts/build.py new file mode 100644 index 000000000..4907bde88 --- /dev/null +++ b/backend/danswer/llm/answering/prompts/build.py @@ -0,0 +1,119 @@ +from collections.abc import Callable +from typing import cast + +from langchain_core.messages import BaseMessage +from langchain_core.messages import HumanMessage +from langchain_core.messages import SystemMessage + +from danswer.file_store.models import InMemoryChatFile +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.citations_prompt import compute_max_llm_input_tokens +from danswer.llm.interfaces import LLMConfig +from danswer.llm.utils import build_content_with_imgs +from danswer.llm.utils import check_message_tokens +from danswer.llm.utils import get_default_llm_tokenizer +from danswer.llm.utils import translate_history_to_basemessages +from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT +from danswer.prompts.prompt_utils import add_time_to_system_prompt +from danswer.prompts.prompt_utils import drop_messages_history_overflow +from danswer.tools.message import ToolCallSummary + + +def default_build_system_message( + prompt_config: PromptConfig, +) -> SystemMessage | None: + system_prompt = prompt_config.system_prompt.strip() + if prompt_config.datetime_aware: + system_prompt = add_time_to_system_prompt(system_prompt=system_prompt) + + if not system_prompt: + return None + + system_msg = SystemMessage(content=system_prompt) + + return system_msg + + +def default_build_user_message( + user_query: 
str, prompt_config: PromptConfig, files: list[InMemoryChatFile] = [] +) -> HumanMessage: + user_prompt = ( + CHAT_USER_CONTEXT_FREE_PROMPT.format( + task_prompt=prompt_config.task_prompt, user_query=user_query + ) + if prompt_config.task_prompt + else user_query + ) + user_prompt = user_prompt.strip() + user_msg = HumanMessage( + content=build_content_with_imgs(user_prompt, files) if files else user_prompt + ) + return user_msg + + +class AnswerPromptBuilder: + def __init__( + self, message_history: list[PreviousMessage], llm_config: LLMConfig + ) -> None: + self.max_tokens = compute_max_llm_input_tokens(llm_config) + + ( + self.message_history, + self.history_token_cnts, + ) = translate_history_to_basemessages(message_history) + + self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = None + self.user_message_and_token_cnt: tuple[HumanMessage, int] | None = None + + llm_tokenizer = get_default_llm_tokenizer() + self.llm_tokenizer_encode_func = cast( + Callable[[str], list[int]], llm_tokenizer.encode + ) + + def update_system_prompt(self, system_message: SystemMessage | None) -> None: + if not system_message: + self.system_message_and_token_cnt = None + return + + self.system_message_and_token_cnt = ( + system_message, + check_message_tokens(system_message, self.llm_tokenizer_encode_func), + ) + + def update_user_prompt(self, user_message: HumanMessage) -> None: + if not user_message: + self.user_message_and_token_cnt = None + return + + self.user_message_and_token_cnt = ( + user_message, + check_message_tokens(user_message, self.llm_tokenizer_encode_func), + ) + + def build( + self, tool_call_summary: ToolCallSummary | None = None + ) -> list[BaseMessage]: + if not self.user_message_and_token_cnt: + raise ValueError("User message must be set before building prompt") + + final_messages_with_tokens: list[tuple[BaseMessage, int]] = [] + if self.system_message_and_token_cnt: + final_messages_with_tokens.append(self.system_message_and_token_cnt) + + 
final_messages_with_tokens.extend( + [ + (self.message_history[i], self.history_token_cnts[i]) + for i in range(len(self.message_history)) + ] + ) + + final_messages_with_tokens.append(self.user_message_and_token_cnt) + + if tool_call_summary: + final_messages_with_tokens.append((tool_call_summary.tool_call_request, 0)) + final_messages_with_tokens.append((tool_call_summary.tool_call_result, 0)) + + return drop_messages_history_overflow( + final_messages_with_tokens, self.max_tokens + ) diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py new file mode 100644 index 000000000..81626e272 --- /dev/null +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -0,0 +1,162 @@ +from langchain.schema.messages import HumanMessage +from langchain.schema.messages import SystemMessage + +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.configs.model_configs import GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS +from danswer.db.chat import get_default_prompt +from danswer.db.models import Persona +from danswer.file_store.utils import InMemoryChatFile +from danswer.llm.answering.models import PromptConfig +from danswer.llm.factory import get_llm_for_persona +from danswer.llm.interfaces import LLMConfig +from danswer.llm.utils import build_content_with_imgs +from danswer.llm.utils import check_number_of_tokens +from danswer.llm.utils import get_max_input_tokens +from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT +from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT +from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT_FOR_TOOL_CALLING +from danswer.prompts.prompt_utils import add_time_to_system_prompt +from danswer.prompts.prompt_utils import build_complete_context_str +from danswer.prompts.prompt_utils import 
build_task_prompt_reminders +from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT +from danswer.prompts.token_counts import ( + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT, +) +from danswer.prompts.token_counts import CITATION_REMINDER_TOKEN_CNT +from danswer.prompts.token_counts import CITATION_STATEMENT_TOKEN_CNT +from danswer.prompts.token_counts import LANGUAGE_HINT_TOKEN_CNT +from danswer.search.models import InferenceChunk + + +def get_prompt_tokens(prompt_config: PromptConfig) -> int: + # Note: currently custom prompts do not allow datetime aware, only default prompts + return ( + check_number_of_tokens(prompt_config.system_prompt) + + check_number_of_tokens(prompt_config.task_prompt) + + CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT + + CITATION_STATEMENT_TOKEN_CNT + + CITATION_REMINDER_TOKEN_CNT + + (LANGUAGE_HINT_TOKEN_CNT if bool(MULTILINGUAL_QUERY_EXPANSION) else 0) + + (ADDITIONAL_INFO_TOKEN_CNT if prompt_config.datetime_aware else 0) + ) + + +# buffer just to be safe so that we don't overflow the token limit due to +# a small miscalculation +_MISC_BUFFER = 40 + + +def compute_max_document_tokens( + prompt_config: PromptConfig, + llm_config: LLMConfig, + actual_user_input: str | None = None, + tool_token_count: int = 0, + max_llm_token_override: int | None = None, +) -> int: + """Estimates the number of tokens available for context documents. Formula is roughly: + + ( + model_context_window - reserved_output_tokens - prompt_tokens + - (actual_user_input OR reserved_user_message_tokens) - buffer (just to be safe) + ) + + The actual_user_input is used at query time. If we are calculating this before knowing the exact input (e.g. + if we're trying to determine if the user should be able to select another document) then we just set an + arbitrary "upper bound". 
+ """ + # if we can't find a number of tokens, just assume some common default + max_input_tokens = ( + max_llm_token_override + if max_llm_token_override + else get_max_input_tokens( + model_name=llm_config.model_name, model_provider=llm_config.model_provider + ) + ) + prompt_tokens = get_prompt_tokens(prompt_config) + + user_input_tokens = ( + check_number_of_tokens(actual_user_input) + if actual_user_input is not None + else GEN_AI_SINGLE_USER_MESSAGE_EXPECTED_MAX_TOKENS + ) + + return ( + max_input_tokens + - prompt_tokens + - user_input_tokens + - tool_token_count + - _MISC_BUFFER + ) + + +def compute_max_document_tokens_for_persona( + persona: Persona, + actual_user_input: str | None = None, + max_llm_token_override: int | None = None, +) -> int: + prompt = persona.prompts[0] if persona.prompts else get_default_prompt() + return compute_max_document_tokens( + prompt_config=PromptConfig.from_model(prompt), + llm_config=get_llm_for_persona(persona).config, + actual_user_input=actual_user_input, + max_llm_token_override=max_llm_token_override, + ) + + +def compute_max_llm_input_tokens(llm_config: LLMConfig) -> int: + """Maximum tokens allows in the input to the LLM (of any type).""" + + input_tokens = get_max_input_tokens( + model_name=llm_config.model_name, model_provider=llm_config.model_provider + ) + return input_tokens - _MISC_BUFFER + + +def build_citations_system_message( + prompt_config: PromptConfig, +) -> SystemMessage: + system_prompt = prompt_config.system_prompt.strip() + if prompt_config.include_citations: + system_prompt += REQUIRE_CITATION_STATEMENT + if prompt_config.datetime_aware: + system_prompt = add_time_to_system_prompt(system_prompt=system_prompt) + + return SystemMessage(content=system_prompt) + + +def build_citations_user_message( + question: str, + prompt_config: PromptConfig, + context_docs: list[LlmDoc] | list[InferenceChunk], + files: list[InMemoryChatFile], + all_doc_useful: bool, + history_message: str = "", +) -> HumanMessage: + 
task_prompt_with_reminder = build_task_prompt_reminders(prompt_config) + + if context_docs: + context_docs_str = build_complete_context_str(context_docs) + optional_ignore = "" if all_doc_useful else DEFAULT_IGNORE_STATEMENT + + user_prompt = CITATIONS_PROMPT.format( + optional_ignore_statement=optional_ignore, + context_docs_str=context_docs_str, + task_prompt=task_prompt_with_reminder, + user_query=question, + history_block=history_message, + ) + else: + # if no context docs provided, assume we're in the tool calling flow + user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format( + task_prompt=task_prompt_with_reminder, + user_query=question, + ) + + user_prompt = user_prompt.strip() + user_msg = HumanMessage( + content=build_content_with_imgs(user_prompt, files) if files else user_prompt + ) + + return user_msg diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py new file mode 100644 index 000000000..c0a36b10e --- /dev/null +++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py @@ -0,0 +1,109 @@ +from langchain.schema.messages import HumanMessage + +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE +from danswer.llm.answering.models import PromptConfig +from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK +from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK +from danswer.prompts.direct_qa_prompts import JSON_PROMPT +from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT +from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT +from danswer.prompts.prompt_utils import build_complete_context_str +from danswer.search.models import InferenceChunk + + +def _build_weak_llm_quotes_prompt( + question: str, + context_docs: list[LlmDoc] | list[InferenceChunk], + history_str: str, + prompt: PromptConfig, + use_language_hint: bool, +) -> HumanMessage: + 
"""Since Danswer supports a variety of LLMs, this less demanding prompt is provided + as an option to use with weaker LLMs such as small version, low float precision, quantized, + or distilled models. It only uses one context document and has very weak requirements of + output format. + """ + context_block = "" + if context_docs: + context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs[0].content) + + prompt_str = WEAK_LLM_PROMPT.format( + system_prompt=prompt.system_prompt, + context_block=context_block, + task_prompt=prompt.task_prompt, + user_query=question, + ) + return HumanMessage(content=prompt_str) + + +def _build_strong_llm_quotes_prompt( + question: str, + context_docs: list[LlmDoc] | list[InferenceChunk], + history_str: str, + prompt: PromptConfig, + use_language_hint: bool, +) -> HumanMessage: + context_block = "" + if context_docs: + context_docs_str = build_complete_context_str(context_docs) + context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs_str) + + history_block = "" + if history_str: + history_block = HISTORY_BLOCK.format(history_str=history_str) + + full_prompt = JSON_PROMPT.format( + system_prompt=prompt.system_prompt, + context_block=context_block, + history_block=history_block, + task_prompt=prompt.task_prompt, + user_query=question, + language_hint_or_none=LANGUAGE_HINT.strip() if use_language_hint else "", + ).strip() + return HumanMessage(content=full_prompt) + + +def build_quotes_user_message( + question: str, + context_docs: list[LlmDoc] | list[InferenceChunk], + history_str: str, + prompt: PromptConfig, + use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), +) -> HumanMessage: + prompt_builder = ( + _build_weak_llm_quotes_prompt + if QA_PROMPT_OVERRIDE == "weak" + else _build_strong_llm_quotes_prompt + ) + + return prompt_builder( + question=question, + context_docs=context_docs, + history_str=history_str, + prompt=prompt, + use_language_hint=use_language_hint, + ) + + +def build_quotes_prompt( + 
question: str, + context_docs: list[LlmDoc] | list[InferenceChunk], + history_str: str, + prompt: PromptConfig, + use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), +) -> HumanMessage: + prompt_builder = ( + _build_weak_llm_quotes_prompt + if QA_PROMPT_OVERRIDE == "weak" + else _build_strong_llm_quotes_prompt + ) + + return prompt_builder( + question=question, + context_docs=context_docs, + history_str=history_str, + prompt=prompt, + use_language_hint=use_language_hint, + ) diff --git a/backend/danswer/llm/answering/prompts/utils.py b/backend/danswer/llm/answering/prompts/utils.py new file mode 100644 index 000000000..bcc8b8918 --- /dev/null +++ b/backend/danswer/llm/answering/prompts/utils.py @@ -0,0 +1,20 @@ +from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT +from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTEXT + + +def build_dummy_prompt( + system_prompt: str, task_prompt: str, retrieval_disabled: bool +) -> str: + if retrieval_disabled: + return PARAMATERIZED_PROMPT_WITHOUT_CONTEXT.format( + user_query="", + system_prompt=system_prompt, + task_prompt=task_prompt, + ).strip() + + return PARAMATERIZED_PROMPT.format( + context_docs_str="", + user_query="", + system_prompt=system_prompt, + task_prompt=task_prompt, + ).strip() diff --git a/backend/danswer/llm/answering/stream_processing/citation_processing.py b/backend/danswer/llm/answering/stream_processing/citation_processing.py new file mode 100644 index 000000000..fa774660c --- /dev/null +++ b/backend/danswer/llm/answering/stream_processing/citation_processing.py @@ -0,0 +1,126 @@ +import re +from collections.abc import Iterator + +from danswer.chat.models import AnswerQuestionStreamReturn +from danswer.chat.models import CitationInfo +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import STOP_STREAM_PAT +from danswer.llm.answering.models import StreamProcessor +from 
danswer.llm.answering.stream_processing.utils import map_document_id_order +from danswer.prompts.constants import TRIPLE_BACKTICK +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +def in_code_block(llm_text: str) -> bool: + count = llm_text.count(TRIPLE_BACKTICK) + return count % 2 != 0 + + +def extract_citations_from_stream( + tokens: Iterator[str], + context_docs: list[LlmDoc], + doc_id_to_rank_map: dict[str, int], + stop_stream: str | None = STOP_STREAM_PAT, +) -> Iterator[DanswerAnswerPiece | CitationInfo]: + llm_out = "" + max_citation_num = len(context_docs) + curr_segment = "" + prepend_bracket = False + cited_inds = set() + hold = "" + for raw_token in tokens: + if stop_stream: + next_hold = hold + raw_token + + if stop_stream in next_hold: + break + + if next_hold == stop_stream[: len(next_hold)]: + hold = next_hold + continue + + token = next_hold + hold = "" + else: + token = raw_token + + # Special case of [1][ where ][ is a single token + # This is where the model attempts to do consecutive citations like [1][2] + if prepend_bracket: + curr_segment += "[" + curr_segment + prepend_bracket = False + + curr_segment += token + llm_out += token + + possible_citation_pattern = r"(\[\d*$)" # [1, [, etc + possible_citation_found = re.search(possible_citation_pattern, curr_segment) + + citation_pattern = r"\[(\d+)\]" # [1], [2] etc + citation_found = re.search(citation_pattern, curr_segment) + + if citation_found and not in_code_block(llm_out): + numerical_value = int(citation_found.group(1)) + if 1 <= numerical_value <= max_citation_num: + context_llm_doc = context_docs[ + numerical_value - 1 + ] # remove 1 index offset + + link = context_llm_doc.link + target_citation_num = doc_id_to_rank_map[context_llm_doc.document_id] + + # Use the citation number for the document's rank in + # the search (or selected docs) results + curr_segment = re.sub( + rf"\[{numerical_value}\]", f"[{target_citation_num}]", curr_segment + ) + + if 
target_citation_num not in cited_inds: + cited_inds.add(target_citation_num) + yield CitationInfo( + citation_num=target_citation_num, + document_id=context_llm_doc.document_id, + ) + + if link: + curr_segment = re.sub(r"\[", "[[", curr_segment, count=1) + curr_segment = re.sub("]", f"]]({link})", curr_segment, count=1) + + # In case there's another open bracket like [1][, don't want to match this + possible_citation_found = None + + # if we see "[", but haven't seen the right side, hold back - this may be a + # citation that needs to be replaced with a link + if possible_citation_found: + continue + + # Special case with back to back citations [1][2] + if curr_segment and curr_segment[-1] == "[": + curr_segment = curr_segment[:-1] + prepend_bracket = True + + yield DanswerAnswerPiece(answer_piece=curr_segment) + curr_segment = "" + + if curr_segment: + if prepend_bracket: + yield DanswerAnswerPiece(answer_piece="[" + curr_segment) + else: + yield DanswerAnswerPiece(answer_piece=curr_segment) + + +def build_citation_processor( + context_docs: list[LlmDoc], search_order_docs: list[LlmDoc] +) -> StreamProcessor: + def stream_processor(tokens: Iterator[str]) -> AnswerQuestionStreamReturn: + yield from extract_citations_from_stream( + tokens=tokens, + context_docs=context_docs, + doc_id_to_rank_map=map_document_id_order(search_order_docs), + ) + + return stream_processor diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py new file mode 100644 index 000000000..10d15b719 --- /dev/null +++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py @@ -0,0 +1,293 @@ +import math +import re +from collections.abc import Callable +from collections.abc import Generator +from collections.abc import Iterator +from json import JSONDecodeError +from typing import Optional + +import regex + +from danswer.chat.models import AnswerQuestionStreamReturn +from 
danswer.chat.models import DanswerAnswer +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import DanswerQuote +from danswer.chat.models import DanswerQuotes +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT +from danswer.prompts.constants import ANSWER_PAT +from danswer.prompts.constants import QUOTE_PAT +from danswer.prompts.constants import UNCERTAINTY_PAT +from danswer.search.models import InferenceChunk +from danswer.utils.logger import setup_logger +from danswer.utils.text_processing import clean_model_quote +from danswer.utils.text_processing import clean_up_code_blocks +from danswer.utils.text_processing import extract_embedded_json +from danswer.utils.text_processing import shared_precompare_cleanup + + +logger = setup_logger() + + +def _extract_answer_quotes_freeform( + answer_raw: str, +) -> tuple[Optional[str], Optional[list[str]]]: + """Splits the model output into an Answer and 0 or more Quote sections. 
+ Splits by the Quote pattern, if not exist then assume it's all answer and no quotes + """ + # If no answer section, don't care about the quote + if answer_raw.lower().strip().startswith(QUOTE_PAT.lower()): + return None, None + + # Sometimes model regenerates the Answer: pattern despite it being provided in the prompt + if answer_raw.lower().startswith(ANSWER_PAT.lower()): + answer_raw = answer_raw[len(ANSWER_PAT) :] + + # Accept quote sections starting with the lower case version + answer_raw = answer_raw.replace( + f"\n{QUOTE_PAT}".lower(), f"\n{QUOTE_PAT}" + ) # Just in case model unreliable + + sections = re.split(rf"(?<=\n){QUOTE_PAT}", answer_raw) + sections_clean = [ + str(section).strip() for section in sections if str(section).strip() + ] + if not sections_clean: + return None, None + + answer = str(sections_clean[0]) + if len(sections) == 1: + return answer, None + return answer, sections_clean[1:] + + +def _extract_answer_quotes_json( + answer_dict: dict[str, str | list[str]] +) -> tuple[Optional[str], Optional[list[str]]]: + answer_dict = {k.lower(): v for k, v in answer_dict.items()} + answer = str(answer_dict.get("answer")) + quotes = answer_dict.get("quotes") or answer_dict.get("quote") + if isinstance(quotes, str): + quotes = [quotes] + return answer, quotes + + +def _extract_answer_json(raw_model_output: str) -> dict: + try: + answer_json = extract_embedded_json(raw_model_output) + except (ValueError, JSONDecodeError): + # LLMs get confused when handling the list in the json. Sometimes it doesn't attend + # enough to the previous { token so it just ends the list of quotes and stops there + # here, we add logic to try to fix this LLM error. 
+ answer_json = extract_embedded_json(raw_model_output + "}") + + if "answer" not in answer_json: + raise ValueError("Model did not output an answer as expected.") + + return answer_json + + +def match_quotes_to_docs( + quotes: list[str], + docs: list[LlmDoc] | list[InferenceChunk], + max_error_percent: float = QUOTE_ALLOWED_ERROR_PERCENT, + fuzzy_search: bool = False, + prefix_only_length: int = 100, +) -> DanswerQuotes: + danswer_quotes: list[DanswerQuote] = [] + for quote in quotes: + max_edits = math.ceil(float(len(quote)) * max_error_percent) + + for doc in docs: + if not doc.source_links: + continue + + quote_clean = shared_precompare_cleanup( + clean_model_quote(quote, trim_length=prefix_only_length) + ) + chunk_clean = shared_precompare_cleanup(doc.content) + + # Finding the offset of the quote in the plain text + if fuzzy_search: + re_search_str = ( + r"(" + re.escape(quote_clean) + r"){e<=" + str(max_edits) + r"}" + ) + found = regex.search(re_search_str, chunk_clean) + if not found: + continue + offset = found.span()[0] + else: + if quote_clean not in chunk_clean: + continue + offset = chunk_clean.index(quote_clean) + + # Extracting the link from the offset + curr_link = None + for link_offset, link in doc.source_links.items(): + # Should always find one because offset is at least 0 and there + # must be a 0 link_offset + if int(link_offset) <= offset: + curr_link = link + else: + break + + danswer_quotes.append( + DanswerQuote( + quote=quote, + document_id=doc.document_id, + link=curr_link, + source_type=doc.source_type, + semantic_identifier=doc.semantic_identifier, + blurb=doc.blurb, + ) + ) + break + + return DanswerQuotes(quotes=danswer_quotes) + + +def separate_answer_quotes( + answer_raw: str, is_json_prompt: bool = False +) -> tuple[Optional[str], Optional[list[str]]]: + """Takes in a raw model output and pulls out the answer and the quotes sections.""" + if is_json_prompt: + model_raw_json = _extract_answer_json(answer_raw) + return 
_extract_answer_quotes_json(model_raw_json) + + return _extract_answer_quotes_freeform(clean_up_code_blocks(answer_raw)) + + +def process_answer( + answer_raw: str, + docs: list[LlmDoc], + is_json_prompt: bool = True, +) -> tuple[DanswerAnswer, DanswerQuotes]: + """Used (1) in the non-streaming case to process the model output + into an Answer and Quotes AND (2) after the complete streaming response + has been received to process the model output into an Answer and Quotes.""" + answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt) + if answer == UNCERTAINTY_PAT or not answer: + if answer == UNCERTAINTY_PAT: + logger.debug("Answer matched UNCERTAINTY_PAT") + else: + logger.debug("No answer extracted from raw output") + return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) + + logger.info(f"Answer: {answer}") + if not quote_strings: + logger.debug("No quotes extracted from raw output") + return DanswerAnswer(answer=answer), DanswerQuotes(quotes=[]) + logger.info(f"All quotes (including unmatched): {quote_strings}") + quotes = match_quotes_to_docs(quote_strings, docs) + logger.debug(f"Final quotes: {quotes}") + + return DanswerAnswer(answer=answer), quotes + + +def _stream_json_answer_end(answer_so_far: str, next_token: str) -> bool: + next_token = next_token.replace('\\"', "") + # If the previous character is an escape token, don't consider the first character of next_token + # This does not work if it's an escaped escape sign before the " but this is rare, not worth handling + if answer_so_far and answer_so_far[-1] == "\\": + next_token = next_token[1:] + if '"' in next_token: + return True + return False + + +def _extract_quotes_from_completed_token_stream( + model_output: str, context_docs: list[LlmDoc], is_json_prompt: bool = True +) -> DanswerQuotes: + answer, quotes = process_answer(model_output, context_docs, is_json_prompt) + if answer: + logger.info(answer) + elif model_output: + logger.warning("Answer extraction from model output 
failed.") + + return quotes + + +def process_model_tokens( + tokens: Iterator[str], + context_docs: list[LlmDoc], + is_json_prompt: bool = True, +) -> Generator[DanswerAnswerPiece | DanswerQuotes, None, None]: + """Used in the streaming case to process the model output + into an Answer and Quotes + + Yields Answer tokens back out in a dict for streaming to frontend + When Answer section ends, yields dict with answer_finished key + Collects all the tokens at the end to form the complete model output""" + quote_pat = f"\n{QUOTE_PAT}" + # Sometimes worse model outputs new line instead of : + quote_loose = f"\n{quote_pat[:-1]}\n" + # Sometime model outputs two newlines before quote section + quote_pat_full = f"\n{quote_pat}" + model_output: str = "" + found_answer_start = False if is_json_prompt else True + found_answer_end = False + hold_quote = "" + for token in tokens: + model_previous = model_output + model_output += token + + if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output): + # Note, if the token that completes the pattern has additional text, for example if the token is "? + # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the + # event that the model outputs the UNCERTAINTY_PAT + found_answer_start = True + + # Prevent heavy cases of hallucinations where model is not even providing a json until later + if is_json_prompt and len(model_output) > 40: + logger.warning("LLM did not produce json as prompted") + found_answer_end = True + + continue + + if found_answer_start and not found_answer_end: + if is_json_prompt and _stream_json_answer_end(model_previous, token): + found_answer_end = True + + # return the remaining part of the answer e.g. token might be 'd.", ' and we should yield 'd.' 
+ if token: + try: + answer_token_section = token.index('"') + yield DanswerAnswerPiece( + answer_piece=hold_quote + token[:answer_token_section] + ) + except ValueError: + logger.error("Quotation mark not found in token") + yield DanswerAnswerPiece(answer_piece=hold_quote + token) + yield DanswerAnswerPiece(answer_piece=None) + continue + elif not is_json_prompt: + if quote_pat in hold_quote + token or quote_loose in hold_quote + token: + found_answer_end = True + yield DanswerAnswerPiece(answer_piece=None) + continue + if hold_quote + token in quote_pat_full: + hold_quote += token + continue + yield DanswerAnswerPiece(answer_piece=hold_quote + token) + hold_quote = "" + + logger.debug(f"Raw Model QnA Output: {model_output}") + + yield _extract_quotes_from_completed_token_stream( + model_output=model_output, + context_docs=context_docs, + is_json_prompt=is_json_prompt, + ) + + +def build_quotes_processor( + context_docs: list[LlmDoc], is_json_prompt: bool +) -> Callable[[Iterator[str]], AnswerQuestionStreamReturn]: + def stream_processor(tokens: Iterator[str]) -> AnswerQuestionStreamReturn: + yield from process_model_tokens( + tokens=tokens, + context_docs=context_docs, + is_json_prompt=is_json_prompt, + ) + + return stream_processor diff --git a/backend/danswer/llm/answering/stream_processing/utils.py b/backend/danswer/llm/answering/stream_processing/utils.py new file mode 100644 index 000000000..9f21e6a34 --- /dev/null +++ b/backend/danswer/llm/answering/stream_processing/utils.py @@ -0,0 +1,17 @@ +from collections.abc import Sequence + +from danswer.chat.models import LlmDoc +from danswer.search.models import InferenceChunk + + +def map_document_id_order( + chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True +) -> dict[str, int]: + order_mapping = {} + current = 1 if one_indexed else 0 + for chunk in chunks: + if chunk.document_id not in order_mapping: + order_mapping[chunk.document_id] = current + current += 1 + + return order_mapping diff 
--git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py index 52811b7f3..d450efa91 100644 --- a/backend/danswer/llm/chat_llm.py +++ b/backend/danswer/llm/chat_llm.py @@ -1,22 +1,36 @@ -import abc +import json +import os from collections.abc import Iterator +from typing import Any +from typing import cast -import litellm # type:ignore -from langchain.chat_models import ChatLiteLLM -from langchain.chat_models.base import BaseChatModel +import litellm # type: ignore from langchain.schema.language_model import LanguageModelInput +from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessageChunk +from langchain_core.messages import BaseMessage +from langchain_core.messages import BaseMessageChunk +from langchain_core.messages import ChatMessage +from langchain_core.messages import ChatMessageChunk +from langchain_core.messages import FunctionMessage +from langchain_core.messages import FunctionMessageChunk +from langchain_core.messages import HumanMessage +from langchain_core.messages import HumanMessageChunk +from langchain_core.messages import SystemMessage +from langchain_core.messages import SystemMessageChunk +from langchain_core.messages.tool import ToolCallChunk +from langchain_core.messages.tool import ToolMessage from danswer.configs.app_configs import LOG_ALL_MODEL_INTERACTIONS +from danswer.configs.model_configs import DISABLE_LITELLM_STREAMING from danswer.configs.model_configs import GEN_AI_API_ENDPOINT from danswer.configs.model_configs import GEN_AI_API_VERSION from danswer.configs.model_configs import GEN_AI_LLM_PROVIDER_TYPE from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.configs.model_configs import GEN_AI_TEMPERATURE from danswer.llm.interfaces import LLM -from danswer.llm.utils import message_generator_to_string_generator -from 
danswer.llm.utils import should_be_verbose +from danswer.llm.interfaces import LLMConfig +from danswer.llm.interfaces import ToolChoiceOptions from danswer.utils.logger import setup_logger @@ -28,78 +42,148 @@ litellm.drop_params = True litellm.telemetry = False -class LangChainChatLLM(LLM, abc.ABC): - @property - @abc.abstractmethod - def llm(self) -> BaseChatModel: - raise NotImplementedError +def _base_msg_to_role(msg: BaseMessage) -> str: + if isinstance(msg, HumanMessage) or isinstance(msg, HumanMessageChunk): + return "user" + if isinstance(msg, AIMessage) or isinstance(msg, AIMessageChunk): + return "assistant" + if isinstance(msg, SystemMessage) or isinstance(msg, SystemMessageChunk): + return "system" + if isinstance(msg, FunctionMessage) or isinstance(msg, FunctionMessageChunk): + return "function" + return "unknown" - @staticmethod - def _log_prompt(prompt: LanguageModelInput) -> None: - if isinstance(prompt, list): - for ind, msg in enumerate(prompt): - logger.debug(f"Message {ind}:\n{msg.content}") - if isinstance(prompt, str): - logger.debug(f"Prompt:\n{prompt}") - def log_model_configs(self) -> None: - llm_dict = {k: v for k, v in self.llm.__dict__.items() if v} - llm_dict.pop("client") - logger.info( - f"LLM Model Class: {self.llm.__class__.__name__}, Model Config: {llm_dict}" +def _convert_litellm_message_to_langchain_message( + litellm_message: litellm.Message, +) -> BaseMessage: + # Extracting the basic attributes from the litellm message + content = litellm_message.content + role = litellm_message.role + + # Handling function calls and tool calls if present + tool_calls = ( + cast( + list[litellm.utils.ChatCompletionMessageToolCall], + litellm_message.tool_calls, ) + if hasattr(litellm_message, "tool_calls") + else [] + ) - def invoke(self, prompt: LanguageModelInput) -> str: - if LOG_ALL_MODEL_INTERACTIONS: - self._log_prompt(prompt) + # Create the appropriate langchain message based on the role + if role == "user": + return 
HumanMessage(content=content) + elif role == "assistant": + return AIMessage( + content=content, + tool_calls=[ + { + "name": tool_call.function.name or "", + "args": json.loads(tool_call.function.arguments), + "id": tool_call.id, + } + for tool_call in tool_calls + ], + ) + elif role == "system": + return SystemMessage(content=content) + else: + raise ValueError(f"Unknown role type received: {role}") - model_raw = self.llm.invoke(prompt).content - if LOG_ALL_MODEL_INTERACTIONS: - logger.debug(f"Raw Model Output:\n{model_raw}") - if not isinstance(model_raw, str): - raise RuntimeError( - "Model output inconsistent with expected type, " - "is this related to a library upgrade?" +def _convert_message_to_dict(message: BaseMessage) -> dict: + """Adapted from langchain_community.chat_models.litellm._convert_message_to_dict""" + if isinstance(message, ChatMessage): + message_dict = {"role": message.role, "content": message.content} + elif isinstance(message, HumanMessage): + message_dict = {"role": "user", "content": message.content} + elif isinstance(message, AIMessage): + message_dict = {"role": "assistant", "content": message.content} + if message.tool_calls: + message_dict["tool_calls"] = [ + { + "id": tool_call.get("id"), + "function": { + "name": tool_call["name"], + "arguments": json.dumps(tool_call["args"]), + }, + "type": "function", + "index": 0, # only support a single tool call atm + } + for tool_call in message.tool_calls + ] + if "function_call" in message.additional_kwargs: + message_dict["function_call"] = message.additional_kwargs["function_call"] + elif isinstance(message, SystemMessage): + message_dict = {"role": "system", "content": message.content} + elif isinstance(message, FunctionMessage): + message_dict = { + "role": "function", + "content": message.content, + "name": message.name, + } + elif isinstance(message, ToolMessage): + message_dict = { + "tool_call_id": message.tool_call_id, + "role": "tool", + "name": message.name or "", + "content": 
message.content, + } + else: + raise ValueError(f"Got unknown type {message}") + if "name" in message.additional_kwargs: + message_dict["name"] = message.additional_kwargs["name"] + return message_dict + + +def _convert_delta_to_message_chunk( + _dict: dict[str, Any], curr_msg: BaseMessage | None +) -> BaseMessageChunk: + """Adapted from langchain_community.chat_models.litellm._convert_delta_to_message_chunk""" + role = _dict.get("role") or (_base_msg_to_role(curr_msg) if curr_msg else None) + content = _dict.get("content") or "" + additional_kwargs = {} + if _dict.get("function_call"): + additional_kwargs.update({"function_call": dict(_dict["function_call"])}) + tool_calls = cast( + list[litellm.utils.ChatCompletionDeltaToolCall] | None, _dict.get("tool_calls") + ) + + if role == "user": + return HumanMessageChunk(content=content) + elif role == "assistant": + if tool_calls: + tool_call = tool_calls[0] + tool_name = tool_call.function.name or (curr_msg and curr_msg.name) or "" + + tool_call_chunk = ToolCallChunk( + name=tool_name, + id=tool_call.id, + args=tool_call.function.arguments, + index=0, # only support a single tool call atm ) + return AIMessageChunk( + content=content, + additional_kwargs=additional_kwargs, + tool_call_chunks=[tool_call_chunk], + ) + return AIMessageChunk(content=content, additional_kwargs=additional_kwargs) + elif role == "system": + return SystemMessageChunk(content=content) + elif role == "function": + return FunctionMessageChunk(content=content, name=_dict["name"]) + elif role: + return ChatMessageChunk(content=content, role=role) - return model_raw - - def stream(self, prompt: LanguageModelInput) -> Iterator[str]: - if LOG_ALL_MODEL_INTERACTIONS: - self._log_prompt(prompt) - - output_tokens = [] - for token in message_generator_to_string_generator(self.llm.stream(prompt)): - output_tokens.append(token) - yield token - - full_output = "".join(output_tokens) - if LOG_ALL_MODEL_INTERACTIONS: - logger.debug(f"Raw Model 
Output:\n{full_output}") + raise ValueError(f"Unknown role: {role}") -def _get_model_str( - model_provider: str | None, - model_version: str | None, -) -> str: - if model_provider and model_version: - return model_provider + "/" + model_version - - if model_version: - # Litellm defaults to openai if no provider specified - # It's implicit so no need to specify here either - return model_version - - # User specified something wrong, just use Danswer default - return GEN_AI_MODEL_VERSION - - -class DefaultMultiLLM(LangChainChatLLM): +class DefaultMultiLLM(LLM): """Uses Litellm library to allow easy configuration to use a multitude of LLMs See https://python.langchain.com/docs/integrations/chat/litellm""" - DEFAULT_MODEL_PARAMS = { + DEFAULT_MODEL_PARAMS: dict[str, Any] = { "frequency_penalty": 0, "presence_penalty": 0, } @@ -108,33 +192,182 @@ class DefaultMultiLLM(LangChainChatLLM): self, api_key: str | None, timeout: int, - model_provider: str = GEN_AI_MODEL_PROVIDER, - model_version: str = GEN_AI_MODEL_VERSION, + model_provider: str, + model_name: str, api_base: str | None = GEN_AI_API_ENDPOINT, api_version: str | None = GEN_AI_API_VERSION, custom_llm_provider: str | None = GEN_AI_LLM_PROVIDER_TYPE, max_output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, temperature: float = GEN_AI_TEMPERATURE, + custom_config: dict[str, str] | None = None, + extra_headers: dict[str, str] | None = None, ): - # Litellm Langchain integration currently doesn't take in the api key param - # Can place this in the call below once integration is in - litellm.api_key = api_key or "dummy-key" - litellm.api_version = api_version + self._timeout = timeout + self._model_provider = model_provider + self._model_version = model_name + self._temperature = temperature + self._api_key = api_key + self._api_base = api_base + self._api_version = api_version + self._custom_llm_provider = custom_llm_provider + self._max_output_tokens = max_output_tokens + self._custom_config = custom_config - self._llm = 
ChatLiteLLM( # type: ignore - model=model_version - if custom_llm_provider - else _get_model_str(model_provider, model_version), - api_base=api_base, - custom_llm_provider=custom_llm_provider, - max_tokens=max_output_tokens, - temperature=temperature, - request_timeout=timeout, - model_kwargs=DefaultMultiLLM.DEFAULT_MODEL_PARAMS, - verbose=should_be_verbose(), - max_retries=0, # retries are handled outside of langchain + # NOTE: have to set these as environment variables for Litellm since + # not all are able to passed in but they always support them set as env + # variables + if custom_config: + for k, v in custom_config.items(): + os.environ[k] = v + + model_kwargs = ( + DefaultMultiLLM.DEFAULT_MODEL_PARAMS if model_provider == "openai" else {} ) + if extra_headers: + model_kwargs.update({"extra_headers": extra_headers}) + + self._model_kwargs = model_kwargs + + @staticmethod + def _log_prompt(prompt: LanguageModelInput) -> None: + if isinstance(prompt, list): + for ind, msg in enumerate(prompt): + if isinstance(msg, AIMessageChunk): + if msg.content: + log_msg = msg.content + elif msg.tool_call_chunks: + log_msg = "Tool Calls: " + str( + [ + { + key: value + for key, value in tool_call.items() + if key != "index" + } + for tool_call in msg.tool_call_chunks + ] + ) + else: + log_msg = "" + logger.debug(f"Message {ind}:\n{log_msg}") + else: + logger.debug(f"Message {ind}:\n{msg.content}") + if isinstance(prompt, str): + logger.debug(f"Prompt:\n{prompt}") + + def log_model_configs(self) -> None: + logger.info(f"Config: {self.config}") + + def _completion( + self, + prompt: LanguageModelInput, + tools: list[dict] | None, + tool_choice: ToolChoiceOptions | None, + stream: bool, + ) -> litellm.ModelResponse | litellm.CustomStreamWrapper: + if isinstance(prompt, list): + prompt = [ + _convert_message_to_dict(msg) if isinstance(msg, BaseMessage) else msg + for msg in prompt + ] + elif isinstance(prompt, str): + prompt = 
[_convert_message_to_dict(HumanMessage(content=prompt))] + + try: + return litellm.completion( + # model choice + model=f"{self.config.model_provider}/{self.config.model_name}", + api_key=self._api_key, + base_url=self._api_base, + api_version=self._api_version, + custom_llm_provider=self._custom_llm_provider, + # actual input + messages=prompt, + tools=tools, + tool_choice=tool_choice if tools else None, + # streaming choice + stream=stream, + # model params + temperature=self._temperature, + max_tokens=self._max_output_tokens, + timeout=self._timeout, + **self._model_kwargs, + ) + except Exception as e: + # for break pointing + raise e @property - def llm(self) -> ChatLiteLLM: - return self._llm + def config(self) -> LLMConfig: + return LLMConfig( + model_provider=self._model_provider, + model_name=self._model_version, + temperature=self._temperature, + api_key=self._api_key, + ) + + def invoke( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> BaseMessage: + if LOG_ALL_MODEL_INTERACTIONS: + self.log_model_configs() + self._log_prompt(prompt) + + response = cast( + litellm.ModelResponse, self._completion(prompt, tools, tool_choice, False) + ) + return _convert_litellm_message_to_langchain_message( + response.choices[0].message + ) + + def stream( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> Iterator[BaseMessage]: + if LOG_ALL_MODEL_INTERACTIONS: + self.log_model_configs() + self._log_prompt(prompt) + + if DISABLE_LITELLM_STREAMING: + yield self.invoke(prompt) + return + + output = None + response = self._completion(prompt, tools, tool_choice, True) + for part in response: + if len(part["choices"]) == 0: + continue + delta = part["choices"][0]["delta"] + message_chunk = _convert_delta_to_message_chunk(delta, output) + if output is None: + output = message_chunk + else: + output += message_chunk + + yield 
message_chunk + + if LOG_ALL_MODEL_INTERACTIONS and output: + content = output.content or "" + if isinstance(output, AIMessage): + if content: + log_msg = content + elif output.tool_calls: + log_msg = "Tool Calls: " + str( + [ + { + key: value + for key, value in tool_call.items() + if key != "index" + } + for tool_call in output.tool_calls + ] + ) + else: + log_msg = "" + logger.debug(f"Raw Model Output:\n{log_msg}") + else: + logger.debug(f"Raw Model Output:\n{content}") diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py index 4c11a29a4..2c4c029aa 100644 --- a/backend/danswer/llm/custom_llm.py +++ b/backend/danswer/llm/custom_llm.py @@ -3,11 +3,14 @@ from collections.abc import Iterator import requests from langchain.schema.language_model import LanguageModelInput +from langchain_core.messages import AIMessage +from langchain_core.messages import BaseMessage from requests import Timeout from danswer.configs.model_configs import GEN_AI_API_ENDPOINT from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS from danswer.llm.interfaces import LLM +from danswer.llm.interfaces import ToolChoiceOptions from danswer.llm.utils import convert_lm_input_to_basic_string from danswer.utils.logger import setup_logger @@ -47,7 +50,7 @@ class CustomModelServer(LLM): self._max_output_tokens = max_output_tokens self._timeout = timeout - def _execute(self, input: LanguageModelInput) -> str: + def _execute(self, input: LanguageModelInput) -> AIMessage: headers = { "Content-Type": "application/json", } @@ -67,13 +70,24 @@ class CustomModelServer(LLM): raise Timeout(f"Model inference to {self._endpoint} timed out") from error response.raise_for_status() - return json.loads(response.content).get("generated_text", "") + response_content = json.loads(response.content).get("generated_text", "") + return AIMessage(content=response_content) def log_model_configs(self) -> None: logger.debug(f"Custom model at: {self._endpoint}") - def invoke(self, 
prompt: LanguageModelInput) -> str: + def invoke( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> BaseMessage: return self._execute(prompt) - def stream(self, prompt: LanguageModelInput) -> Iterator[str]: + def stream( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> Iterator[BaseMessage]: yield self._execute(prompt) diff --git a/backend/danswer/llm/factory.py b/backend/danswer/llm/factory.py index fca6a9c14..9c92eb9a6 100644 --- a/backend/danswer/llm/factory.py +++ b/backend/danswer/llm/factory.py @@ -1,43 +1,91 @@ from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.chat_configs import QA_TIMEOUT -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.configs.model_configs import GEN_AI_TEMPERATURE +from danswer.configs.model_configs import LITELLM_EXTRA_HEADERS +from danswer.db.engine import get_session_context_manager +from danswer.db.llm import fetch_default_provider +from danswer.db.llm import fetch_provider +from danswer.db.models import Persona from danswer.llm.chat_llm import DefaultMultiLLM -from danswer.llm.custom_llm import CustomModelServer from danswer.llm.exceptions import GenAIDisabledException -from danswer.llm.gpt_4_all import DanswerGPT4All from danswer.llm.interfaces import LLM -from danswer.llm.utils import get_gen_ai_api_key +from danswer.llm.override_models import LLMOverride + + +def get_llm_for_persona( + persona: Persona, llm_override: LLMOverride | None = None +) -> LLM: + model_provider_override = llm_override.model_provider if llm_override else None + model_version_override = llm_override.model_version if llm_override else None + temperature_override = llm_override.temperature if llm_override else 
None + + return get_default_llm( + model_provider_name=( + model_provider_override or persona.llm_model_provider_override + ), + model_version=(model_version_override or persona.llm_model_version_override), + temperature=temperature_override or GEN_AI_TEMPERATURE, + ) def get_default_llm( - gen_ai_model_provider: str = GEN_AI_MODEL_PROVIDER, - api_key: str | None = None, timeout: int = QA_TIMEOUT, + temperature: float = GEN_AI_TEMPERATURE, use_fast_llm: bool = False, - gen_ai_model_version_override: str | None = None, + model_provider_name: str | None = None, + model_version: str | None = None, ) -> LLM: - """A single place to fetch the configured LLM for Danswer - Also allows overriding certain LLM defaults""" if DISABLE_GENERATIVE_AI: raise GenAIDisabledException() - if gen_ai_model_version_override: - model_version = gen_ai_model_version_override - else: - model_version = ( - FAST_GEN_AI_MODEL_VERSION if use_fast_llm else GEN_AI_MODEL_VERSION - ) - if api_key is None: - api_key = get_gen_ai_api_key() + # TODO: pass this in + with get_session_context_manager() as session: + if model_provider_name is None: + llm_provider = fetch_default_provider(session) + else: + llm_provider = fetch_provider(session, model_provider_name) - if gen_ai_model_provider.lower() == "custom": - return CustomModelServer(api_key=api_key, timeout=timeout) + if not llm_provider: + raise ValueError("No default LLM provider found") - if gen_ai_model_provider.lower() == "gpt4all": - return DanswerGPT4All(model_version=model_version, timeout=timeout) - - return DefaultMultiLLM( - model_version=model_version, api_key=api_key, timeout=timeout + model_name = model_version or ( + (llm_provider.fast_default_model_name or llm_provider.default_model_name) + if use_fast_llm + else llm_provider.default_model_name + ) + if not model_name: + raise ValueError("No default model name found") + + return get_llm( + provider=llm_provider.provider, + model=model_name, + api_key=llm_provider.api_key, + 
api_base=llm_provider.api_base, + api_version=llm_provider.api_version, + custom_config=llm_provider.custom_config, + timeout=timeout, + temperature=temperature, + ) + + +def get_llm( + provider: str, + model: str, + api_key: str | None = None, + api_base: str | None = None, + api_version: str | None = None, + custom_config: dict[str, str] | None = None, + temperature: float = GEN_AI_TEMPERATURE, + timeout: int = QA_TIMEOUT, +) -> LLM: + return DefaultMultiLLM( + model_provider=provider, + model_name=model, + api_key=api_key, + api_base=api_base, + api_version=api_version, + timeout=timeout, + temperature=temperature, + custom_config=custom_config, + extra_headers=LITELLM_EXTRA_HEADERS, ) diff --git a/backend/danswer/llm/gpt_4_all.py b/backend/danswer/llm/gpt_4_all.py deleted file mode 100644 index d2307eb78..000000000 --- a/backend/danswer/llm/gpt_4_all.py +++ /dev/null @@ -1,75 +0,0 @@ -from collections.abc import Iterator -from typing import Any - -from langchain.schema.language_model import LanguageModelInput - -from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_TEMPERATURE -from danswer.llm.interfaces import LLM -from danswer.llm.utils import convert_lm_input_to_basic_string -from danswer.utils.logger import setup_logger - - -logger = setup_logger() - - -class DummyGPT4All: - """In the case of import failure due to architectural incompatibilities, - this module does not raise exceptions during server startup, - as long as the module isn't actually used""" - - def __init__(self, *args: Any, **kwargs: Any) -> None: - raise RuntimeError("GPT4All library not installed.") - - -try: - from gpt4all import GPT4All # type:ignore -except ImportError: - # Setting a low log level because users get scared when they see this - logger.debug( - "GPT4All library not installed. 
" - "If you wish to run GPT4ALL (in memory) to power Danswer's " - "Generative AI features, please install gpt4all==2.0.2." - ) - GPT4All = DummyGPT4All - - -class DanswerGPT4All(LLM): - """Option to run an LLM locally, however this is significantly slower and - answers tend to be much worse""" - - @property - def requires_warm_up(self) -> bool: - """GPT4All models are lazy loaded, load them on server start so that the - first inference isn't extremely delayed""" - return True - - @property - def requires_api_key(self) -> bool: - return False - - def __init__( - self, - timeout: int, - model_version: str = GEN_AI_MODEL_VERSION, - max_output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, - temperature: float = GEN_AI_TEMPERATURE, - ): - self.timeout = timeout - self.max_output_tokens = max_output_tokens - self.temperature = temperature - self.gpt4all_model = GPT4All(model_version) - - def log_model_configs(self) -> None: - logger.debug( - f"GPT4All Model: {self.gpt4all_model}, Temperature: {self.temperature}" - ) - - def invoke(self, prompt: LanguageModelInput) -> str: - prompt_basic = convert_lm_input_to_basic_string(prompt) - return self.gpt4all_model.generate(prompt_basic) - - def stream(self, prompt: LanguageModelInput) -> Iterator[str]: - prompt_basic = convert_lm_input_to_basic_string(prompt) - return self.gpt4all_model.generate(prompt_basic, streaming=True) diff --git a/backend/danswer/llm/interfaces.py b/backend/danswer/llm/interfaces.py index 41fe428bb..1f99383fa 100644 --- a/backend/danswer/llm/interfaces.py +++ b/backend/danswer/llm/interfaces.py @@ -1,13 +1,25 @@ import abc from collections.abc import Iterator +from typing import Literal from langchain.schema.language_model import LanguageModelInput +from langchain_core.messages import BaseMessage +from pydantic import BaseModel from danswer.utils.logger import setup_logger logger = setup_logger() +ToolChoiceOptions = Literal["required"] | Literal["auto"] | Literal["none"] + + +class LLMConfig(BaseModel): + 
model_provider: str + model_name: str + temperature: float + api_key: str | None + class LLM(abc.ABC): """Mimics the LangChain LLM / BaseChatModel interfaces to make it easy @@ -22,14 +34,29 @@ class LLM(abc.ABC): def requires_api_key(self) -> bool: return True + @property + @abc.abstractmethod + def config(self) -> LLMConfig: + raise NotImplementedError + @abc.abstractmethod def log_model_configs(self) -> None: raise NotImplementedError @abc.abstractmethod - def invoke(self, prompt: LanguageModelInput) -> str: + def invoke( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> BaseMessage: raise NotImplementedError @abc.abstractmethod - def stream(self, prompt: LanguageModelInput) -> Iterator[str]: + def stream( + self, + prompt: LanguageModelInput, + tools: list[dict] | None = None, + tool_choice: ToolChoiceOptions | None = None, + ) -> Iterator[BaseMessage]: raise NotImplementedError diff --git a/backend/danswer/llm/llm_initialization.py b/backend/danswer/llm/llm_initialization.py new file mode 100644 index 000000000..5c6f8bdbe --- /dev/null +++ b/backend/danswer/llm/llm_initialization.py @@ -0,0 +1,78 @@ +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import DISABLE_GENERATIVE_AI +from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION +from danswer.configs.model_configs import GEN_AI_API_ENDPOINT +from danswer.configs.model_configs import GEN_AI_API_KEY +from danswer.configs.model_configs import GEN_AI_API_VERSION +from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER +from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.db.llm import fetch_existing_llm_providers +from danswer.db.llm import update_default_provider +from danswer.db.llm import upsert_llm_provider +from danswer.llm.llm_provider_options import AZURE_PROVIDER_NAME +from danswer.llm.llm_provider_options import BEDROCK_PROVIDER_NAME +from 
danswer.llm.llm_provider_options import fetch_available_well_known_llms +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +def load_llm_providers(db_session: Session) -> None: + existing_providers = fetch_existing_llm_providers(db_session) + if existing_providers: + return + + if not GEN_AI_API_KEY or DISABLE_GENERATIVE_AI: + return + + well_known_provider_name_to_provider = { + provider.name: provider + for provider in fetch_available_well_known_llms() + if provider.name != BEDROCK_PROVIDER_NAME + } + + if GEN_AI_MODEL_PROVIDER not in well_known_provider_name_to_provider: + logger.error(f"Cannot auto-transition LLM provider: {GEN_AI_MODEL_PROVIDER}") + return None + + # Azure provider requires custom model names, + # OpenAI / anthropic can just use the defaults + model_names = ( + [ + name + for name in [ + GEN_AI_MODEL_VERSION, + FAST_GEN_AI_MODEL_VERSION, + ] + if name + ] + if GEN_AI_MODEL_PROVIDER == AZURE_PROVIDER_NAME + else None + ) + + well_known_provider = well_known_provider_name_to_provider[GEN_AI_MODEL_PROVIDER] + llm_provider_request = LLMProviderUpsertRequest( + name=well_known_provider.display_name, + provider=GEN_AI_MODEL_PROVIDER, + api_key=GEN_AI_API_KEY, + api_base=GEN_AI_API_ENDPOINT, + api_version=GEN_AI_API_VERSION, + custom_config={}, + default_model_name=( + GEN_AI_MODEL_VERSION + or well_known_provider.default_model + or well_known_provider.llm_names[0] + ), + fast_default_model_name=( + FAST_GEN_AI_MODEL_VERSION or well_known_provider.default_fast_model + ), + model_names=model_names, + ) + llm_provider = upsert_llm_provider(db_session, llm_provider_request) + update_default_provider(db_session, llm_provider.id) + logger.info( + f"Migrated LLM provider from env variables for provider '{GEN_AI_MODEL_PROVIDER}'" + ) diff --git a/backend/danswer/llm/llm_provider_options.py b/backend/danswer/llm/llm_provider_options.py new file mode 100644 
index 000000000..3b2c62c6c --- /dev/null +++ b/backend/danswer/llm/llm_provider_options.py @@ -0,0 +1,128 @@ +import litellm # type: ignore +from pydantic import BaseModel + + +class CustomConfigKey(BaseModel): + name: str + description: str | None = None + is_required: bool = True + is_secret: bool = False + + +class WellKnownLLMProviderDescriptor(BaseModel): + name: str + display_name: str + api_key_required: bool + api_base_required: bool + api_version_required: bool + custom_config_keys: list[CustomConfigKey] | None = None + + llm_names: list[str] + default_model: str | None = None + default_fast_model: str | None = None + + +OPENAI_PROVIDER_NAME = "openai" +OPEN_AI_MODEL_NAMES = [ + "gpt-4", + "gpt-4o", + "gpt-4-turbo", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4-32k", + "gpt-4-0613", + "gpt-4-32k-0613", + "gpt-4-0314", + "gpt-4-32k-0314", + "gpt-3.5-turbo", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + "gpt-3.5-turbo-0301", +] + +BEDROCK_PROVIDER_NAME = "bedrock" +# need to remove all the weird "bedrock/eu-central-1/anthropic.claude-v1" named +# models +BEDROCK_MODEL_NAMES = [model for model in litellm.bedrock_models if "/" not in model][ + ::-1 +] + +ANTHROPIC_PROVIDER_NAME = "anthropic" +ANTHROPIC_MODEL_NAMES = [model for model in litellm.anthropic_models][::-1] + +AZURE_PROVIDER_NAME = "azure" + + +_PROVIDER_TO_MODELS_MAP = { + OPENAI_PROVIDER_NAME: OPEN_AI_MODEL_NAMES, + BEDROCK_PROVIDER_NAME: BEDROCK_MODEL_NAMES, + ANTHROPIC_PROVIDER_NAME: ANTHROPIC_MODEL_NAMES, +} + + +def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: + return [ + WellKnownLLMProviderDescriptor( + name="openai", + display_name="OpenAI", + api_key_required=True, + api_base_required=False, + api_version_required=False, + custom_config_keys=[], + llm_names=fetch_models_for_provider("openai"), + default_model="gpt-4", + 
default_fast_model="gpt-3.5-turbo", + ), + WellKnownLLMProviderDescriptor( + name=ANTHROPIC_PROVIDER_NAME, + display_name="Anthropic", + api_key_required=True, + api_base_required=False, + api_version_required=False, + custom_config_keys=[], + llm_names=fetch_models_for_provider(ANTHROPIC_PROVIDER_NAME), + default_model="claude-3-opus-20240229", + default_fast_model="claude-3-sonnet-20240229", + ), + WellKnownLLMProviderDescriptor( + name=AZURE_PROVIDER_NAME, + display_name="Azure OpenAI", + api_key_required=True, + api_base_required=True, + api_version_required=True, + custom_config_keys=[], + llm_names=fetch_models_for_provider(AZURE_PROVIDER_NAME), + ), + WellKnownLLMProviderDescriptor( + name=BEDROCK_PROVIDER_NAME, + display_name="AWS Bedrock", + api_key_required=False, + api_base_required=False, + api_version_required=False, + custom_config_keys=[ + CustomConfigKey(name="AWS_REGION_NAME"), + CustomConfigKey( + name="AWS_ACCESS_KEY_ID", + is_required=False, + description="If using AWS IAM roles, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY can be left blank.", + ), + CustomConfigKey( + name="AWS_SECRET_ACCESS_KEY", + is_required=False, + is_secret=True, + description="If using AWS IAM roles, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY can be left blank.", + ), + ], + llm_names=fetch_models_for_provider(BEDROCK_PROVIDER_NAME), + default_model="anthropic.claude-3-sonnet-20240229-v1:0", + default_fast_model="anthropic.claude-3-haiku-20240307-v1:0", + ), + ] + + +def fetch_models_for_provider(provider_name: str) -> list[str]: + return _PROVIDER_TO_MODELS_MAP.get(provider_name, []) diff --git a/backend/danswer/llm/override_models.py b/backend/danswer/llm/override_models.py new file mode 100644 index 000000000..1ecb3192f --- /dev/null +++ b/backend/danswer/llm/override_models.py @@ -0,0 +1,17 @@ +"""Overrides sent over the wire / stored in the DB + +NOTE: these models are used in many places, so have to be +kepy in a separate file to avoid circular imports. 
+""" +from pydantic import BaseModel + + +class LLMOverride(BaseModel): + model_provider: str | None = None + model_version: str | None = None + temperature: float | None = None + + +class PromptOverride(BaseModel): + system_prompt: str | None = None + task_prompt: str | None = None diff --git a/backend/danswer/llm/utils.py b/backend/danswer/llm/utils.py index 379ccfbae..a526adddc 100644 --- a/backend/danswer/llm/utils.py +++ b/backend/danswer/llm/utils.py @@ -3,7 +3,10 @@ from collections.abc import Iterator from copy import copy from typing import Any from typing import cast +from typing import TYPE_CHECKING +from typing import Union +import litellm # type: ignore import tiktoken from langchain.prompts.base import StringPromptValue from langchain.prompts.chat import ChatPromptValue @@ -11,27 +14,26 @@ from langchain.schema import PromptValue from langchain.schema.language_model import LanguageModelInput from langchain.schema.messages import AIMessage from langchain.schema.messages import BaseMessage -from langchain.schema.messages import BaseMessageChunk from langchain.schema.messages import HumanMessage from langchain.schema.messages import SystemMessage -from litellm import get_max_tokens # type: ignore from tiktoken.core import Encoding -from danswer.configs.app_configs import LOG_LEVEL -from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY from danswer.configs.constants import MessageType from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.configs.model_configs import GEN_AI_API_KEY from danswer.configs.model_configs import GEN_AI_MAX_OUTPUT_TOKENS from danswer.configs.model_configs import GEN_AI_MAX_TOKENS from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.db.models import ChatMessage -from danswer.dynamic_configs import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError -from 
danswer.indexing.models import InferenceChunk +from danswer.file_store.models import ChatFileType +from danswer.file_store.models import InMemoryChatFile from danswer.llm.interfaces import LLM +from danswer.prompts.constants import CODE_BLOCK_PAT +from danswer.search.models import InferenceChunk from danswer.utils.logger import setup_logger +from shared_configs.configs import LOG_LEVEL + +if TYPE_CHECKING: + from danswer.llm.answering.models import PreviousMessage logger = setup_logger() @@ -83,19 +85,26 @@ def tokenizer_trim_chunks( return new_chunks -def translate_danswer_msg_to_langchain(msg: ChatMessage) -> BaseMessage: +def translate_danswer_msg_to_langchain( + msg: Union[ChatMessage, "PreviousMessage"], +) -> BaseMessage: + # If the message is a `ChatMessage`, it doesn't have the downloaded files + # attached. Just ignore them for now + files = [] if isinstance(msg, ChatMessage) else msg.files + content = build_content_with_imgs(msg.message, files) + if msg.message_type == MessageType.SYSTEM: raise ValueError("System messages are not currently part of history") if msg.message_type == MessageType.ASSISTANT: - return AIMessage(content=msg.message) + return AIMessage(content=content) if msg.message_type == MessageType.USER: - return HumanMessage(content=msg.message) + return HumanMessage(content=content) raise ValueError(f"New message type {msg.message_type} not handled") def translate_history_to_basemessages( - history: list[ChatMessage], + history: list[ChatMessage] | list["PreviousMessage"], ) -> tuple[list[BaseMessage], list[int]]: history_basemessages = [ translate_danswer_msg_to_langchain(msg) @@ -106,6 +115,74 @@ def translate_history_to_basemessages( return history_basemessages, history_token_counts +def _build_content( + message: str, + files: list[InMemoryChatFile] | None = None, +) -> str: + """Applies all non-image files.""" + text_files = ( + [file for file in files if file.file_type == ChatFileType.PLAIN_TEXT] + if files + else None + ) + if not 
text_files: + return message + + final_message_with_files = "FILES:\n\n" + for file in text_files: + file_content = file.content.decode("utf-8") + file_name_section = f"DOCUMENT: {file.filename}\n" if file.filename else "" + final_message_with_files += ( + f"{file_name_section}{CODE_BLOCK_PAT.format(file_content.strip())}\n\n\n" + ) + final_message_with_files += message + + return final_message_with_files + + +def build_content_with_imgs( + message: str, + files: list[InMemoryChatFile] | None = None, + img_urls: list[str] | None = None, +) -> str | list[str | dict[str, Any]]: # matching Langchain's BaseMessage content type + files = files or [] + img_files = [file for file in files if file.file_type == ChatFileType.IMAGE] + img_urls = img_urls or [] + message_main_content = _build_content(message, files) + + if not img_files and not img_urls: + return message_main_content + + return cast( + list[str | dict[str, Any]], + [ + { + "type": "text", + "text": message_main_content, + }, + ] + + [ + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{file.to_base64()}", + }, + } + for file in files + if file.file_type == "image" + ] + + [ + { + "type": "image_url", + "image_url": { + "url": url, + }, + } + for url in img_urls + ], + ) + + def dict_based_prompt_to_langchain_prompt( messages: list[dict[str, str]] ) -> list[BaseMessage]: @@ -153,20 +230,50 @@ def convert_lm_input_to_basic_string(lm_input: LanguageModelInput) -> str: return prompt_value.to_string() +def message_to_string(message: BaseMessage) -> str: + if not isinstance(message.content, str): + raise RuntimeError("LLM message not in expected format.") + + return message.content + + def message_generator_to_string_generator( - messages: Iterator[BaseMessageChunk], + messages: Iterator[BaseMessage], ) -> Iterator[str]: for message in messages: - if not isinstance(message.content, str): - raise RuntimeError("LLM message not in expected format.") - - yield message.content + yield 
message_to_string(message) def should_be_verbose() -> bool: return LOG_LEVEL == "debug" +# estimate of the number of tokens in an image url +# is correct when downsampling is used. Is very wrong when OpenAI does not downsample +# TODO: improve this +_IMG_TOKENS = 85 + + +def check_message_tokens( + message: BaseMessage, encode_fn: Callable[[str], list] | None = None +) -> int: + if isinstance(message.content, str): + return check_number_of_tokens(message.content, encode_fn) + + total_tokens = 0 + for part in message.content: + if isinstance(part, str): + total_tokens += check_number_of_tokens(part, encode_fn) + continue + + if part["type"] == "text": + total_tokens += check_number_of_tokens(part["text"], encode_fn) + elif part["type"] == "image_url": + total_tokens += _IMG_TOKENS + + return total_tokens + + def check_number_of_tokens( text: str, encode_fn: Callable[[str], list] | None = None ) -> int: @@ -181,31 +288,23 @@ def check_number_of_tokens( return len(encode_fn(text)) -def get_gen_ai_api_key() -> str | None: - # first check if the key has been provided by the UI - try: - return cast(str, get_dynamic_config_store().load(GEN_AI_API_KEY_STORAGE_KEY)) - except ConfigNotFoundError: - pass - - # if not provided by the UI, fallback to the env variable - return GEN_AI_API_KEY - - -def test_llm(llm: LLM) -> bool: +def test_llm(llm: LLM) -> str | None: # try for up to 2 timeouts (e.g. 
10 seconds in total) + error_msg = None for _ in range(2): try: llm.invoke("Do not respond") - return True + return None except Exception as e: - logger.warning(f"GenAI API key failed for the following reason: {e}") + error_msg = str(e) + logger.warning(f"Failed to call LLM with the following error: {error_msg}") - return False + return error_msg def get_llm_max_tokens( - model_name: str | None = GEN_AI_MODEL_VERSION, + model_map: dict, + model_name: str, model_provider: str = GEN_AI_MODEL_PROVIDER, ) -> int: """Best effort attempt to get the max tokens for the LLM""" @@ -213,24 +312,44 @@ def get_llm_max_tokens( # This is an override, so always return this return GEN_AI_MAX_TOKENS - if not model_name: - return 4096 - try: - if model_provider == "openai": - return get_max_tokens(model_name) - return get_max_tokens("/".join([model_provider, model_name])) + model_obj = model_map.get(f"{model_provider}/{model_name}") + if not model_obj: + model_obj = model_map[model_name] + + if "max_input_tokens" in model_obj: + return model_obj["max_input_tokens"] + + if "max_tokens" in model_obj: + return model_obj["max_tokens"] + + raise RuntimeError("No max tokens found for LLM") except Exception: + logger.exception( + f"Failed to get max tokens for LLM with name {model_name}. Defaulting to 4096." + ) return 4096 def get_max_input_tokens( - model_name: str | None = GEN_AI_MODEL_VERSION, - model_provider: str = GEN_AI_MODEL_PROVIDER, + model_name: str, + model_provider: str, output_tokens: int = GEN_AI_MAX_OUTPUT_TOKENS, ) -> int: + # NOTE: we previously used `litellm.get_max_tokens()`, but despite the name, this actually + # returns the max OUTPUT tokens. Under the hood, this uses the `litellm.model_cost` dict, + # and there is no other interface to get what we want. 
This should be okay though, since the + # `model_cost` dict is a named public interface: + # https://litellm.vercel.app/docs/completion/token_usage#7-model_cost + # model_map is litellm.model_cost + litellm_model_map = litellm.model_cost + input_toks = ( - get_llm_max_tokens(model_name=model_name, model_provider=model_provider) + get_llm_max_tokens( + model_name=model_name, + model_provider=model_provider, + model_map=litellm_model_map, + ) - output_tokens ) diff --git a/backend/danswer/main.py b/backend/danswer/main.py index 6268263ae..1e1460beb 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -1,8 +1,9 @@ +import time +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager from typing import Any from typing import cast -import nltk # type:ignore -import torch import uvicorn from fastapi import APIRouter from fastapi import FastAPI @@ -25,29 +26,32 @@ from danswer.configs.app_configs import APP_HOST from danswer.configs.app_configs import APP_PORT from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_GENERATIVE_AI -from danswer.configs.app_configs import MODEL_SERVER_HOST -from danswer.configs.app_configs import MODEL_SERVER_PORT +from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.configs.app_configs import LOG_ENDPOINT_LATENCY from danswer.configs.app_configs import OAUTH_CLIENT_ID from danswer.configs.app_configs import OAUTH_CLIENT_SECRET -from danswer.configs.app_configs import SECRET +from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION from danswer.configs.constants import AuthType -from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_API_ENDPOINT -from 
danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.db.chat import delete_old_default_personas from danswer.db.connector import create_initial_default_connector from danswer.db.connector_credential_pair import associate_default_cc_pair +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.connector_credential_pair import resync_cc_pair from danswer.db.credentials import create_initial_public_credential from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_secondary_db_embedding_model from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import warm_up_connections from danswer.db.index_attempt import cancel_indexing_attempts_past_model +from danswer.db.index_attempt import expire_index_attempts +from danswer.db.swap_index import check_index_swap from danswer.document_index.factory import get_default_document_index -from danswer.llm.factory import get_default_llm -from danswer.search.search_nlp_models import warm_up_models +from danswer.llm.llm_initialization import load_llm_providers +from danswer.search.retrieval.search_runner import download_nltk_data +from danswer.search.search_nlp_models import warm_up_encoders +from danswer.server.auth_check import check_router_auth from danswer.server.danswer_api.ingestion import get_danswer_api_key from danswer.server.danswer_api.ingestion import router as danswer_api_router from danswer.server.documents.cc_pair import router as cc_pair_router @@ -55,39 +59,60 @@ from danswer.server.documents.connector import router as connector_router from danswer.server.documents.credential import router as credential_router from danswer.server.documents.document import router as document_router from danswer.server.features.document_set.api import router as document_set_router +from danswer.server.features.folder.api import router as 
folder_router from danswer.server.features.persona.api import admin_router as admin_persona_router from danswer.server.features.persona.api import basic_router as persona_router from danswer.server.features.prompt.api import basic_router as prompt_router +from danswer.server.features.tool.api import router as tool_router from danswer.server.gpts.api import router as gpts_router from danswer.server.manage.administrative import router as admin_router from danswer.server.manage.get_state import router as state_router +from danswer.server.manage.llm.api import admin_router as llm_admin_router +from danswer.server.manage.llm.api import basic_router as llm_router from danswer.server.manage.secondary_index import router as secondary_index_router from danswer.server.manage.slack_bot import router as slack_bot_management_router from danswer.server.manage.users import router as user_router +from danswer.server.middleware.latency_logging import add_latency_logging_middleware from danswer.server.query_and_chat.chat_backend import router as chat_router from danswer.server.query_and_chat.query_backend import ( admin_router as admin_query_router, ) from danswer.server.query_and_chat.query_backend import basic_router as query_router +from danswer.server.settings.api import admin_router as settings_admin_router +from danswer.server.settings.api import basic_router as settings_router +from danswer.tools.built_in_tools import auto_add_search_tool_to_personas +from danswer.tools.built_in_tools import load_builtin_tools +from danswer.tools.built_in_tools import refresh_built_in_tools_cache from danswer.utils.logger import setup_logger from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType from danswer.utils.variable_functionality import fetch_versioned_implementation +from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT 
logger = setup_logger() -def validation_exception_handler( - request: Request, exc: RequestValidationError -) -> JSONResponse: +def validation_exception_handler(request: Request, exc: Exception) -> JSONResponse: + if not isinstance(exc, RequestValidationError): + logger.error( + f"Unexpected exception type in validation_exception_handler - {type(exc)}" + ) + raise exc + exc_str = f"{exc}".replace("\n", " ").replace(" ", " ") logger.exception(f"{request}: {exc_str}") content = {"status_code": 422, "message": exc_str, "data": None} return JSONResponse(content=content, status_code=422) -def value_error_handler(_: Request, exc: ValueError) -> JSONResponse: +def value_error_handler(_: Request, exc: Exception) -> JSONResponse: + if not isinstance(exc, ValueError): + logger.error(f"Unexpected exception type in value_error_handler - {type(exc)}") + raise exc + try: raise (exc) except Exception: @@ -118,8 +143,120 @@ def include_router_with_global_prefix_prepended( application.include_router(router, **final_kwargs) +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator: + engine = get_sqlalchemy_engine() + + verify_auth = fetch_versioned_implementation( + "danswer.auth.users", "verify_auth_setting" + ) + # Will throw exception if an issue is found + verify_auth() + + # Danswer APIs key + api_key = get_danswer_api_key() + logger.info(f"Danswer API Key: {api_key}") + + if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET: + logger.info("Both OAuth Client ID and Secret are configured.") + + if DISABLE_GENERATIVE_AI: + logger.info("Generative AI Q&A disabled") + + if MULTILINGUAL_QUERY_EXPANSION: + logger.info( + f"Using multilingual flow with languages: {MULTILINGUAL_QUERY_EXPANSION}" + ) + + # fill up Postgres connection pools + await warm_up_connections() + + with Session(engine) as db_session: + check_index_swap(db_session=db_session) + db_embedding_model = get_current_db_embedding_model(db_session) + secondary_db_embedding_model = 
get_secondary_db_embedding_model(db_session) + + # Break bad state for thrashing indexes + if secondary_db_embedding_model and DISABLE_INDEX_UPDATE_ON_SWAP: + expire_index_attempts( + embedding_model_id=db_embedding_model.id, db_session=db_session + ) + + for cc_pair in get_connector_credential_pairs(db_session): + resync_cc_pair(cc_pair, db_session=db_session) + + # Expire all old embedding models indexing attempts, technically redundant + cancel_indexing_attempts_past_model(db_session) + + logger.info(f'Using Embedding model: "{db_embedding_model.model_name}"') + if db_embedding_model.query_prefix or db_embedding_model.passage_prefix: + logger.info(f'Query embedding prefix: "{db_embedding_model.query_prefix}"') + logger.info( + f'Passage embedding prefix: "{db_embedding_model.passage_prefix}"' + ) + + if ENABLE_RERANKING_REAL_TIME_FLOW: + logger.info("Reranking step of search flow is enabled.") + + logger.info("Verifying query preprocessing (NLTK) data is downloaded") + download_nltk_data() + + logger.info("Verifying default connector/credential exist.") + create_initial_public_credential(db_session) + create_initial_default_connector(db_session) + associate_default_cc_pair(db_session) + + logger.info("Loading LLM providers from env variables") + load_llm_providers(db_session) + + logger.info("Loading default Prompts and Personas") + delete_old_default_personas(db_session) + load_chat_yamls() + + logger.info("Loading built-in tools") + load_builtin_tools(db_session) + refresh_built_in_tools_cache(db_session) + auto_add_search_tool_to_personas(db_session) + + logger.info("Verifying Document Index(s) is/are available.") + document_index = get_default_document_index( + primary_index_name=db_embedding_model.index_name, + secondary_index_name=secondary_db_embedding_model.index_name + if secondary_db_embedding_model + else None, + ) + # Vespa startup is a bit slow, so give it a few seconds + wait_time = 5 + for attempt in range(5): + try: + 
document_index.ensure_indices_exist( + index_embedding_dim=db_embedding_model.model_dim, + secondary_index_embedding_dim=secondary_db_embedding_model.model_dim + if secondary_db_embedding_model + else None, + ) + break + except Exception: + logger.info(f"Waiting on Vespa, retrying in {wait_time} seconds...") + time.sleep(wait_time) + + logger.info(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") + warm_up_encoders( + model_name=db_embedding_model.model_name, + normalize=db_embedding_model.normalize, + model_server_host=MODEL_SERVER_HOST, + model_server_port=MODEL_SERVER_PORT, + ) + + optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__}) + + yield + + def get_application() -> FastAPI: - application = FastAPI(title="Danswer Backend", version=__version__) + application = FastAPI( + title="Danswer Backend", version=__version__, lifespan=lifespan + ) include_router_with_global_prefix_prepended(application, chat_router) include_router_with_global_prefix_prepended(application, query_router) @@ -130,6 +267,7 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, connector_router) include_router_with_global_prefix_prepended(application, credential_router) include_router_with_global_prefix_prepended(application, cc_pair_router) + include_router_with_global_prefix_prepended(application, folder_router) include_router_with_global_prefix_prepended(application, document_set_router) include_router_with_global_prefix_prepended(application, secondary_index_router) include_router_with_global_prefix_prepended( @@ -138,9 +276,14 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, persona_router) include_router_with_global_prefix_prepended(application, admin_persona_router) include_router_with_global_prefix_prepended(application, prompt_router) + include_router_with_global_prefix_prepended(application, tool_router) 
include_router_with_global_prefix_prepended(application, state_router) include_router_with_global_prefix_prepended(application, danswer_api_router) include_router_with_global_prefix_prepended(application, gpts_router) + include_router_with_global_prefix_prepended(application, settings_router) + include_router_with_global_prefix_prepended(application, settings_admin_router) + include_router_with_global_prefix_prepended(application, llm_admin_router) + include_router_with_global_prefix_prepended(application, llm_router) if AUTH_TYPE == AuthType.DISABLED: # Server logs this during auth setup verification step @@ -185,7 +328,7 @@ def get_application() -> FastAPI: fastapi_users.get_oauth_router( oauth_client, auth_backend, - SECRET, + USER_AUTH_SECRET, associate_by_email=True, is_verified_by_default=True, # Points the user back to the login page @@ -208,111 +351,6 @@ def get_application() -> FastAPI: application.add_exception_handler(ValueError, value_error_handler) - @application.on_event("startup") - def startup_event() -> None: - engine = get_sqlalchemy_engine() - - verify_auth = fetch_versioned_implementation( - "danswer.auth.users", "verify_auth_setting" - ) - # Will throw exception if an issue is found - verify_auth() - - # Danswer APIs key - api_key = get_danswer_api_key() - logger.info(f"Danswer API Key: {api_key}") - - if OAUTH_CLIENT_ID and OAUTH_CLIENT_SECRET: - logger.info("Both OAuth Client ID and Secret are configured.") - - if DISABLE_GENERATIVE_AI: - logger.info("Generative AI Q&A disabled") - else: - logger.info(f"Using LLM Provider: {GEN_AI_MODEL_PROVIDER}") - logger.info(f"Using LLM Model Version: {GEN_AI_MODEL_VERSION}") - if GEN_AI_MODEL_VERSION != FAST_GEN_AI_MODEL_VERSION: - logger.info( - f"Using Fast LLM Model Version: {FAST_GEN_AI_MODEL_VERSION}" - ) - if GEN_AI_API_ENDPOINT: - logger.info(f"Using LLM Endpoint: {GEN_AI_API_ENDPOINT}") - - # Any additional model configs logged here - get_default_llm().log_model_configs() - - if 
MULTILINGUAL_QUERY_EXPANSION: - logger.info( - f"Using multilingual flow with languages: {MULTILINGUAL_QUERY_EXPANSION}" - ) - - with Session(engine) as db_session: - db_embedding_model = get_current_db_embedding_model(db_session) - secondary_db_embedding_model = get_secondary_db_embedding_model(db_session) - - cancel_indexing_attempts_past_model(db_session) - - logger.info(f'Using Embedding model: "{db_embedding_model.model_name}"') - if db_embedding_model.query_prefix or db_embedding_model.passage_prefix: - logger.info( - f'Query embedding prefix: "{db_embedding_model.query_prefix}"' - ) - logger.info( - f'Passage embedding prefix: "{db_embedding_model.passage_prefix}"' - ) - - if ENABLE_RERANKING_REAL_TIME_FLOW: - logger.info("Reranking step of search flow is enabled.") - - if MODEL_SERVER_HOST: - logger.info( - f"Using Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}" - ) - else: - logger.info("Warming up local NLP models.") - warm_up_models( - model_name=db_embedding_model.model_name, - normalize=db_embedding_model.normalize, - skip_cross_encoders=not ENABLE_RERANKING_REAL_TIME_FLOW, - ) - - if torch.cuda.is_available(): - logger.info("GPU is available") - else: - logger.info("GPU is not available") - logger.info(f"Torch Threads: {torch.get_num_threads()}") - - logger.info("Verifying query preprocessing (NLTK) data is downloaded") - nltk.download("stopwords", quiet=True) - nltk.download("wordnet", quiet=True) - nltk.download("punkt", quiet=True) - - logger.info("Verifying default connector/credential exist.") - create_initial_public_credential(db_session) - create_initial_default_connector(db_session) - associate_default_cc_pair(db_session) - - logger.info("Loading default Prompts and Personas") - load_chat_yamls() - - logger.info("Verifying Document Index(s) is/are available.") - - document_index = get_default_document_index( - primary_index_name=db_embedding_model.index_name, - secondary_index_name=secondary_db_embedding_model.index_name - if 
secondary_db_embedding_model - else None, - ) - document_index.ensure_indices_exist( - index_embedding_dim=db_embedding_model.model_dim, - secondary_index_embedding_dim=secondary_db_embedding_model.model_dim - if secondary_db_embedding_model - else None, - ) - - optional_telemetry( - record_type=RecordType.VERSION, data={"version": __version__} - ) - application.add_middleware( CORSMiddleware, allow_origins=["*"], # Change this to the list of allowed origins if needed @@ -320,6 +358,11 @@ def get_application() -> FastAPI: allow_methods=["*"], allow_headers=["*"], ) + if LOG_ENDPOINT_LATENCY: + add_latency_logging_middleware(application, logger) + + # Ensure all routes have auth enabled or are explicitly marked as public + check_router_auth(application) return application diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index ff4f2cc00..8d40f9aba 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -1,56 +1,73 @@ -import itertools from collections.abc import Callable from collections.abc import Iterator from typing import cast from sqlalchemy.orm import Session -from danswer.chat.chat_utils import compute_max_document_tokens -from danswer.chat.chat_utils import get_chunks_for_qa +from danswer.chat.chat_utils import reorganize_citations +from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece -from danswer.chat.models import DanswerContext from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes -from danswer.chat.models import LLMMetricsContainer from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError from danswer.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT from danswer.configs.chat_configs import QA_TIMEOUT from danswer.configs.constants import 
MessageType -from danswer.configs.model_configs import CHUNK_SIZE from danswer.db.chat import create_chat_session +from danswer.db.chat import create_db_search_doc from danswer.db.chat import create_new_chat_message from danswer.db.chat import get_or_create_root_message -from danswer.db.chat import get_persona_by_id from danswer.db.chat import get_prompt_by_id from danswer.db.chat import translate_db_message_to_chat_message_detail -from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.chat import translate_db_search_doc_to_server_search_doc +from danswer.db.engine import get_session_context_manager from danswer.db.models import User -from danswer.document_index.factory import get_default_document_index -from danswer.indexing.models import InferenceChunk +from danswer.llm.answering.answer import Answer +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import CitationConfig +from danswer.llm.answering.models import DocumentPruningConfig +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.models import QuotesConfig +from danswer.llm.factory import get_llm_for_persona from danswer.llm.utils import get_default_llm_token_encode -from danswer.one_shot_answer.factory import get_question_answer_model from danswer.one_shot_answer.models import DirectQARequest from danswer.one_shot_answer.models import OneShotQAResponse from danswer.one_shot_answer.models import QueryRephrase -from danswer.one_shot_answer.qa_block import no_gen_ai_response from danswer.one_shot_answer.qa_utils import combine_message_thread from danswer.search.models import RerankMetricsContainer from danswer.search.models import RetrievalMetricsContainer -from danswer.search.models import SavedSearchDoc -from danswer.search.request_preprocessing import retrieval_preprocessing -from danswer.search.search_runner import chunks_to_search_docs -from danswer.search.search_runner import 
full_chunk_search_generator +from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.secondary_llm_flows.answer_validation import get_answer_validity from danswer.secondary_llm_flows.query_expansion import thread_based_query_rephrase from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.utils import get_json_line +from danswer.tools.force import ForceUseTool +from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID +from danswer.tools.search.search_tool import SearchResponseSummary +from danswer.tools.search.search_tool import SearchTool +from danswer.tools.search.search_tool import SECTION_RELEVANCE_LIST_ID +from danswer.tools.tool import ToolResponse +from danswer.tools.tool_runner import ToolRunKickoff from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time logger = setup_logger() +AnswerObjectIterator = Iterator[ + QueryRephrase + | QADocsResponse + | LLMRelevanceFilterResponse + | DanswerAnswerPiece + | DanswerQuotes + | DanswerContexts + | StreamingError + | ChatMessageDetail + | CitationInfo + | ToolRunKickoff +] + def stream_answer_objects( query_req: DirectQARequest, @@ -63,23 +80,14 @@ def stream_answer_objects( db_session: Session, # Needed to translate persona num_chunks to tokens to the LLM default_num_chunks: float = MAX_CHUNKS_FED_TO_CHAT, - default_chunk_size: int = CHUNK_SIZE, timeout: int = QA_TIMEOUT, bypass_acl: bool = False, + use_citations: bool = False, + danswerbot_flow: bool = False, retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] | None = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, - llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, -) -> Iterator[ - QueryRephrase - | QADocsResponse - | LLMRelevanceFilterResponse - | DanswerAnswerPiece - | DanswerQuotes - | DanswerContexts - | StreamingError - | ChatMessageDetail -]: +) -> 
AnswerObjectIterator: """Streams in order: 1. [always] Retrieved documents, stops flow if nothing is found 2. [conditional] LLM selected chunk indices if LLM chunk filtering is turned on @@ -97,16 +105,11 @@ def stream_answer_objects( user_id=user_id, persona_id=query_req.persona_id, one_shot=True, + danswerbot_flow=danswerbot_flow, ) llm_tokenizer = get_default_llm_token_encode() - embedding_model = get_current_db_embedding_model(db_session) - - document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None - ) - # Create a chat session which will just store the root message, the query, and the AI response root_message = get_or_create_root_message( chat_session_id=chat_session.id, db_session=db_session @@ -120,177 +123,123 @@ def stream_answer_objects( user_query=query_msg.message, history_str=history_str, ) + # Given back ahead of the documents for latency reasons + # In chat flow it's given back along with the documents yield QueryRephrase(rephrased_query=rephrased_query) - ( - retrieval_request, - predicted_search_type, - predicted_flow, - ) = retrieval_preprocessing( - query=rephrased_query, - retrieval_details=query_req.retrieval_options, - persona=chat_session.persona, - user=user, - db_session=db_session, - bypass_acl=bypass_acl, - ) - - documents_generator = full_chunk_search_generator( - search_query=retrieval_request, - document_index=document_index, - db_session=db_session, - retrieval_metrics_callback=retrieval_metrics_callback, - rerank_metrics_callback=rerank_metrics_callback, - ) - applied_time_cutoff = retrieval_request.filters.time_cutoff - recency_bias_multiplier = retrieval_request.recency_bias_multiplier - run_llm_chunk_filter = not retrieval_request.skip_llm_chunk_filter - - # First fetch and return the top chunks so the user can immediately see some results - top_chunks = cast(list[InferenceChunk], next(documents_generator)) - - top_docs = chunks_to_search_docs(top_chunks) - 
fake_saved_docs = [SavedSearchDoc.from_search_doc(doc) for doc in top_docs] - - # Since this is in the one shot answer flow, we don't need to actually save the docs to DB - initial_response = QADocsResponse( - top_documents=fake_saved_docs, - predicted_flow=predicted_flow, - predicted_search=predicted_search_type, - applied_source_filters=retrieval_request.filters.source_type, - applied_time_cutoff=applied_time_cutoff, - recency_bias_multiplier=recency_bias_multiplier, - ) - yield initial_response - - # Get the final ordering of chunks for the LLM call - llm_chunk_selection = cast(list[bool], next(documents_generator)) - - # Yield the list of LLM selected chunks for showing the LLM selected icons in the UI - llm_relevance_filtering_response = LLMRelevanceFilterResponse( - relevant_chunk_indices=[ - index for index, value in enumerate(llm_chunk_selection) if value - ] - if run_llm_chunk_filter - else [] - ) - yield llm_relevance_filtering_response - - # Prep chunks to pass to LLM - num_llm_chunks = ( - chat_session.persona.num_chunks - if chat_session.persona.num_chunks is not None - else default_num_chunks - ) - - chunk_token_limit = int(num_llm_chunks * default_chunk_size) - if max_document_tokens: - chunk_token_limit = min(chunk_token_limit, max_document_tokens) - else: - max_document_tokens = compute_max_document_tokens( - persona=chat_session.persona, actual_user_input=query_msg.message - ) - chunk_token_limit = min(chunk_token_limit, max_document_tokens) - - llm_chunks_indices = get_chunks_for_qa( - chunks=top_chunks, - llm_chunk_selection=llm_chunk_selection, - token_limit=chunk_token_limit, - ) - llm_chunks = [top_chunks[i] for i in llm_chunks_indices] - - logger.debug( - f"Chunks fed to LLM: {[chunk.semantic_identifier for chunk in llm_chunks]}" - ) - prompt = None - llm_override = None if query_req.prompt_id is not None: prompt = get_prompt_by_id( - prompt_id=query_req.prompt_id, user_id=user_id, db_session=db_session + prompt_id=query_req.prompt_id, 
user=user, db_session=db_session ) - persona = get_persona_by_id( - persona_id=query_req.persona_id, user_id=user_id, db_session=db_session - ) - llm_override = persona.llm_model_version_override - - qa_model = get_question_answer_model( - prompt=prompt, - timeout=timeout, - chain_of_thought=query_req.chain_of_thought, - llm_version=llm_override, - ) - - full_prompt_str = ( - qa_model.build_prompt( - query=query_msg.message, history_str=history_str, context_chunks=llm_chunks - ) - if qa_model is not None - else "Gen AI Disabled" - ) + if prompt is None: + if not chat_session.persona.prompts: + raise RuntimeError( + "Persona does not have any prompts - this should never happen" + ) + prompt = chat_session.persona.prompts[0] # Create the first User query message new_user_message = create_new_chat_message( chat_session_id=chat_session.id, parent_message=root_message, prompt_id=query_req.prompt_id, - message=full_prompt_str, - token_count=len(llm_tokenizer(full_prompt_str)), + message=query_msg.message, + token_count=len(llm_tokenizer(query_msg.message)), message_type=MessageType.USER, db_session=db_session, commit=True, ) - response_packets = ( - qa_model.answer_question_stream( - prompt=full_prompt_str, - llm_context_docs=llm_chunks, - metrics_callback=llm_metrics_callback, - ) - if qa_model is not None - else no_gen_ai_response() + llm = get_llm_for_persona(persona=chat_session.persona) + prompt_config = PromptConfig.from_model(prompt) + document_pruning_config = DocumentPruningConfig( + max_chunks=int( + chat_session.persona.num_chunks + if chat_session.persona.num_chunks is not None + else default_num_chunks + ), + max_tokens=max_document_tokens, + use_sections=query_req.chunks_above > 0 or query_req.chunks_below > 0, + ) + search_tool = SearchTool( + db_session=db_session, + user=user, + persona=chat_session.persona, + retrieval_options=query_req.retrieval_options, + prompt_config=prompt_config, + llm_config=llm.config, + pruning_config=document_pruning_config, ) 
- if qa_model is not None and query_req.return_contexts: - contexts = DanswerContexts( - contexts=[ - DanswerContext( - content=context_doc.content, - document_id=context_doc.document_id, - semantic_identifier=context_doc.semantic_identifier, - blurb=context_doc.semantic_identifier, + answer_config = AnswerStyleConfig( + citation_config=CitationConfig() if use_citations else None, + quotes_config=QuotesConfig() if not use_citations else None, + document_pruning_config=document_pruning_config, + ) + answer = Answer( + question=query_msg.message, + answer_style_config=answer_config, + prompt_config=PromptConfig.from_model(prompt), + llm=get_llm_for_persona(persona=chat_session.persona), + single_message_history=history_str, + tools=[search_tool], + force_use_tool=ForceUseTool( + tool_name=search_tool.name(), + args={"query": rephrased_query}, + ), + # for now, don't use tool calling for this flow, as we haven't + # tested quotes with tool calling too much yet + skip_explicit_tool_calling=True, + ) + # won't be any ImageGenerationDisplay responses since that tool is never passed in + for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): + # for one-shot flow, don't currently do anything with these + if isinstance(packet, ToolResponse): + if packet.id == SEARCH_RESPONSE_SUMMARY_ID: + search_response_summary = cast(SearchResponseSummary, packet.response) + + top_docs = chunks_or_sections_to_search_docs( + search_response_summary.top_sections ) - for context_doc in llm_chunks - ] - ) - response_packets = itertools.chain(response_packets, [contexts]) + reference_db_search_docs = [ + create_db_search_doc( + server_search_doc=top_doc, db_session=db_session + ) + for top_doc in top_docs + ] - # Capture outputs and errors - llm_output = "" - error: str | None = None - for packet in response_packets: - logger.debug(packet) + response_docs = [ + translate_db_search_doc_to_server_search_doc(db_search_doc) + for db_search_doc in reference_db_search_docs + ] 
- if isinstance(packet, DanswerAnswerPiece): - token = packet.answer_piece - if token: - llm_output += token - elif isinstance(packet, StreamingError): - error = packet.error - - yield packet + initial_response = QADocsResponse( + rephrased_query=rephrased_query, + top_documents=response_docs, + predicted_flow=search_response_summary.predicted_flow, + predicted_search=search_response_summary.predicted_search, + applied_source_filters=search_response_summary.final_filters.source_type, + applied_time_cutoff=search_response_summary.final_filters.time_cutoff, + recency_bias_multiplier=search_response_summary.recency_bias_multiplier, + ) + yield initial_response + elif packet.id == SECTION_RELEVANCE_LIST_ID: + yield LLMRelevanceFilterResponse(relevant_chunk_indices=packet.response) + else: + yield packet # Saving Gen AI answer and responding with message info gen_ai_response_message = create_new_chat_message( chat_session_id=chat_session.id, parent_message=new_user_message, prompt_id=query_req.prompt_id, - message=llm_output, - token_count=len(llm_tokenizer(llm_output)), + message=answer.llm_answer, + token_count=len(llm_tokenizer(answer.llm_answer)), message_type=MessageType.ASSISTANT, - error=error, - reference_docs=None, # Don't need to save reference docs for one shot flow + error=None, + reference_docs=reference_db_search_docs, db_session=db_session, commit=True, ) @@ -308,17 +257,17 @@ def stream_search_answer( user: User | None, max_document_tokens: int | None, max_history_tokens: int | None, - db_session: Session, ) -> Iterator[str]: - objects = stream_answer_objects( - query_req=query_req, - user=user, - max_document_tokens=max_document_tokens, - max_history_tokens=max_history_tokens, - db_session=db_session, - ) - for obj in objects: - yield get_json_line(obj.dict()) + with get_session_context_manager() as session: + objects = stream_answer_objects( + query_req=query_req, + user=user, + max_document_tokens=max_document_tokens, + 
max_history_tokens=max_history_tokens, + db_session=session, + ) + for obj in objects: + yield get_json_line(obj.dict()) def get_search_answer( @@ -330,10 +279,11 @@ def get_search_answer( answer_generation_timeout: int = QA_TIMEOUT, enable_reflexion: bool = False, bypass_acl: bool = False, + use_citations: bool = False, + danswerbot_flow: bool = False, retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] | None = None, rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, - llm_metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, ) -> OneShotQAResponse: """Collects the streamed one shot answer responses into a single object""" qa_response = OneShotQAResponse() @@ -345,10 +295,11 @@ def get_search_answer( max_history_tokens=max_history_tokens, db_session=db_session, bypass_acl=bypass_acl, + use_citations=use_citations, + danswerbot_flow=danswerbot_flow, timeout=answer_generation_timeout, retrieval_metrics_callback=retrieval_metrics_callback, rerank_metrics_callback=rerank_metrics_callback, - llm_metrics_callback=llm_metrics_callback, ) answer = "" @@ -363,6 +314,11 @@ def get_search_answer( qa_response.llm_chunks_indices = packet.relevant_chunk_indices elif isinstance(packet, DanswerQuotes): qa_response.quotes = packet + elif isinstance(packet, CitationInfo): + if qa_response.citations: + qa_response.citations.append(packet) + else: + qa_response.citations = [packet] elif isinstance(packet, DanswerContexts): qa_response.contexts = packet elif isinstance(packet, StreamingError): @@ -381,4 +337,10 @@ def get_search_answer( else: qa_response.answer_valid = True + if use_citations and qa_response.answer and qa_response.citations: + # Reorganize citation nums to be in the same order as the answer + qa_response.answer, qa_response.citations = reorganize_citations( + qa_response.answer, qa_response.citations + ) + return qa_response diff --git a/backend/danswer/one_shot_answer/factory.py 
b/backend/danswer/one_shot_answer/factory.py deleted file mode 100644 index 122ed6ac0..000000000 --- a/backend/danswer/one_shot_answer/factory.py +++ /dev/null @@ -1,48 +0,0 @@ -from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE -from danswer.configs.chat_configs import QA_TIMEOUT -from danswer.db.models import Prompt -from danswer.llm.exceptions import GenAIDisabledException -from danswer.llm.factory import get_default_llm -from danswer.one_shot_answer.interfaces import QAModel -from danswer.one_shot_answer.qa_block import QABlock -from danswer.one_shot_answer.qa_block import QAHandler -from danswer.one_shot_answer.qa_block import SingleMessageQAHandler -from danswer.one_shot_answer.qa_block import WeakLLMQAHandler -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -def get_question_answer_model( - prompt: Prompt | None, - api_key: str | None = None, - timeout: int = QA_TIMEOUT, - chain_of_thought: bool = False, - llm_version: str | None = None, - qa_model_version: str | None = QA_PROMPT_OVERRIDE, -) -> QAModel | None: - if chain_of_thought: - raise NotImplementedError("COT has been disabled") - - system_prompt = prompt.system_prompt if prompt is not None else None - task_prompt = prompt.task_prompt if prompt is not None else None - - try: - llm = get_default_llm( - api_key=api_key, - timeout=timeout, - gen_ai_model_version_override=llm_version, - ) - except GenAIDisabledException: - return None - - if qa_model_version == "weak": - qa_handler: QAHandler = WeakLLMQAHandler( - system_prompt=system_prompt, task_prompt=task_prompt - ) - else: - qa_handler = SingleMessageQAHandler( - system_prompt=system_prompt, task_prompt=task_prompt - ) - - return QABlock(llm=llm, qa_handler=qa_handler) diff --git a/backend/danswer/one_shot_answer/interfaces.py b/backend/danswer/one_shot_answer/interfaces.py deleted file mode 100644 index ca916d699..000000000 --- a/backend/danswer/one_shot_answer/interfaces.py +++ /dev/null @@ -1,26 +0,0 @@ 
-import abc -from collections.abc import Callable - -from danswer.chat.models import AnswerQuestionStreamReturn -from danswer.chat.models import LLMMetricsContainer -from danswer.indexing.models import InferenceChunk - - -class QAModel: - @abc.abstractmethod - def build_prompt( - self, - query: str, - history_str: str, - context_chunks: list[InferenceChunk], - ) -> str: - raise NotImplementedError - - @abc.abstractmethod - def answer_question_stream( - self, - prompt: str, - llm_context_docs: list[InferenceChunk], - metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, - ) -> AnswerQuestionStreamReturn: - raise NotImplementedError diff --git a/backend/danswer/one_shot_answer/models.py b/backend/danswer/one_shot_answer/models.py index 6401b3440..868199164 100644 --- a/backend/danswer/one_shot_answer/models.py +++ b/backend/danswer/one_shot_answer/models.py @@ -1,12 +1,15 @@ from typing import Any from pydantic import BaseModel +from pydantic import Field from pydantic import root_validator +from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerContexts from danswer.chat.models import DanswerQuotes from danswer.chat.models import QADocsResponse from danswer.configs.constants import MessageType +from danswer.search.models import ChunkContext from danswer.search.models import RetrievalDetails @@ -17,14 +20,17 @@ class QueryRephrase(BaseModel): class ThreadMessage(BaseModel): message: str sender: str | None - role: MessageType + role: MessageType = MessageType.USER -class DirectQARequest(BaseModel): +class DirectQARequest(ChunkContext): messages: list[ThreadMessage] prompt_id: int | None persona_id: int - retrieval_options: RetrievalDetails + retrieval_options: RetrievalDetails = Field(default_factory=RetrievalDetails) + # This is to forcibly skip (or run) the step, if None it uses the system defaults + skip_rerank: bool | None = None + skip_llm_chunk_filter: bool | None = None chain_of_thought: bool = False 
return_contexts: bool = False @@ -50,6 +56,7 @@ class OneShotQAResponse(BaseModel): answer: str | None = None rephrase: str | None = None quotes: DanswerQuotes | None = None + citations: list[CitationInfo] | None = None docs: QADocsResponse | None = None llm_chunks_indices: list[int] | None = None error_msg: str | None = None diff --git a/backend/danswer/one_shot_answer/qa_block.py b/backend/danswer/one_shot_answer/qa_block.py deleted file mode 100644 index c7b702d26..000000000 --- a/backend/danswer/one_shot_answer/qa_block.py +++ /dev/null @@ -1,313 +0,0 @@ -import abc -import re -from collections.abc import Callable -from collections.abc import Iterator -from typing import cast - -from danswer.chat.chat_utils import build_complete_context_str -from danswer.chat.models import AnswerQuestionStreamReturn -from danswer.chat.models import DanswerAnswer -from danswer.chat.models import DanswerAnswerPiece -from danswer.chat.models import DanswerQuotes -from danswer.chat.models import LlmDoc -from danswer.chat.models import LLMMetricsContainer -from danswer.chat.models import StreamingError -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION -from danswer.configs.constants import DISABLED_GEN_AI_MSG -from danswer.indexing.models import InferenceChunk -from danswer.llm.interfaces import LLM -from danswer.llm.utils import check_number_of_tokens -from danswer.llm.utils import get_default_llm_token_encode -from danswer.one_shot_answer.interfaces import QAModel -from danswer.one_shot_answer.qa_utils import process_answer -from danswer.one_shot_answer.qa_utils import process_model_tokens -from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK -from danswer.prompts.direct_qa_prompts import COT_PROMPT -from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK -from danswer.prompts.direct_qa_prompts import JSON_PROMPT -from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT -from danswer.prompts.direct_qa_prompts import ONE_SHOT_SYSTEM_PROMPT -from 
danswer.prompts.direct_qa_prompts import ONE_SHOT_TASK_PROMPT -from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT -from danswer.prompts.direct_qa_prompts import PARAMATERIZED_PROMPT_WITHOUT_CONTEXT -from danswer.prompts.direct_qa_prompts import WEAK_LLM_PROMPT -from danswer.prompts.direct_qa_prompts import WEAK_MODEL_SYSTEM_PROMPT -from danswer.prompts.direct_qa_prompts import WEAK_MODEL_TASK_PROMPT -from danswer.utils.logger import setup_logger -from danswer.utils.text_processing import clean_up_code_blocks -from danswer.utils.text_processing import escape_newlines - -logger = setup_logger() - - -class QAHandler(abc.ABC): - @property - @abc.abstractmethod - def is_json_output(self) -> bool: - """Does the model output a valid json with answer and quotes keys? Most flows with a - capable model should output a json. This hints to the model that the output is used - with a downstream system rather than freeform creative output. Most models should be - finetuned to recognize this.""" - raise NotImplementedError - - @abc.abstractmethod - def build_prompt( - self, - query: str, - history_str: str, - context_chunks: list[InferenceChunk], - ) -> str: - raise NotImplementedError - - def process_llm_token_stream( - self, tokens: Iterator[str], context_chunks: list[InferenceChunk] - ) -> AnswerQuestionStreamReturn: - yield from process_model_tokens( - tokens=tokens, - context_docs=context_chunks, - is_json_prompt=self.is_json_output, - ) - - -class WeakLLMQAHandler(QAHandler): - """Since Danswer supports a variety of LLMs, this less demanding prompt is provided - as an option to use with weaker LLMs such as small version, low float precision, quantized, - or distilled models. It only uses one context document and has very weak requirements of - output format. 
- """ - - def __init__( - self, - system_prompt: str | None, - task_prompt: str | None, - ) -> None: - if not system_prompt and not task_prompt: - self.system_prompt = WEAK_MODEL_SYSTEM_PROMPT - self.task_prompt = WEAK_MODEL_TASK_PROMPT - else: - self.system_prompt = system_prompt or "" - self.task_prompt = task_prompt or "" - - self.task_prompt = self.task_prompt.rstrip() - if self.task_prompt and self.task_prompt[0] != "\n": - self.task_prompt = "\n" + self.task_prompt - - @property - def is_json_output(self) -> bool: - return False - - def build_prompt( - self, - query: str, - history_str: str, - context_chunks: list[InferenceChunk], - ) -> str: - context_block = "" - if context_chunks: - context_block = CONTEXT_BLOCK.format( - context_docs_str=context_chunks[0].content - ) - - prompt_str = WEAK_LLM_PROMPT.format( - system_prompt=self.system_prompt, - context_block=context_block, - task_prompt=self.task_prompt, - user_query=query, - ) - return prompt_str - - -class SingleMessageQAHandler(QAHandler): - def __init__( - self, - system_prompt: str | None, - task_prompt: str | None, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), - ) -> None: - self.use_language_hint = use_language_hint - if not system_prompt and not task_prompt: - self.system_prompt = ONE_SHOT_SYSTEM_PROMPT - self.task_prompt = ONE_SHOT_TASK_PROMPT - else: - self.system_prompt = system_prompt or "" - self.task_prompt = task_prompt or "" - - self.task_prompt = self.task_prompt.rstrip() - if self.task_prompt and self.task_prompt[0] != "\n": - self.task_prompt = "\n" + self.task_prompt - - @property - def is_json_output(self) -> bool: - return True - - def build_prompt( - self, query: str, history_str: str, context_chunks: list[InferenceChunk] - ) -> str: - context_block = "" - if context_chunks: - context_docs_str = build_complete_context_str( - cast(list[LlmDoc | InferenceChunk], context_chunks) - ) - context_block = CONTEXT_BLOCK.format(context_docs_str=context_docs_str) - - 
history_block = "" - if history_str: - history_block = HISTORY_BLOCK.format(history_str=history_str) - - full_prompt = JSON_PROMPT.format( - system_prompt=self.system_prompt, - context_block=context_block, - history_block=history_block, - task_prompt=self.task_prompt, - user_query=query, - language_hint_or_none=LANGUAGE_HINT.strip() - if self.use_language_hint - else "", - ).strip() - return full_prompt - - -# This one isn't used, currently only streaming prompts are used -class SingleMessageScratchpadHandler(QAHandler): - def __init__( - self, - system_prompt: str | None, - task_prompt: str | None, - use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), - ) -> None: - self.use_language_hint = use_language_hint - if not system_prompt and not task_prompt: - self.system_prompt = ONE_SHOT_SYSTEM_PROMPT - self.task_prompt = ONE_SHOT_TASK_PROMPT - else: - self.system_prompt = system_prompt or "" - self.task_prompt = task_prompt or "" - - self.task_prompt = self.task_prompt.rstrip() - if self.task_prompt and self.task_prompt[0] != "\n": - self.task_prompt = "\n" + self.task_prompt - - @property - def is_json_output(self) -> bool: - return True - - def build_prompt( - self, query: str, history_str: str, context_chunks: list[InferenceChunk] - ) -> str: - context_docs_str = build_complete_context_str( - cast(list[LlmDoc | InferenceChunk], context_chunks) - ) - - # Outdated - prompt = COT_PROMPT.format( - context_docs_str=context_docs_str, - user_query=query, - language_hint_or_none=LANGUAGE_HINT.strip() - if self.use_language_hint - else "", - ).strip() - - return prompt - - def process_llm_output( - self, model_output: str, context_chunks: list[InferenceChunk] - ) -> tuple[DanswerAnswer, DanswerQuotes]: - logger.debug(model_output) - - model_clean = clean_up_code_blocks(model_output) - - match = re.search(r'{\s*"answer":', model_clean) - if not match: - return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) - - final_json = 
escape_newlines(model_clean[match.start() :]) - - return process_answer( - final_json, context_chunks, is_json_prompt=self.is_json_output - ) - - def process_llm_token_stream( - self, tokens: Iterator[str], context_chunks: list[InferenceChunk] - ) -> AnswerQuestionStreamReturn: - # Can be supported but the parsing is more involved, not handling until needed - raise ValueError( - "This Scratchpad approach is not suitable for real time uses like streaming" - ) - - -def build_dummy_prompt( - system_prompt: str, task_prompt: str, retrieval_disabled: bool -) -> str: - if retrieval_disabled: - return PARAMATERIZED_PROMPT_WITHOUT_CONTEXT.format( - user_query="", - system_prompt=system_prompt, - task_prompt=task_prompt, - ).strip() - - return PARAMATERIZED_PROMPT.format( - context_docs_str="", - user_query="", - system_prompt=system_prompt, - task_prompt=task_prompt, - ).strip() - - -def no_gen_ai_response() -> Iterator[DanswerAnswerPiece]: - yield DanswerAnswerPiece(answer_piece=DISABLED_GEN_AI_MSG) - - -class QABlock(QAModel): - def __init__(self, llm: LLM, qa_handler: QAHandler) -> None: - self._llm = llm - self._qa_handler = qa_handler - - def build_prompt( - self, - query: str, - history_str: str, - context_chunks: list[InferenceChunk], - ) -> str: - prompt = self._qa_handler.build_prompt( - query=query, history_str=history_str, context_chunks=context_chunks - ) - return prompt - - def answer_question_stream( - self, - prompt: str, - llm_context_docs: list[InferenceChunk], - metrics_callback: Callable[[LLMMetricsContainer], None] | None = None, - ) -> AnswerQuestionStreamReturn: - tokens_stream = self._llm.stream(prompt) - - captured_tokens = [] - - try: - for answer_piece in self._qa_handler.process_llm_token_stream( - iter(tokens_stream), llm_context_docs - ): - if ( - isinstance(answer_piece, DanswerAnswerPiece) - and answer_piece.answer_piece - ): - captured_tokens.append(answer_piece.answer_piece) - yield answer_piece - - except Exception as e: - yield 
StreamingError(error=str(e)) - - if metrics_callback is not None: - prompt_tokens = check_number_of_tokens( - text=str(prompt), encode_fn=get_default_llm_token_encode() - ) - - response_tokens = check_number_of_tokens( - text="".join(captured_tokens), encode_fn=get_default_llm_token_encode() - ) - - metrics_callback( - LLMMetricsContainer( - prompt_tokens=prompt_tokens, response_tokens=response_tokens - ) - ) diff --git a/backend/danswer/one_shot_answer/qa_utils.py b/backend/danswer/one_shot_answer/qa_utils.py index 032d24345..e912a915e 100644 --- a/backend/danswer/one_shot_answer/qa_utils.py +++ b/backend/danswer/one_shot_answer/qa_utils.py @@ -1,275 +1,14 @@ -import math -import re from collections.abc import Callable from collections.abc import Generator -from collections.abc import Iterator -from json.decoder import JSONDecodeError -from typing import Optional -from typing import Tuple -import regex - -from danswer.chat.models import DanswerAnswer -from danswer.chat.models import DanswerAnswerPiece -from danswer.chat.models import DanswerQuote -from danswer.chat.models import DanswerQuotes -from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT from danswer.configs.constants import MessageType -from danswer.indexing.models import InferenceChunk from danswer.llm.utils import get_default_llm_token_encode from danswer.one_shot_answer.models import ThreadMessage -from danswer.prompts.constants import ANSWER_PAT -from danswer.prompts.constants import QUOTE_PAT -from danswer.prompts.constants import UNCERTAINTY_PAT from danswer.utils.logger import setup_logger -from danswer.utils.text_processing import clean_model_quote -from danswer.utils.text_processing import clean_up_code_blocks -from danswer.utils.text_processing import extract_embedded_json -from danswer.utils.text_processing import shared_precompare_cleanup logger = setup_logger() -def _extract_answer_quotes_freeform( - answer_raw: str, -) -> Tuple[Optional[str], Optional[list[str]]]: - """Splits 
the model output into an Answer and 0 or more Quote sections. - Splits by the Quote pattern, if not exist then assume it's all answer and no quotes - """ - # If no answer section, don't care about the quote - if answer_raw.lower().strip().startswith(QUOTE_PAT.lower()): - return None, None - - # Sometimes model regenerates the Answer: pattern despite it being provided in the prompt - if answer_raw.lower().startswith(ANSWER_PAT.lower()): - answer_raw = answer_raw[len(ANSWER_PAT) :] - - # Accept quote sections starting with the lower case version - answer_raw = answer_raw.replace( - f"\n{QUOTE_PAT}".lower(), f"\n{QUOTE_PAT}" - ) # Just in case model unreliable - - sections = re.split(rf"(?<=\n){QUOTE_PAT}", answer_raw) - sections_clean = [ - str(section).strip() for section in sections if str(section).strip() - ] - if not sections_clean: - return None, None - - answer = str(sections_clean[0]) - if len(sections) == 1: - return answer, None - return answer, sections_clean[1:] - - -def _extract_answer_quotes_json( - answer_dict: dict[str, str | list[str]] -) -> Tuple[Optional[str], Optional[list[str]]]: - answer_dict = {k.lower(): v for k, v in answer_dict.items()} - answer = str(answer_dict.get("answer")) - quotes = answer_dict.get("quotes") or answer_dict.get("quote") - if isinstance(quotes, str): - quotes = [quotes] - return answer, quotes - - -def _extract_answer_json(raw_model_output: str) -> dict: - try: - answer_json = extract_embedded_json(raw_model_output) - except (ValueError, JSONDecodeError): - # LLMs get confused when handling the list in the json. Sometimes it doesn't attend - # enough to the previous { token so it just ends the list of quotes and stops there - # here, we add logic to try to fix this LLM error. 
- answer_json = extract_embedded_json(raw_model_output + "}") - - if "answer" not in answer_json: - raise ValueError("Model did not output an answer as expected.") - - return answer_json - - -def separate_answer_quotes( - answer_raw: str, is_json_prompt: bool = False -) -> Tuple[Optional[str], Optional[list[str]]]: - """Takes in a raw model output and pulls out the answer and the quotes sections.""" - if is_json_prompt: - model_raw_json = _extract_answer_json(answer_raw) - return _extract_answer_quotes_json(model_raw_json) - - return _extract_answer_quotes_freeform(clean_up_code_blocks(answer_raw)) - - -def match_quotes_to_docs( - quotes: list[str], - chunks: list[InferenceChunk], - max_error_percent: float = QUOTE_ALLOWED_ERROR_PERCENT, - fuzzy_search: bool = False, - prefix_only_length: int = 100, -) -> DanswerQuotes: - danswer_quotes: list[DanswerQuote] = [] - for quote in quotes: - max_edits = math.ceil(float(len(quote)) * max_error_percent) - - for chunk in chunks: - if not chunk.source_links: - continue - - quote_clean = shared_precompare_cleanup( - clean_model_quote(quote, trim_length=prefix_only_length) - ) - chunk_clean = shared_precompare_cleanup(chunk.content) - - # Finding the offset of the quote in the plain text - if fuzzy_search: - re_search_str = ( - r"(" + re.escape(quote_clean) + r"){e<=" + str(max_edits) + r"}" - ) - found = regex.search(re_search_str, chunk_clean) - if not found: - continue - offset = found.span()[0] - else: - if quote_clean not in chunk_clean: - continue - offset = chunk_clean.index(quote_clean) - - # Extracting the link from the offset - curr_link = None - for link_offset, link in chunk.source_links.items(): - # Should always find one because offset is at least 0 and there - # must be a 0 link_offset - if int(link_offset) <= offset: - curr_link = link - else: - break - - danswer_quotes.append( - DanswerQuote( - quote=quote, - document_id=chunk.document_id, - link=curr_link, - source_type=chunk.source_type, - 
semantic_identifier=chunk.semantic_identifier, - blurb=chunk.blurb, - ) - ) - break - - return DanswerQuotes(quotes=danswer_quotes) - - -def process_answer( - answer_raw: str, - chunks: list[InferenceChunk], - is_json_prompt: bool = True, -) -> tuple[DanswerAnswer, DanswerQuotes]: - """Used (1) in the non-streaming case to process the model output - into an Answer and Quotes AND (2) after the complete streaming response - has been received to process the model output into an Answer and Quotes.""" - answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt) - if answer == UNCERTAINTY_PAT or not answer: - if answer == UNCERTAINTY_PAT: - logger.debug("Answer matched UNCERTAINTY_PAT") - else: - logger.debug("No answer extracted from raw output") - return DanswerAnswer(answer=None), DanswerQuotes(quotes=[]) - - logger.info(f"Answer: {answer}") - if not quote_strings: - logger.debug("No quotes extracted from raw output") - return DanswerAnswer(answer=answer), DanswerQuotes(quotes=[]) - logger.info(f"All quotes (including unmatched): {quote_strings}") - quotes = match_quotes_to_docs(quote_strings, chunks) - logger.debug(f"Final quotes: {quotes}") - - return DanswerAnswer(answer=answer), quotes - - -def _stream_json_answer_end(answer_so_far: str, next_token: str) -> bool: - next_token = next_token.replace('\\"', "") - # If the previous character is an escape token, don't consider the first character of next_token - # This does not work if it's an escaped escape sign before the " but this is rare, not worth handling - if answer_so_far and answer_so_far[-1] == "\\": - next_token = next_token[1:] - if '"' in next_token: - return True - return False - - -def _extract_quotes_from_completed_token_stream( - model_output: str, context_chunks: list[InferenceChunk], is_json_prompt: bool = True -) -> DanswerQuotes: - answer, quotes = process_answer(model_output, context_chunks, is_json_prompt) - if answer: - logger.info(answer) - elif model_output: - 
logger.warning("Answer extraction from model output failed.") - - return quotes - - -def process_model_tokens( - tokens: Iterator[str], - context_docs: list[InferenceChunk], - is_json_prompt: bool = True, -) -> Generator[DanswerAnswerPiece | DanswerQuotes, None, None]: - """Used in the streaming case to process the model output - into an Answer and Quotes - - Yields Answer tokens back out in a dict for streaming to frontend - When Answer section ends, yields dict with answer_finished key - Collects all the tokens at the end to form the complete model output""" - quote_pat = f"\n{QUOTE_PAT}" - # Sometimes worse model outputs new line instead of : - quote_loose = f"\n{quote_pat[:-1]}\n" - # Sometime model outputs two newlines before quote section - quote_pat_full = f"\n{quote_pat}" - model_output: str = "" - found_answer_start = False if is_json_prompt else True - found_answer_end = False - hold_quote = "" - for token in tokens: - model_previous = model_output - model_output += token - - if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output): - # Note, if the token that completes the pattern has additional text, for example if the token is "? - # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? 
in the - # event that the model outputs the UNCERTAINTY_PAT - found_answer_start = True - - # Prevent heavy cases of hallucinations where model is not even providing a json until later - if is_json_prompt and len(model_output) > 40: - logger.warning("LLM did not produce json as prompted") - found_answer_end = True - - continue - - if found_answer_start and not found_answer_end: - if is_json_prompt and _stream_json_answer_end(model_previous, token): - found_answer_end = True - yield DanswerAnswerPiece(answer_piece=None) - continue - elif not is_json_prompt: - if quote_pat in hold_quote + token or quote_loose in hold_quote + token: - found_answer_end = True - yield DanswerAnswerPiece(answer_piece=None) - continue - if hold_quote + token in quote_pat_full: - hold_quote += token - continue - yield DanswerAnswerPiece(answer_piece=hold_quote + token) - hold_quote = "" - - logger.debug(f"Raw Model QnA Output: {model_output}") - - yield _extract_quotes_from_completed_token_stream( - model_output=model_output, - context_chunks=context_docs, - is_json_prompt=is_json_prompt, - ) - - def simulate_streaming_response(model_out: str) -> Generator[str, None, None]: """Mock streaming by generating the passed in model output, character by character""" for token in model_out: diff --git a/backend/danswer/prompts/answer_validation.py b/backend/danswer/prompts/answer_validation.py index 1a29a48ce..28d184aca 100644 --- a/backend/danswer/prompts/answer_validation.py +++ b/backend/danswer/prompts/answer_validation.py @@ -1,18 +1,46 @@ # The following prompts are used for verifying the LLM answer after it is already produced. # Reflexion flow essentially. This feature can be toggled on/off +from danswer.configs.app_configs import CUSTOM_ANSWER_VALIDITY_CONDITIONS from danswer.prompts.constants import ANSWER_PAT from danswer.prompts.constants import QUESTION_PAT +ANSWER_VALIDITY_CONDITIONS = ( + """ +1. Query is asking for information that varies by person or is subjective. 
If there is not a \ +globally true answer, the language model should not respond, therefore any answer is invalid. +2. Answer addresses a related but different query. To be helpful, the model may provide \ +related information about a query but it won't match what the user is asking, this is invalid. +3. Answer is just some form of "I don\'t know" or "not enough information" without significant \ +additional useful information. Explaining why it does not know or cannot answer is invalid. +""" + if not CUSTOM_ANSWER_VALIDITY_CONDITIONS + else "\n".join( + [ + f"{indice+1}. {condition}" + for indice, condition in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS) + ] + ) +) + +ANSWER_FORMAT = ( + """ +1. True or False +2. True or False +3. True or False +""" + if not CUSTOM_ANSWER_VALIDITY_CONDITIONS + else "\n".join( + [ + f"{indice+1}. True or False" + for indice, _ in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS) + ] + ) +) ANSWER_VALIDITY_PROMPT = f""" You are an assistant to identify invalid query/answer pairs coming from a large language model. The query/answer pair is invalid if any of the following are True: -1. Query is asking for information that varies by person or is subjective. If there is not a \ -globally true answer, the language model should not respond, therefore any answer is invalid. -2. Answer addresses a related but different query. To be helpful, the model may provide provide \ -related information about a query but it won't match what the user is asking, this is invalid. -3. Answer is just some form of "I don\'t know" or "not enough information" without significant \ -additional useful information. Explaining why it does not know or cannot answer is invalid. +{ANSWER_VALIDITY_CONDITIONS} {QUESTION_PAT} {{user_query}} {ANSWER_PAT} {{llm_answer}} @@ -20,9 +48,7 @@ additional useful information. Explaining why it does not know or cannot answer ------------------------ You MUST answer in EXACTLY the following format: ``` -1. True or False -2. 
True or False -3. True or False +{ANSWER_FORMAT} Final Answer: Valid or Invalid ``` diff --git a/backend/danswer/prompts/chat_prompts.py b/backend/danswer/prompts/chat_prompts.py index bdb938090..e0b20243b 100644 --- a/backend/danswer/prompts/chat_prompts.py +++ b/backend/danswer/prompts/chat_prompts.py @@ -14,8 +14,8 @@ CITATION_REMINDER = """ Remember to provide inline citations in the format [1], [2], [3], etc. """ +ADDITIONAL_INFO = "\n\nAdditional Information:\n\t- {datetime_info}." -DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant." CHAT_USER_PROMPT = f""" Refer to the following context documents when responding to me.{{optional_ignore_statement}} @@ -48,9 +48,11 @@ CHAT_USER_CONTEXT_FREE_PROMPT = f""" # consider doing COT for this and keep it brief, but likely only small gains. SKIP_SEARCH = "Skip Search" YES_SEARCH = "Yes Search" + AGGRESSIVE_SEARCH_TEMPLATE = f""" Given the conversation history and a follow up query, determine if the system should call \ an external search tool to better answer the latest user input. +Your default response is {YES_SEARCH}. Respond "{SKIP_SEARCH}" if either: - There is sufficient information in chat history to FULLY and ACCURATELY answer the query AND \ @@ -62,7 +64,32 @@ Conversation History: {{chat_history}} {GENERAL_SEP_PAT} -If you are unsure, respond with {YES_SEARCH}. +If you are at all unsure, respond with {YES_SEARCH}. +Respond with EXACTLY and ONLY "{YES_SEARCH}" or "{SKIP_SEARCH}" + +Follow Up Input: +{{final_query}} +""".strip() + + +# TODO, templatize this so users don't need to make code changes to use this +AGGRESSIVE_SEARCH_TEMPLATE_LLAMA2 = f""" +You are an expert of a critical system. Given the conversation history and a follow up query, \ +determine if the system should call an external search tool to better answer the latest user input. + +Your default response is {YES_SEARCH}. +If you are even slightly unsure, respond with {YES_SEARCH}. 
+ +Respond "{SKIP_SEARCH}" if any of these are true: +- There is sufficient information in chat history to FULLY and ACCURATELY answer the query. +- The query is some form of request that does not require additional information to handle. +- You are absolutely sure about the question and there is no ambiguity in the answer or question. + +Conversation History: +{GENERAL_SEP_PAT} +{{chat_history}} +{GENERAL_SEP_PAT} + Respond with EXACTLY and ONLY "{YES_SEARCH}" or "{SKIP_SEARCH}" Follow Up Input: diff --git a/backend/danswer/prompts/constants.py b/backend/danswer/prompts/constants.py index 5fb9dbf84..40a37fc32 100644 --- a/backend/danswer/prompts/constants.py +++ b/backend/danswer/prompts/constants.py @@ -12,3 +12,5 @@ QUOTE_PAT = "Quote:" QUOTES_PAT_PLURAL = "Quotes:" INVALID_PAT = "Invalid:" SOURCES_KEY = "sources" + +DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant." diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index ddfdf2e08..ee1b492be 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -2,6 +2,7 @@ # It is used also for the one shot direct QA flow import json +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.constants import FINAL_QUERY_PAT from danswer.prompts.constants import GENERAL_SEP_PAT from danswer.prompts.constants import QUESTION_PAT @@ -96,6 +97,35 @@ SAMPLE RESPONSE: """.strip() +# similar to the chat flow, but with the option of including a +# "conversation history" block +CITATIONS_PROMPT = f""" +Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} +CONTEXT: +{GENERAL_SEP_PAT} +{{context_docs_str}} +{GENERAL_SEP_PAT} + +{{history_block}}{{task_prompt}} + +{QUESTION_PAT.upper()} +{{user_query}} +""" + +# with tool calling, the documents are in a separate "tool" message +# NOTE: need to add the extra line about "getting right to 
the point" since the +# tool calling models from OpenAI tend to be more verbose +CITATIONS_PROMPT_FOR_TOOL_CALLING = f""" +Refer to the provided context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} \ +You should always get right to the point, and never use extraneous language. + +{{task_prompt}} + +{QUESTION_PAT.upper()} +{{user_query}} +""" + + # For weak LLM which only takes one chunk and cannot output json # Also not requiring quotes as it tends to not work WEAK_LLM_PROMPT = f""" diff --git a/backend/danswer/prompts/prompt_utils.py b/backend/danswer/prompts/prompt_utils.py index 4c0de783f..9dc939eb1 100644 --- a/backend/danswer/prompts/prompt_utils.py +++ b/backend/danswer/prompts/prompt_utils.py @@ -1,9 +1,191 @@ +from collections.abc import Sequence from datetime import datetime +from typing import cast + +from langchain_core.messages import BaseMessage + +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.configs.constants import DocumentSource +from danswer.db.models import Prompt +from danswer.llm.answering.models import PromptConfig +from danswer.prompts.chat_prompts import ADDITIONAL_INFO +from danswer.prompts.chat_prompts import CITATION_REMINDER +from danswer.prompts.constants import CODE_BLOCK_PAT +from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT +from danswer.search.models import InferenceChunk -def get_current_llm_day_time() -> str: +MOST_BASIC_PROMPT = "You are a helpful AI assistant." +DANSWER_DATETIME_REPLACEMENT = "DANSWER_DATETIME_REPLACEMENT" +BASIC_TIME_STR = "The current date is {datetime_info}." 
+ + +def get_current_llm_day_time( + include_day_of_week: bool = True, full_sentence: bool = True +) -> str: current_datetime = datetime.now() # Format looks like: "October 16, 2023 14:30" formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M") day_of_week = current_datetime.strftime("%A") - return f"The current day and time is {day_of_week} {formatted_datetime}" + if full_sentence: + return f"The current day and time is {day_of_week} {formatted_datetime}" + if include_day_of_week: + return f"{day_of_week} {formatted_datetime}" + return f"{formatted_datetime}" + + +def add_time_to_system_prompt(system_prompt: str) -> str: + if DANSWER_DATETIME_REPLACEMENT in system_prompt: + return system_prompt.replace( + DANSWER_DATETIME_REPLACEMENT, + get_current_llm_day_time(full_sentence=False, include_day_of_week=True), + ) + + if system_prompt: + return system_prompt + ADDITIONAL_INFO.format( + datetime_info=get_current_llm_day_time() + ) + else: + return ( + MOST_BASIC_PROMPT + + " " + + BASIC_TIME_STR.format(datetime_info=get_current_llm_day_time()) + ) + + +def build_task_prompt_reminders( + prompt: Prompt | PromptConfig, + use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION), + citation_str: str = CITATION_REMINDER, + language_hint_str: str = LANGUAGE_HINT, +) -> str: + base_task = prompt.task_prompt + citation_or_nothing = citation_str if prompt.include_citations else "" + language_hint_or_nothing = language_hint_str.lstrip() if use_language_hint else "" + return base_task + citation_or_nothing + language_hint_or_nothing + + +# Maps connector enum string to a more natural language representation for the LLM +# If not on the list, uses the original but slightly cleaned up, see below +CONNECTOR_NAME_MAP = { + "web": "Website", + "requesttracker": "Request Tracker", + "github": "GitHub", + "file": "File Upload", +} + + +def clean_up_source(source_str: str) -> str: + if source_str in CONNECTOR_NAME_MAP: + return CONNECTOR_NAME_MAP[source_str] + return 
source_str.replace("_", " ").title() + + +def build_doc_context_str( + semantic_identifier: str, + source_type: DocumentSource, + content: str, + metadata_dict: dict[str, str | list[str]], + updated_at: datetime | None, + ind: int, + include_metadata: bool = True, +) -> str: + context_str = "" + if include_metadata: + context_str += f"DOCUMENT {ind}: {semantic_identifier}\n" + context_str += f"Source: {clean_up_source(source_type)}\n" + + for k, v in metadata_dict.items(): + if isinstance(v, list): + v_str = ", ".join(v) + context_str += f"{k.capitalize()}: {v_str}\n" + else: + context_str += f"{k.capitalize()}: {v}\n" + + if updated_at: + update_str = updated_at.strftime("%B %d, %Y %H:%M") + context_str += f"Updated: {update_str}\n" + context_str += f"{CODE_BLOCK_PAT.format(content.strip())}\n\n\n" + return context_str + + +def build_complete_context_str( + context_docs: Sequence[LlmDoc | InferenceChunk], + include_metadata: bool = True, +) -> str: + context_str = "" + for ind, doc in enumerate(context_docs, start=1): + context_str += build_doc_context_str( + semantic_identifier=doc.semantic_identifier, + source_type=doc.source_type, + content=doc.content, + metadata_dict=doc.metadata, + updated_at=doc.updated_at, + ind=ind, + include_metadata=include_metadata, + ) + + return context_str.strip() + + +_PER_MESSAGE_TOKEN_BUFFER = 7 + + +def find_last_index(lst: list[int], max_prompt_tokens: int) -> int: + """From the back, find the index of the last element to include + before the list exceeds the maximum""" + running_sum = 0 + + last_ind = 0 + for i in range(len(lst) - 1, -1, -1): + running_sum += lst[i] + _PER_MESSAGE_TOKEN_BUFFER + if running_sum > max_prompt_tokens: + last_ind = i + 1 + break + if last_ind >= len(lst): + raise ValueError("Last message alone is too large!") + return last_ind + + +def drop_messages_history_overflow( + messages_with_token_cnts: list[tuple[BaseMessage, int]], + max_allowed_tokens: int, +) -> list[BaseMessage]: + """As message 
history grows, messages need to be dropped starting from the furthest in the past. + The System message should be kept if at all possible and the latest user input which is inserted in the + prompt template must be included""" + + final_messages: list[BaseMessage] = [] + messages, token_counts = cast( + tuple[list[BaseMessage], list[int]], zip(*messages_with_token_cnts) + ) + system_msg = ( + final_messages[0] + if final_messages and final_messages[0].type == "system" + else None + ) + + history_msgs = messages[:-1] + final_msg = messages[-1] + if final_msg.type != "human": + if final_msg.type != "tool": + raise ValueError("Last message must be user input OR a tool result") + else: + final_msgs = messages[-3:] + history_msgs = messages[:-3] + else: + final_msgs = [final_msg] + + # Start dropping from the history if necessary + ind_prev_msg_start = find_last_index( + token_counts, max_prompt_tokens=max_allowed_tokens + ) + + if system_msg and ind_prev_msg_start <= len(history_msgs): + final_messages.append(system_msg) + + final_messages.extend(history_msgs[ind_prev_msg_start:]) + final_messages.extend(final_msgs) + + return final_messages diff --git a/backend/danswer/prompts/token_counts.py b/backend/danswer/prompts/token_counts.py index 35d082b8d..de77d0b8d 100644 --- a/backend/danswer/prompts/token_counts.py +++ b/backend/danswer/prompts/token_counts.py @@ -1,10 +1,11 @@ from danswer.llm.utils import check_number_of_tokens +from danswer.prompts.chat_prompts import ADDITIONAL_INFO from danswer.prompts.chat_prompts import CHAT_USER_PROMPT from danswer.prompts.chat_prompts import CITATION_REMINDER -from danswer.prompts.chat_prompts import DEFAULT_IGNORE_STATEMENT from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT +from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT - +from danswer.prompts.prompt_utils import get_current_llm_day_time # tokens outside of the actual persona's 
"user_prompt" that make up the end # user message @@ -22,3 +23,8 @@ CITATION_STATEMENT_TOKEN_CNT = check_number_of_tokens(REQUIRE_CITATION_STATEMENT CITATION_REMINDER_TOKEN_CNT = check_number_of_tokens(CITATION_REMINDER) LANGUAGE_HINT_TOKEN_CNT = check_number_of_tokens(LANGUAGE_HINT) + +# If the date/time is inserted directly as a replacement in the prompt, this is a slight over count +ADDITIONAL_INFO_TOKEN_CNT = check_number_of_tokens( + ADDITIONAL_INFO.format(datetime_info=get_current_llm_day_time()) +) diff --git a/backend/danswer/search/enums.py b/backend/danswer/search/enums.py new file mode 100644 index 000000000..399083355 --- /dev/null +++ b/backend/danswer/search/enums.py @@ -0,0 +1,35 @@ +"""NOTE: this needs to be separate from models.py because of circular imports. +Both search/models.py and db/models.py import enums from this file AND +search/models.py imports from db/models.py.""" +from enum import Enum + + +class OptionalSearchSetting(str, Enum): + ALWAYS = "always" + NEVER = "never" + # Determine whether to run search based on history and latest query + AUTO = "auto" + + +class RecencyBiasSetting(str, Enum): + FAVOR_RECENT = "favor_recent" # 2x decay rate + BASE_DECAY = "base_decay" + NO_DECAY = "no_decay" + # Determine based on query if to use base_decay or favor_recent + AUTO = "auto" + + +class SearchType(str, Enum): + KEYWORD = "keyword" + SEMANTIC = "semantic" + HYBRID = "hybrid" + + +class QueryFlow(str, Enum): + SEARCH = "search" + QUESTION_ANSWER = "question-answer" + + +class EmbedTextType(str, Enum): + QUERY = "query" + PASSAGE = "passage" diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index 93c5a1868..16a64d820 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -1,46 +1,26 @@ from datetime import datetime -from enum import Enum from typing import Any from pydantic import BaseModel +from pydantic import validator from danswer.configs.chat_configs import 
DISABLE_LLM_CHUNK_FILTER +from danswer.configs.chat_configs import HYBRID_ALPHA from danswer.configs.chat_configs import NUM_RERANKED_RESULTS from danswer.configs.chat_configs import NUM_RETURNED_HITS from danswer.configs.constants import DocumentSource -from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW +from danswer.db.models import Persona +from danswer.indexing.models import BaseChunk +from danswer.search.enums import OptionalSearchSetting +from danswer.search.enums import SearchType +from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW + MAX_METRICS_CONTENT = ( 200 # Just need enough characters to identify where in the doc the chunk is ) -class OptionalSearchSetting(str, Enum): - ALWAYS = "always" - NEVER = "never" - # Determine whether to run search based on history and latest query - AUTO = "auto" - - -class RecencyBiasSetting(str, Enum): - FAVOR_RECENT = "favor_recent" # 2x decay rate - BASE_DECAY = "base_decay" - NO_DECAY = "no_decay" - # Determine based on query if to use base_decay or favor_recent - AUTO = "auto" - - -class SearchType(str, Enum): - KEYWORD = "keyword" - SEMANTIC = "semantic" - HYBRID = "hybrid" - - -class QueryFlow(str, Enum): - SEARCH = "search" - QUESTION_ANSWER = "question-answer" - - class Tag(BaseModel): tag_key: str tag_value: str @@ -64,7 +44,45 @@ class ChunkMetric(BaseModel): score: float -class SearchQuery(BaseModel): +class ChunkContext(BaseModel): + # Additional surrounding context options, if full doc, then chunks are deduped + # If surrounding context overlap, it is combined into one + chunks_above: int = 0 + chunks_below: int = 0 + full_doc: bool = False + + @validator("chunks_above", "chunks_below", pre=True, each_item=False) + def check_non_negative(cls, value: int, field: Any) -> int: + if value < 0: + raise ValueError(f"{field.name} must be non-negative") + return value + + +class SearchRequest(ChunkContext): + """Input to the SearchPipeline.""" + + query: str + search_type: 
SearchType = SearchType.HYBRID + + human_selected_filters: BaseFilters | None = None + enable_auto_detect_filters: bool | None = None + persona: Persona | None = None + + # if None, no offset / limit + offset: int | None = None + limit: int | None = None + + recency_bias_multiplier: float = 1.0 + hybrid_alpha: float = HYBRID_ALPHA + # This is to forcibly skip (or run) the step, if None it uses the system defaults + skip_rerank: bool | None = None + skip_llm_chunk_filter: bool | None = None + + class Config: + arbitrary_types_allowed = True + + +class SearchQuery(ChunkContext): query: str filters: IndexFilters recency_bias_multiplier: float @@ -72,9 +90,9 @@ class SearchQuery(BaseModel): offset: int = 0 search_type: SearchType = SearchType.HYBRID skip_rerank: bool = not ENABLE_RERANKING_REAL_TIME_FLOW + skip_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER # Only used if not skip_rerank num_rerank: int | None = NUM_RERANKED_RESULTS - skip_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER # Only used if not skip_llm_chunk_filter max_llm_filter_chunks: int = NUM_RERANKED_RESULTS @@ -82,15 +100,15 @@ class SearchQuery(BaseModel): frozen = True -class RetrievalDetails(BaseModel): +class RetrievalDetails(ChunkContext): # Use LLM to determine whether to do a retrieval or only rely on existing history # If the Persona is configured to not run search (0 chunks), this is bypassed # If no Prompt is configured, the only search results are shown, this is bypassed - run_search: OptionalSearchSetting + run_search: OptionalSearchSetting = OptionalSearchSetting.ALWAYS # Is this a real-time/streaming call or a question where Danswer can take more time? 
# Used to determine reranking flow - real_time: bool - # The following have defaults in the Persona settings which can be overriden via + real_time: bool = True + # The following have defaults in the Persona settings which can be overridden via # the query, if None, then use Persona settings filters: BaseFilters | None = None enable_auto_detect_filters: bool | None = None @@ -99,6 +117,63 @@ class RetrievalDetails(BaseModel): limit: int | None = None +class InferenceChunk(BaseChunk): + document_id: str + source_type: DocumentSource + semantic_identifier: str + boost: int + recency_bias: float + score: float | None + hidden: bool + metadata: dict[str, str | list[str]] + # Matched sections in the chunk. Uses Vespa syntax e.g. TEXT + # to specify that a set of words should be highlighted. For example: + # ["the answer is 42", "he couldn't find an answer"] + match_highlights: list[str] + # when the doc was last updated + updated_at: datetime | None + primary_owners: list[str] | None = None + secondary_owners: list[str] | None = None + + @property + def unique_id(self) -> str: + return f"{self.document_id}__{self.chunk_id}" + + def __repr__(self) -> str: + blurb_words = self.blurb.split() + short_blurb = "" + for word in blurb_words: + if not short_blurb: + short_blurb = word + continue + if len(short_blurb) > 25: + break + short_blurb += " " + word + return f"Inference Chunk: {self.document_id} - {short_blurb}..." + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, InferenceChunk): + return False + return (self.document_id, self.chunk_id) == (other.document_id, other.chunk_id) + + def __hash__(self) -> int: + return hash((self.document_id, self.chunk_id)) + + +class InferenceSection(InferenceChunk): + """Section is a combination of chunks. 
A section could be a single chunk, several consecutive + chunks or the entire document""" + + combined_content: str + + @classmethod + def from_chunk( + cls, inf_chunk: InferenceChunk, content: str | None = None + ) -> "InferenceSection": + inf_chunk_data = inf_chunk.dict() + return cls(**inf_chunk_data, combined_content=content or inf_chunk.content) + + class SearchDoc(BaseModel): document_id: str chunk_ind: int @@ -138,11 +213,18 @@ class SavedSearchDoc(SearchDoc): def from_search_doc( cls, search_doc: SearchDoc, db_doc_id: int = 0 ) -> "SavedSearchDoc": - """IMPORTANT: careful using this and not providing a db_doc_id""" + """IMPORTANT: careful using this and not providing a db_doc_id If db_doc_id is not + provided, it won't be able to actually fetch the saved doc and info later on. So only skip + providing this if the SavedSearchDoc will not be used in the future""" search_doc_data = search_doc.dict() - search_doc_data["score"] = search_doc_data.get("score", 0.0) + search_doc_data["score"] = search_doc_data.get("score") or 0.0 return cls(**search_doc_data, db_doc_id=db_doc_id) + def __lt__(self, other: Any) -> bool: + if not isinstance(other, SavedSearchDoc): + return NotImplemented + return self.score < other.score + class RetrievalDocs(BaseModel): top_documents: list[SavedSearchDoc] diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py new file mode 100644 index 000000000..0c757232a --- /dev/null +++ b/backend/danswer/search/pipeline.py @@ -0,0 +1,350 @@ +from collections import defaultdict +from collections.abc import Callable +from collections.abc import Generator +from typing import cast + +from pydantic import BaseModel +from sqlalchemy.orm import Session + +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.db.models import User +from danswer.document_index.factory import get_default_document_index +from 
danswer.search.enums import QueryFlow +from danswer.search.enums import SearchType +from danswer.search.models import IndexFilters +from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceSection +from danswer.search.models import RerankMetricsContainer +from danswer.search.models import RetrievalMetricsContainer +from danswer.search.models import SearchQuery +from danswer.search.models import SearchRequest +from danswer.search.postprocessing.postprocessing import search_postprocessing +from danswer.search.preprocessing.preprocessing import retrieval_preprocessing +from danswer.search.retrieval.search_runner import retrieve_chunks +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + + +class ChunkRange(BaseModel): + chunk: InferenceChunk + start: int + end: int + combined_content: str | None = None + + +def merge_chunk_intervals(chunk_ranges: list[ChunkRange]) -> list[ChunkRange]: + """This acts on a single document to merge the overlapping ranges of sections + Algo explained here for easy understanding: https://leetcode.com/problems/merge-intervals + """ + sorted_ranges = sorted(chunk_ranges, key=lambda x: x.start) + + ans: list[ChunkRange] = [] + + for chunk_range in sorted_ranges: + if not ans or ans[-1].end < chunk_range.start: + ans.append(chunk_range) + else: + ans[-1].end = max(ans[-1].end, chunk_range.end) + + return ans + + +class SearchPipeline: + def __init__( + self, + search_request: SearchRequest, + user: User | None, + db_session: Session, + bypass_acl: bool = False, # NOTE: VERY DANGEROUS, USE WITH CAUTION + retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] + | None = None, + rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, + ): + self.search_request = search_request + self.user = user + self.db_session = db_session + self.bypass_acl = bypass_acl + self.retrieval_metrics_callback = retrieval_metrics_callback + 
self.rerank_metrics_callback = rerank_metrics_callback + + self.embedding_model = get_current_db_embedding_model(db_session) + self.document_index = get_default_document_index( + primary_index_name=self.embedding_model.index_name, + secondary_index_name=None, + ) + + self._search_query: SearchQuery | None = None + self._predicted_search_type: SearchType | None = None + self._predicted_flow: QueryFlow | None = None + + self._retrieved_chunks: list[InferenceChunk] | None = None + self._retrieved_sections: list[InferenceSection] | None = None + self._reranked_chunks: list[InferenceChunk] | None = None + self._reranked_sections: list[InferenceSection] | None = None + self._relevant_chunk_indices: list[int] | None = None + + # If chunks have been merged, the LLM filter flow no longer applies + # as the indices no longer match. Can be implemented later as needed + self.ran_merge_chunk = False + + # generator state + self._postprocessing_generator: Generator[ + list[InferenceChunk] | list[str], None, None + ] | None = None + + def _combine_chunks(self, post_rerank: bool) -> list[InferenceSection]: + if not post_rerank and self._retrieved_sections: + return self._retrieved_sections + if post_rerank and self._reranked_sections: + return self._reranked_sections + + if not post_rerank: + chunks = self.retrieved_chunks + else: + chunks = self.reranked_chunks + + if self._search_query is None: + # Should never happen + raise RuntimeError("Failed in Query Preprocessing") + + functions_with_args: list[tuple[Callable, tuple]] = [] + final_inference_sections = [] + + # Nothing to combine, just return the chunks + if ( + not self._search_query.chunks_above + and not self._search_query.chunks_below + and not self._search_query.full_doc + ): + return [InferenceSection.from_chunk(chunk) for chunk in chunks] + + # If chunk merges have been run, LLM reranking loses meaning + # Needs reimplementation, out of scope for now + self.ran_merge_chunk = True + + # Full doc setting takes priority 
+ if self._search_query.full_doc: + seen_document_ids = set() + unique_chunks = [] + for chunk in chunks: + if chunk.document_id not in seen_document_ids: + seen_document_ids.add(chunk.document_id) + unique_chunks.append(chunk) + + functions_with_args.append( + ( + self.document_index.id_based_retrieval, + ( + chunk.document_id, + None, # Start chunk ind + None, # End chunk ind + # There is no chunk level permissioning, this expansion around chunks + # can be assumed to be safe + IndexFilters(access_control_list=None), + ), + ) + ) + + list_inference_chunks = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=False + ) + + for ind, chunk in enumerate(unique_chunks): + inf_chunks = list_inference_chunks[ind] + combined_content = "\n".join([chunk.content for chunk in inf_chunks]) + final_inference_sections.append( + InferenceSection.from_chunk(chunk, content=combined_content) + ) + + return final_inference_sections + + # General flow: + # - Combine chunks into lists by document_id + # - For each document, run merge-intervals to get combined ranges + # - Fetch all of the new chunks with contents for the combined ranges + # - Map it back to the combined ranges (which each know their "center" chunk) + # - Reiterate the chunks again and map to the results above based on the chunk. + # This maintains the original chunks ordering. Note, we cannot simply sort by score here + # as reranking flow may wipe the scores for a lot of the chunks. 
+ doc_chunk_ranges_map = defaultdict(list) + for chunk in chunks: + doc_chunk_ranges_map[chunk.document_id].append( + ChunkRange( + chunk=chunk, + start=max(0, chunk.chunk_id - self._search_query.chunks_above), + # No max known ahead of time, filter will handle this anyway + end=chunk.chunk_id + self._search_query.chunks_below, + ) + ) + + merged_ranges = [ + merge_chunk_intervals(ranges) for ranges in doc_chunk_ranges_map.values() + ] + reverse_map = {r.chunk: r for doc_ranges in merged_ranges for r in doc_ranges} + + for chunk_range in reverse_map.values(): + functions_with_args.append( + ( + self.document_index.id_based_retrieval, + ( + chunk_range.chunk.document_id, + chunk_range.start, + chunk_range.end, + # There is no chunk level permissioning, this expansion around chunks + # can be assumed to be safe + IndexFilters(access_control_list=None), + ), + ) + ) + + # list of list of inference chunks where the inner list needs to be combined for content + list_inference_chunks = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=False + ) + + for ind, chunk_range in enumerate(reverse_map.values()): + inf_chunks = list_inference_chunks[ind] + combined_content = "\n".join([chunk.content for chunk in inf_chunks]) + chunk_range.combined_content = combined_content + + for chunk in chunks: + if chunk not in reverse_map: + continue + chunk_range = reverse_map[chunk] + final_inference_sections.append( + InferenceSection.from_chunk( + chunk_range.chunk, content=chunk_range.combined_content + ) + ) + + return final_inference_sections + + """Pre-processing""" + + def _run_preprocessing(self) -> None: + ( + final_search_query, + predicted_search_type, + predicted_flow, + ) = retrieval_preprocessing( + search_request=self.search_request, + user=self.user, + db_session=self.db_session, + bypass_acl=self.bypass_acl, + ) + self._predicted_search_type = predicted_search_type + self._predicted_flow = predicted_flow + self._search_query = final_search_query + + 
@property + def search_query(self) -> SearchQuery: + if self._search_query is not None: + return self._search_query + + self._run_preprocessing() + return cast(SearchQuery, self._search_query) + + @property + def predicted_search_type(self) -> SearchType: + if self._predicted_search_type is not None: + return self._predicted_search_type + + self._run_preprocessing() + return cast(SearchType, self._predicted_search_type) + + @property + def predicted_flow(self) -> QueryFlow: + if self._predicted_flow is not None: + return self._predicted_flow + + self._run_preprocessing() + return cast(QueryFlow, self._predicted_flow) + + """Retrieval""" + + @property + def retrieved_chunks(self) -> list[InferenceChunk]: + if self._retrieved_chunks is not None: + return self._retrieved_chunks + + self._retrieved_chunks = retrieve_chunks( + query=self.search_query, + document_index=self.document_index, + db_session=self.db_session, + hybrid_alpha=self.search_request.hybrid_alpha, + multilingual_expansion_str=MULTILINGUAL_QUERY_EXPANSION, + retrieval_metrics_callback=self.retrieval_metrics_callback, + ) + + return cast(list[InferenceChunk], self._retrieved_chunks) + + @property + def retrieved_sections(self) -> list[InferenceSection]: + # Calls retrieved_chunks inside + self._retrieved_sections = self._combine_chunks(post_rerank=False) + return self._retrieved_sections + + """Post-Processing""" + + @property + def reranked_chunks(self) -> list[InferenceChunk]: + if self._reranked_chunks is not None: + return self._reranked_chunks + + self._postprocessing_generator = search_postprocessing( + search_query=self.search_query, + retrieved_chunks=self.retrieved_chunks, + rerank_metrics_callback=self.rerank_metrics_callback, + ) + self._reranked_chunks = cast( + list[InferenceChunk], next(self._postprocessing_generator) + ) + return self._reranked_chunks + + @property + def reranked_sections(self) -> list[InferenceSection]: + # Calls reranked_chunks inside + self._reranked_sections = 
self._combine_chunks(post_rerank=True) + return self._reranked_sections + + @property + def relevant_chunk_indices(self) -> list[int]: + # If chunks have been merged, then we cannot simply rely on the leading chunk + # relevance, there is no way to get the full relevance of the Section now + # without running a more token heavy pass. This can be an option but not + # implementing now. + if self.ran_merge_chunk: + return [] + + if self._relevant_chunk_indices is not None: + return self._relevant_chunk_indices + + # run first step of postprocessing generator if not already done + reranked_docs = self.reranked_chunks + + relevant_chunk_ids = next( + cast(Generator[list[str], None, None], self._postprocessing_generator) + ) + self._relevant_chunk_indices = [ + ind + for ind, chunk in enumerate(reranked_docs) + if chunk.unique_id in relevant_chunk_ids + ] + return self._relevant_chunk_indices + + @property + def chunk_relevance_list(self) -> list[bool]: + return [ + True if ind in self.relevant_chunk_indices else False + for ind in range(len(self.reranked_chunks)) + ] + + @property + def section_relevance_list(self) -> list[bool]: + if self.ran_merge_chunk: + return [False] * len(self.reranked_sections) + + return [ + True if ind in self.relevant_chunk_indices else False + for ind in range(len(self.reranked_chunks)) + ] diff --git a/backend/danswer/search/postprocessing/postprocessing.py b/backend/danswer/search/postprocessing/postprocessing.py new file mode 100644 index 000000000..f7c750eaf --- /dev/null +++ b/backend/danswer/search/postprocessing/postprocessing.py @@ -0,0 +1,223 @@ +from collections.abc import Callable +from collections.abc import Generator +from typing import cast + +import numpy + +from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX +from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MIN +from danswer.document_index.document_index_utils import ( + translate_boost_count_to_multiplier, +) +from danswer.search.models import 
ChunkMetric +from danswer.search.models import InferenceChunk +from danswer.search.models import MAX_METRICS_CONTENT +from danswer.search.models import RerankMetricsContainer +from danswer.search.models import SearchQuery +from danswer.search.models import SearchType +from danswer.search.search_nlp_models import CrossEncoderEnsembleModel +from danswer.secondary_llm_flows.chunk_usefulness import llm_batch_eval_chunks +from danswer.utils.logger import setup_logger +from danswer.utils.threadpool_concurrency import FunctionCall +from danswer.utils.threadpool_concurrency import run_functions_in_parallel +from danswer.utils.timing import log_function_time + + +logger = setup_logger() + + +def _log_top_chunk_links(search_flow: str, chunks: list[InferenceChunk]) -> None: + top_links = [ + c.source_links[0] if c.source_links is not None else "No Link" for c in chunks + ] + logger.info(f"Top links from {search_flow} search: {', '.join(top_links)}") + + +def should_rerank(query: SearchQuery) -> bool: + # Don't re-rank for keyword search + return query.search_type != SearchType.KEYWORD and not query.skip_rerank + + +def should_apply_llm_based_relevance_filter(query: SearchQuery) -> bool: + return not query.skip_llm_chunk_filter + + +@log_function_time(print_only=True) +def semantic_reranking( + query: str, + chunks: list[InferenceChunk], + model_min: int = CROSS_ENCODER_RANGE_MIN, + model_max: int = CROSS_ENCODER_RANGE_MAX, + rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, +) -> tuple[list[InferenceChunk], list[int]]: + """Reranks chunks based on cross-encoder models. Additionally provides the original indices + of the chunks in their new sorted order. 
+ + Note: this updates the chunks in place, it updates the chunk scores which came from retrieval + """ + cross_encoders = CrossEncoderEnsembleModel() + passages = [chunk.content for chunk in chunks] + sim_scores_floats = cross_encoders.predict(query=query, passages=passages) + + sim_scores = [numpy.array(scores) for scores in sim_scores_floats] + + raw_sim_scores = cast(numpy.ndarray, sum(sim_scores) / len(sim_scores)) + + cross_models_min = numpy.min(sim_scores) + + shifted_sim_scores = sum( + [enc_n_scores - cross_models_min for enc_n_scores in sim_scores] + ) / len(sim_scores) + + boosts = [translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks] + recency_multiplier = [chunk.recency_bias for chunk in chunks] + boosted_sim_scores = shifted_sim_scores * boosts * recency_multiplier + normalized_b_s_scores = (boosted_sim_scores + cross_models_min - model_min) / ( + model_max - model_min + ) + orig_indices = [i for i in range(len(normalized_b_s_scores))] + scored_results = list( + zip(normalized_b_s_scores, raw_sim_scores, chunks, orig_indices) + ) + scored_results.sort(key=lambda x: x[0], reverse=True) + ranked_sim_scores, ranked_raw_scores, ranked_chunks, ranked_indices = zip( + *scored_results + ) + + logger.debug( + f"Reranked (Boosted + Time Weighted) similarity scores: {ranked_sim_scores}" + ) + + # Assign new chunk scores based on reranking + for ind, chunk in enumerate(ranked_chunks): + chunk.score = ranked_sim_scores[ind] + + if rerank_metrics_callback is not None: + chunk_metrics = [ + ChunkMetric( + document_id=chunk.document_id, + chunk_content_start=chunk.content[:MAX_METRICS_CONTENT], + first_link=chunk.source_links[0] if chunk.source_links else None, + score=chunk.score if chunk.score is not None else 0, + ) + for chunk in ranked_chunks + ] + + rerank_metrics_callback( + RerankMetricsContainer( + metrics=chunk_metrics, raw_similarity_scores=ranked_raw_scores # type: ignore + ) + ) + + return list(ranked_chunks), list(ranked_indices) + + 
+def rerank_chunks( + query: SearchQuery, + chunks_to_rerank: list[InferenceChunk], + rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, +) -> list[InferenceChunk]: + ranked_chunks, _ = semantic_reranking( + query=query.query, + chunks=chunks_to_rerank[: query.num_rerank], + rerank_metrics_callback=rerank_metrics_callback, + ) + lower_chunks = chunks_to_rerank[query.num_rerank :] + # Scores from rerank cannot be meaningfully combined with scores without rerank + for lower_chunk in lower_chunks: + lower_chunk.score = None + ranked_chunks.extend(lower_chunks) + return ranked_chunks + + +@log_function_time(print_only=True) +def filter_chunks( + query: SearchQuery, + chunks_to_filter: list[InferenceChunk], +) -> list[str]: + """Filters chunks based on whether the LLM thought they were relevant to the query. + + Returns a list of the unique chunk IDs that were marked as relevant""" + chunks_to_filter = chunks_to_filter[: query.max_llm_filter_chunks] + llm_chunk_selection = llm_batch_eval_chunks( + query=query.query, + chunk_contents=[chunk.content for chunk in chunks_to_filter], + ) + return [ + chunk.unique_id + for ind, chunk in enumerate(chunks_to_filter) + if llm_chunk_selection[ind] + ] + + +def search_postprocessing( + search_query: SearchQuery, + retrieved_chunks: list[InferenceChunk], + rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, +) -> Generator[list[InferenceChunk] | list[str], None, None]: + post_processing_tasks: list[FunctionCall] = [] + + rerank_task_id = None + chunks_yielded = False + if should_rerank(search_query): + post_processing_tasks.append( + FunctionCall( + rerank_chunks, + ( + search_query, + retrieved_chunks, + rerank_metrics_callback, + ), + ) + ) + rerank_task_id = post_processing_tasks[-1].result_id + else: + final_chunks = retrieved_chunks + # NOTE: if we don't rerank, we can return the chunks immediately + # since we know this is the final order + 
_log_top_chunk_links(search_query.search_type.value, final_chunks) + yield final_chunks + chunks_yielded = True + + llm_filter_task_id = None + if should_apply_llm_based_relevance_filter(search_query): + post_processing_tasks.append( + FunctionCall( + filter_chunks, + (search_query, retrieved_chunks[: search_query.max_llm_filter_chunks]), + ) + ) + llm_filter_task_id = post_processing_tasks[-1].result_id + + post_processing_results = ( + run_functions_in_parallel(post_processing_tasks) + if post_processing_tasks + else {} + ) + reranked_chunks = cast( + list[InferenceChunk] | None, + post_processing_results.get(str(rerank_task_id)) if rerank_task_id else None, + ) + if reranked_chunks: + if chunks_yielded: + logger.error( + "Trying to yield re-ranked chunks, but chunks were already yielded. This should never happen." + ) + else: + _log_top_chunk_links(search_query.search_type.value, reranked_chunks) + yield reranked_chunks + + llm_chunk_selection = cast( + list[str] | None, + post_processing_results.get(str(llm_filter_task_id)) + if llm_filter_task_id + else None, + ) + if llm_chunk_selection is not None: + yield [ + chunk.unique_id + for chunk in reranked_chunks or retrieved_chunks + if chunk.unique_id in llm_chunk_selection + ] + else: + yield cast(list[str], []) diff --git a/backend/danswer/search/access_filters.py b/backend/danswer/search/preprocessing/access_filters.py similarity index 100% rename from backend/danswer/search/access_filters.py rename to backend/danswer/search/preprocessing/access_filters.py diff --git a/backend/danswer/search/danswer_helper.py b/backend/danswer/search/preprocessing/danswer_helper.py similarity index 92% rename from backend/danswer/search/danswer_helper.py rename to backend/danswer/search/preprocessing/danswer_helper.py index e3de6f923..88e465dac 100644 --- a/backend/danswer/search/danswer_helper.py +++ b/backend/danswer/search/preprocessing/danswer_helper.py @@ -1,17 +1,20 @@ -from transformers import AutoTokenizer # 
type:ignore +from typing import TYPE_CHECKING -from danswer.search.models import QueryFlow +from danswer.search.enums import QueryFlow from danswer.search.models import SearchType +from danswer.search.retrieval.search_runner import remove_stop_words_and_punctuation from danswer.search.search_nlp_models import get_default_tokenizer from danswer.search.search_nlp_models import IntentModel -from danswer.search.search_runner import remove_stop_words_and_punctuation from danswer.server.query_and_chat.models import HelperResponse from danswer.utils.logger import setup_logger logger = setup_logger() +if TYPE_CHECKING: + from transformers import AutoTokenizer # type:ignore -def count_unk_tokens(text: str, tokenizer: AutoTokenizer) -> int: + +def count_unk_tokens(text: str, tokenizer: "AutoTokenizer") -> int: """Unclear if the wordpiece tokenizer used is actually tokenizing anything as the [UNK] token It splits up even foreign characters and unicode emojis without using UNK""" tokenized_text = tokenizer.tokenize(text) diff --git a/backend/danswer/search/request_preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py similarity index 73% rename from backend/danswer/search/request_preprocessing.py rename to backend/danswer/search/preprocessing/preprocessing.py index e74618d39..ab22c5d67 100644 --- a/backend/danswer/search/request_preprocessing.py +++ b/backend/danswer/search/preprocessing/preprocessing.py @@ -5,25 +5,23 @@ from danswer.configs.chat_configs import DISABLE_LLM_CHUNK_FILTER from danswer.configs.chat_configs import DISABLE_LLM_FILTER_EXTRACTION from danswer.configs.chat_configs import FAVOR_RECENT_DECAY_MULTIPLIER from danswer.configs.chat_configs import NUM_RETURNED_HITS -from danswer.configs.model_configs import ENABLE_RERANKING_ASYNC_FLOW -from danswer.configs.model_configs import ENABLE_RERANKING_REAL_TIME_FLOW -from danswer.db.models import Persona from danswer.db.models import User -from danswer.search.access_filters import 
build_access_filters_for_user -from danswer.search.danswer_helper import query_intent +from danswer.search.enums import QueryFlow +from danswer.search.enums import RecencyBiasSetting from danswer.search.models import BaseFilters from danswer.search.models import IndexFilters -from danswer.search.models import QueryFlow -from danswer.search.models import RecencyBiasSetting -from danswer.search.models import RetrievalDetails from danswer.search.models import SearchQuery +from danswer.search.models import SearchRequest from danswer.search.models import SearchType +from danswer.search.preprocessing.access_filters import build_access_filters_for_user +from danswer.search.preprocessing.danswer_helper import query_intent from danswer.secondary_llm_flows.source_filter import extract_source_filter from danswer.secondary_llm_flows.time_filter import extract_time_filter from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import FunctionCall from danswer.utils.threadpool_concurrency import run_functions_in_parallel from danswer.utils.timing import log_function_time +from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW logger = setup_logger() @@ -31,15 +29,12 @@ logger = setup_logger() @log_function_time(print_only=True) def retrieval_preprocessing( - query: str, - retrieval_details: RetrievalDetails, - persona: Persona, + search_request: SearchRequest, user: User | None, db_session: Session, bypass_acl: bool = False, include_query_intent: bool = True, - skip_rerank_realtime: bool = not ENABLE_RERANKING_REAL_TIME_FLOW, - skip_rerank_non_realtime: bool = not ENABLE_RERANKING_ASYNC_FLOW, + enable_auto_detect_filters: bool = False, disable_llm_filter_extraction: bool = DISABLE_LLM_FILTER_EXTRACTION, disable_llm_chunk_filter: bool = DISABLE_LLM_CHUNK_FILTER, base_recency_decay: float = BASE_RECENCY_DECAY, @@ -50,8 +45,12 @@ def retrieval_preprocessing( Then any filters or settings as part of the query are used Then defaults to 
Persona settings if not specified by the query """ + query = search_request.query + limit = search_request.limit + offset = search_request.offset + persona = search_request.persona - preset_filters = retrieval_details.filters or BaseFilters() + preset_filters = search_request.human_selected_filters or BaseFilters() if persona and persona.document_sets and preset_filters.document_set is None: preset_filters.document_set = [ document_set.name for document_set in persona.document_sets @@ -65,16 +64,20 @@ def retrieval_preprocessing( if disable_llm_filter_extraction: auto_detect_time_filter = False auto_detect_source_filter = False - elif retrieval_details.enable_auto_detect_filters is False: + elif enable_auto_detect_filters is False: logger.debug("Retrieval details disables auto detect filters") auto_detect_time_filter = False auto_detect_source_filter = False - elif persona.llm_filter_extraction is False: + elif persona and persona.llm_filter_extraction is False: logger.debug("Persona disables auto detect filters") auto_detect_time_filter = False auto_detect_source_filter = False - if time_filter is not None and persona.recency_bias != RecencyBiasSetting.AUTO: + if ( + time_filter is not None + and persona + and persona.recency_bias != RecencyBiasSetting.AUTO + ): auto_detect_time_filter = False logger.debug("Not extract time filter - already provided") if source_filter is not None: @@ -138,24 +141,29 @@ def retrieval_preprocessing( access_control_list=user_acl_filters, ) - # Tranformer-based re-ranking to run at same time as LLM chunk relevance filter - # This one is only set globally, not via query or Persona settings - skip_reranking = ( - skip_rerank_realtime - if retrieval_details.real_time - else skip_rerank_non_realtime - ) + llm_chunk_filter = False + if search_request.skip_llm_chunk_filter is not None: + llm_chunk_filter = not search_request.skip_llm_chunk_filter + elif persona: + llm_chunk_filter = persona.llm_relevance_filter - llm_chunk_filter = 
persona.llm_relevance_filter if disable_llm_chunk_filter: + if llm_chunk_filter: + logger.info( + "LLM chunk filtering would have run but has been globally disabled" + ) llm_chunk_filter = False + skip_rerank = search_request.skip_rerank + if skip_rerank is None: + skip_rerank = not ENABLE_RERANKING_REAL_TIME_FLOW + # Decays at 1 / (1 + (multiplier * num years)) - if persona.recency_bias == RecencyBiasSetting.NO_DECAY: + if persona and persona.recency_bias == RecencyBiasSetting.NO_DECAY: recency_bias_multiplier = 0.0 - elif persona.recency_bias == RecencyBiasSetting.BASE_DECAY: + elif persona and persona.recency_bias == RecencyBiasSetting.BASE_DECAY: recency_bias_multiplier = base_recency_decay - elif persona.recency_bias == RecencyBiasSetting.FAVOR_RECENT: + elif persona and persona.recency_bias == RecencyBiasSetting.FAVOR_RECENT: recency_bias_multiplier = base_recency_decay * favor_recent_decay_multiplier else: if predicted_favor_recent: @@ -166,15 +174,16 @@ def retrieval_preprocessing( return ( SearchQuery( query=query, - search_type=persona.search_type, + search_type=persona.search_type if persona else SearchType.HYBRID, filters=final_filters, recency_bias_multiplier=recency_bias_multiplier, - num_hits=retrieval_details.limit - if retrieval_details.limit is not None - else NUM_RETURNED_HITS, - offset=retrieval_details.offset or 0, - skip_rerank=skip_reranking, + num_hits=limit if limit is not None else NUM_RETURNED_HITS, + offset=offset or 0, + skip_rerank=skip_rerank, skip_llm_chunk_filter=not llm_chunk_filter, + chunks_above=search_request.chunks_above, + chunks_below=search_request.chunks_below, + full_doc=search_request.full_doc, ), predicted_search_type, predicted_flow, diff --git a/backend/danswer/search/retrieval/search_runner.py b/backend/danswer/search/retrieval/search_runner.py new file mode 100644 index 000000000..411db5b0f --- /dev/null +++ b/backend/danswer/search/retrieval/search_runner.py @@ -0,0 +1,285 @@ +import string +from collections.abc 
import Callable + +import nltk # type:ignore +from nltk.corpus import stopwords # type:ignore +from nltk.stem import WordNetLemmatizer # type:ignore +from nltk.tokenize import word_tokenize # type:ignore +from sqlalchemy.orm import Session + +from danswer.chat.models import LlmDoc +from danswer.configs.chat_configs import HYBRID_ALPHA +from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION +from danswer.db.embedding_model import get_current_db_embedding_model +from danswer.document_index.interfaces import DocumentIndex +from danswer.search.enums import EmbedTextType +from danswer.search.models import ChunkMetric +from danswer.search.models import IndexFilters +from danswer.search.models import InferenceChunk +from danswer.search.models import MAX_METRICS_CONTENT +from danswer.search.models import RetrievalMetricsContainer +from danswer.search.models import SearchQuery +from danswer.search.models import SearchType +from danswer.search.search_nlp_models import EmbeddingModel +from danswer.secondary_llm_flows.query_expansion import multilingual_query_expansion +from danswer.utils.logger import setup_logger +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel +from danswer.utils.timing import log_function_time +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT + + +logger = setup_logger() + + +def download_nltk_data() -> None: + resources = { + "stopwords": "corpora/stopwords", + "wordnet": "corpora/wordnet", + "punkt": "tokenizers/punkt", + } + + for resource_name, resource_path in resources.items(): + try: + nltk.data.find(resource_path) + logger.info(f"{resource_name} is already downloaded.") + except LookupError: + try: + logger.info(f"Downloading {resource_name}...") + nltk.download(resource_name, quiet=True) + logger.info(f"{resource_name} downloaded successfully.") + except Exception as e: + logger.error(f"Failed to download {resource_name}. 
Error: {e}") + + +def lemmatize_text(text: str) -> list[str]: + try: + lemmatizer = WordNetLemmatizer() + word_tokens = word_tokenize(text) + return [lemmatizer.lemmatize(word) for word in word_tokens] + except Exception: + return text.split(" ") + + +def remove_stop_words_and_punctuation(text: str) -> list[str]: + try: + stop_words = set(stopwords.words("english")) + word_tokens = word_tokenize(text) + text_trimmed = [ + word + for word in word_tokens + if (word.casefold() not in stop_words and word not in string.punctuation) + ] + return text_trimmed or word_tokens + except Exception: + return text.split(" ") + + +def query_processing( + query: str, +) -> str: + query = " ".join(remove_stop_words_and_punctuation(query)) + query = " ".join(lemmatize_text(query)) + return query + + +def combine_retrieval_results( + chunk_sets: list[list[InferenceChunk]], +) -> list[InferenceChunk]: + all_chunks = [chunk for chunk_set in chunk_sets for chunk in chunk_set] + + unique_chunks: dict[tuple[str, int], InferenceChunk] = {} + for chunk in all_chunks: + key = (chunk.document_id, chunk.chunk_id) + if key not in unique_chunks: + unique_chunks[key] = chunk + continue + + stored_chunk_score = unique_chunks[key].score or 0 + this_chunk_score = chunk.score or 0 + if stored_chunk_score < this_chunk_score: + unique_chunks[key] = chunk + + sorted_chunks = sorted( + unique_chunks.values(), key=lambda x: x.score or 0, reverse=True + ) + + return sorted_chunks + + +@log_function_time(print_only=True) +def doc_index_retrieval( + query: SearchQuery, + document_index: DocumentIndex, + db_session: Session, + hybrid_alpha: float = HYBRID_ALPHA, +) -> list[InferenceChunk]: + if query.search_type == SearchType.KEYWORD: + top_chunks = document_index.keyword_retrieval( + query=query.query, + filters=query.filters, + time_decay_multiplier=query.recency_bias_multiplier, + num_to_retrieve=query.num_hits, + ) + else: + db_embedding_model = get_current_db_embedding_model(db_session) + + model = 
EmbeddingModel( + model_name=db_embedding_model.model_name, + query_prefix=db_embedding_model.query_prefix, + passage_prefix=db_embedding_model.passage_prefix, + normalize=db_embedding_model.normalize, + # The below are globally set, this flow always uses the indexing one + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ) + + query_embedding = model.encode([query.query], text_type=EmbedTextType.QUERY)[0] + + if query.search_type == SearchType.SEMANTIC: + top_chunks = document_index.semantic_retrieval( + query=query.query, + query_embedding=query_embedding, + filters=query.filters, + time_decay_multiplier=query.recency_bias_multiplier, + num_to_retrieve=query.num_hits, + ) + + elif query.search_type == SearchType.HYBRID: + top_chunks = document_index.hybrid_retrieval( + query=query.query, + query_embedding=query_embedding, + filters=query.filters, + time_decay_multiplier=query.recency_bias_multiplier, + num_to_retrieve=query.num_hits, + offset=query.offset, + hybrid_alpha=hybrid_alpha, + ) + + else: + raise RuntimeError("Invalid Search Flow") + + return top_chunks + + +def _simplify_text(text: str) -> str: + return "".join( + char for char in text if char not in string.punctuation and not char.isspace() + ).lower() + + +def retrieve_chunks( + query: SearchQuery, + document_index: DocumentIndex, + db_session: Session, + hybrid_alpha: float = HYBRID_ALPHA, # Only applicable to hybrid search + multilingual_expansion_str: str | None = MULTILINGUAL_QUERY_EXPANSION, + retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] + | None = None, +) -> list[InferenceChunk]: + """Returns a list of the best chunks from an initial keyword/semantic/hybrid search.""" + # Don't do query expansion on complex queries, rephrasings likely would not work well + if not multilingual_expansion_str or "\n" in query.query or "\r" in query.query: + top_chunks = doc_index_retrieval( + query=query, + document_index=document_index, + db_session=db_session, 
hybrid_alpha=hybrid_alpha, + ) + else: + simplified_queries = set() + run_queries: list[tuple[Callable, tuple]] = [] + + # Currently only uses query expansion on multilingual use cases + query_rephrases = multilingual_query_expansion( + query.query, multilingual_expansion_str + ) + # Just to be extra sure, add the original query. + query_rephrases.append(query.query) + for rephrase in set(query_rephrases): + # Sometimes the model rephrases the query in the same language with minor changes + # Avoid doing an extra search with the minor changes as this biases the results + simplified_rephrase = _simplify_text(rephrase) + if simplified_rephrase in simplified_queries: + continue + simplified_queries.add(simplified_rephrase) + + q_copy = query.copy(update={"query": rephrase}, deep=True) + run_queries.append( + ( + doc_index_retrieval, + (q_copy, document_index, db_session, hybrid_alpha), + ) + ) + parallel_search_results = run_functions_tuples_in_parallel(run_queries) + top_chunks = combine_retrieval_results(parallel_search_results) + + if not top_chunks: + logger.info( + f"{query.search_type.value.capitalize()} search returned no results " + f"with filters: {query.filters}" + ) + return [] + + if retrieval_metrics_callback is not None: + chunk_metrics = [ + ChunkMetric( + document_id=chunk.document_id, + chunk_content_start=chunk.content[:MAX_METRICS_CONTENT], + first_link=chunk.source_links[0] if chunk.source_links else None, + score=chunk.score if chunk.score is not None else 0, + ) + for chunk in top_chunks + ] + retrieval_metrics_callback( + RetrievalMetricsContainer( + search_type=query.search_type, metrics=chunk_metrics + ) + ) + + return top_chunks + + +def combine_inference_chunks(inf_chunks: list[InferenceChunk]) -> LlmDoc: + if not inf_chunks: + raise ValueError("Cannot combine empty list of chunks") + + # Use the first link of the document + first_chunk = inf_chunks[0] + chunk_texts = [chunk.content for chunk in inf_chunks] + return LlmDoc( + 
document_id=first_chunk.document_id, + content="\n".join(chunk_texts), + blurb=first_chunk.blurb, + semantic_identifier=first_chunk.semantic_identifier, + source_type=first_chunk.source_type, + metadata=first_chunk.metadata, + updated_at=first_chunk.updated_at, + link=first_chunk.source_links[0] if first_chunk.source_links else None, + source_links=first_chunk.source_links, + ) + + +def inference_documents_from_ids( + doc_identifiers: list[tuple[str, int]], + document_index: DocumentIndex, +) -> list[LlmDoc]: + # Currently only fetches whole docs + doc_ids_set = set(doc_id for doc_id, chunk_id in doc_identifiers) + + # No need for ACL here because the doc ids were validated beforehand + filters = IndexFilters(access_control_list=None) + + functions_with_args: list[tuple[Callable, tuple]] = [ + (document_index.id_based_retrieval, (doc_id, None, None, filters)) + for doc_id in doc_ids_set + ] + + parallel_results = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=True + ) + + # Any failures to retrieve would give a None, drop the Nones and empty lists + inference_chunks_sets = [res for res in parallel_results if res] + + return [combine_inference_chunks(chunk_set) for chunk_set in inference_chunks_sets] diff --git a/backend/danswer/search/search_nlp_models.py b/backend/danswer/search/search_nlp_models.py index 8b1f3b59f..761d9aa79 100644 --- a/backend/danswer/search/search_nlp_models.py +++ b/backend/danswer/search/search_nlp_models.py @@ -1,54 +1,39 @@ import gc -import logging import os -from enum import Enum +import time from typing import Optional from typing import TYPE_CHECKING -import numpy as np import requests +from transformers import logging as transformer_logging # type:ignore -from danswer.configs.app_configs import MODEL_SERVER_HOST -from danswer.configs.app_configs import MODEL_SERVER_PORT -from danswer.configs.model_configs import CROSS_EMBED_CONTEXT_SIZE -from danswer.configs.model_configs import CROSS_ENCODER_MODEL_ENSEMBLE 
from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE from danswer.configs.model_configs import DOCUMENT_ENCODER_MODEL -from danswer.configs.model_configs import INTENT_MODEL_VERSION -from danswer.configs.model_configs import QUERY_MAX_CONTEXT_SIZE +from danswer.search.enums import EmbedTextType from danswer.utils.logger import setup_logger -from shared_models.model_server_models import EmbedRequest -from shared_models.model_server_models import EmbedResponse -from shared_models.model_server_models import IntentRequest -from shared_models.model_server_models import IntentResponse -from shared_models.model_server_models import RerankRequest -from shared_models.model_server_models import RerankResponse +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.model_server_models import EmbedRequest +from shared_configs.model_server_models import EmbedResponse +from shared_configs.model_server_models import IntentRequest +from shared_configs.model_server_models import IntentResponse +from shared_configs.model_server_models import RerankRequest +from shared_configs.model_server_models import RerankResponse +transformer_logging.set_verbosity_error() os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" +os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1" logger = setup_logger() -# Remove useless info about layer initialization -logging.getLogger("transformers").setLevel(logging.ERROR) if TYPE_CHECKING: - from sentence_transformers import CrossEncoder # type: ignore - from sentence_transformers import SentenceTransformer # type: ignore from transformers import AutoTokenizer # type: ignore - from transformers import TFDistilBertForSequenceClassification # type: ignore _TOKENIZER: tuple[Optional["AutoTokenizer"], str | None] = (None, None) -_EMBED_MODEL: tuple[Optional["SentenceTransformer"], str | None] = (None, None) -_RERANK_MODELS: 
Optional[list["CrossEncoder"]] = None -_INTENT_TOKENIZER: Optional["AutoTokenizer"] = None -_INTENT_MODEL: Optional["TFDistilBertForSequenceClassification"] = None - - -class EmbedTextType(str, Enum): - QUERY = "query" - PASSAGE = "passage" def clean_model_name(model_str: str) -> str: @@ -82,86 +67,10 @@ def get_default_tokenizer(model_name: str | None = None) -> "AutoTokenizer": return _TOKENIZER[0] -def get_local_embedding_model( - model_name: str, - max_context_length: int = DOC_EMBEDDING_CONTEXT_SIZE, -) -> "SentenceTransformer": - # NOTE: doing a local import here to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - from sentence_transformers import SentenceTransformer # type: ignore - - global _EMBED_MODEL - if ( - _EMBED_MODEL[0] is None - or max_context_length != _EMBED_MODEL[0].max_seq_length - or model_name != _EMBED_MODEL[1] - ): - if _EMBED_MODEL[0] is not None: - del _EMBED_MODEL - gc.collect() - - logger.info(f"Loading {model_name}") - _EMBED_MODEL = (SentenceTransformer(model_name), model_name) - _EMBED_MODEL[0].max_seq_length = max_context_length - return _EMBED_MODEL[0] - - -def get_local_reranking_model_ensemble( - model_names: list[str] = CROSS_ENCODER_MODEL_ENSEMBLE, - max_context_length: int = CROSS_EMBED_CONTEXT_SIZE, -) -> list["CrossEncoder"]: - # NOTE: doing a local import here to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - from sentence_transformers import CrossEncoder - - global _RERANK_MODELS - if _RERANK_MODELS is None or max_context_length != _RERANK_MODELS[0].max_length: - _RERANK_MODELS = [] - for model_name in model_names: - logger.info(f"Loading {model_name}") - model = CrossEncoder(model_name) - model.max_length = max_context_length - _RERANK_MODELS.append(model) - return _RERANK_MODELS - - -def get_intent_model_tokenizer( - model_name: str = INTENT_MODEL_VERSION, -) -> "AutoTokenizer": - # NOTE: doing a local import here 
to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - from transformers import AutoTokenizer # type: ignore - - global _INTENT_TOKENIZER - if _INTENT_TOKENIZER is None: - _INTENT_TOKENIZER = AutoTokenizer.from_pretrained(model_name) - return _INTENT_TOKENIZER - - -def get_local_intent_model( - model_name: str = INTENT_MODEL_VERSION, - max_context_length: int = QUERY_MAX_CONTEXT_SIZE, -) -> "TFDistilBertForSequenceClassification": - # NOTE: doing a local import here to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - from transformers import TFDistilBertForSequenceClassification # type: ignore - - global _INTENT_MODEL - if _INTENT_MODEL is None or max_context_length != _INTENT_MODEL.max_seq_length: - _INTENT_MODEL = TFDistilBertForSequenceClassification.from_pretrained( - model_name - ) - _INTENT_MODEL.max_seq_length = max_context_length - return _INTENT_MODEL - - def build_model_server_url( - model_server_host: str | None, - model_server_port: int | None, -) -> str | None: - if not model_server_host or model_server_port is None: - return None - + model_server_host: str, + model_server_port: int, +) -> str: model_server_url = f"{model_server_host}:{model_server_port}" # use protocol if provided @@ -179,8 +88,8 @@ class EmbeddingModel: query_prefix: str | None, passage_prefix: str | None, normalize: bool, - server_host: str | None, # Changes depending on indexing or inference - server_port: int | None, + server_host: str, # Changes depending on indexing or inference + server_port: int, # The following are globals are currently not configurable max_seq_length: int = DOC_EMBEDDING_CONTEXT_SIZE, ) -> None: @@ -191,17 +100,7 @@ class EmbeddingModel: self.normalize = normalize model_server_url = build_model_server_url(server_host, server_port) - self.embed_server_endpoint = ( - f"{model_server_url}/encoder/bi-encoder-embed" if model_server_url else None - ) - - def 
load_model(self) -> Optional["SentenceTransformer"]: - if self.embed_server_endpoint: - return None - - return get_local_embedding_model( - model_name=self.model_name, max_context_length=self.max_seq_length - ) + self.embed_server_endpoint = f"{model_server_url}/encoder/bi-encoder-embed" def encode(self, texts: list[str], text_type: EmbedTextType) -> list[list[float]]: if text_type == EmbedTextType.QUERY and self.query_prefix: @@ -211,157 +110,67 @@ class EmbeddingModel: else: prefixed_texts = texts - if self.embed_server_endpoint: - embed_request = EmbedRequest( - texts=prefixed_texts, - model_name=self.model_name, - normalize_embeddings=self.normalize, - ) + embed_request = EmbedRequest( + texts=prefixed_texts, + model_name=self.model_name, + max_context_length=self.max_seq_length, + normalize_embeddings=self.normalize, + ) - try: - response = requests.post( - self.embed_server_endpoint, json=embed_request.dict() - ) - response.raise_for_status() + response = requests.post(self.embed_server_endpoint, json=embed_request.dict()) + response.raise_for_status() - return EmbedResponse(**response.json()).embeddings - except requests.RequestException as e: - logger.exception(f"Failed to get Embedding: {e}") - raise - - local_model = self.load_model() - - if local_model is None: - raise RuntimeError("Failed to load local Embedding Model") - - return local_model.encode( - prefixed_texts, normalize_embeddings=self.normalize - ).tolist() + return EmbedResponse(**response.json()).embeddings class CrossEncoderEnsembleModel: def __init__( self, - model_names: list[str] = CROSS_ENCODER_MODEL_ENSEMBLE, - max_seq_length: int = CROSS_EMBED_CONTEXT_SIZE, - model_server_host: str | None = MODEL_SERVER_HOST, + model_server_host: str = MODEL_SERVER_HOST, model_server_port: int = MODEL_SERVER_PORT, ) -> None: - self.model_names = model_names - self.max_seq_length = max_seq_length - model_server_url = build_model_server_url(model_server_host, model_server_port) - 
self.rerank_server_endpoint = ( - model_server_url + "/encoder/cross-encoder-scores" - if model_server_url - else None - ) - - def load_model(self) -> list["CrossEncoder"] | None: - if self.rerank_server_endpoint: - return None - - return get_local_reranking_model_ensemble( - model_names=self.model_names, max_context_length=self.max_seq_length - ) + self.rerank_server_endpoint = model_server_url + "/encoder/cross-encoder-scores" def predict(self, query: str, passages: list[str]) -> list[list[float]]: - if self.rerank_server_endpoint: - rerank_request = RerankRequest(query=query, documents=passages) + rerank_request = RerankRequest(query=query, documents=passages) - try: - response = requests.post( - self.rerank_server_endpoint, json=rerank_request.dict() - ) - response.raise_for_status() + response = requests.post( + self.rerank_server_endpoint, json=rerank_request.dict() + ) + response.raise_for_status() - return RerankResponse(**response.json()).scores - except requests.RequestException as e: - logger.exception(f"Failed to get Reranking Scores: {e}") - raise - - local_models = self.load_model() - - if local_models is None: - raise RuntimeError("Failed to load local Reranking Model Ensemble") - - scores = [ - cross_encoder.predict([(query, passage) for passage in passages]).tolist() # type: ignore - for cross_encoder in local_models - ] - - return scores + return RerankResponse(**response.json()).scores class IntentModel: def __init__( self, - model_name: str = INTENT_MODEL_VERSION, - max_seq_length: int = QUERY_MAX_CONTEXT_SIZE, - model_server_host: str | None = MODEL_SERVER_HOST, + model_server_host: str = MODEL_SERVER_HOST, model_server_port: int = MODEL_SERVER_PORT, ) -> None: - self.model_name = model_name - self.max_seq_length = max_seq_length - model_server_url = build_model_server_url(model_server_host, model_server_port) - self.intent_server_endpoint = ( - model_server_url + "/custom/intent-model" if model_server_url else None - ) - - def load_model(self) 
-> Optional["SentenceTransformer"]: - if self.intent_server_endpoint: - return None - - return get_local_intent_model( - model_name=self.model_name, max_context_length=self.max_seq_length - ) + self.intent_server_endpoint = model_server_url + "/custom/intent-model" def predict( self, query: str, ) -> list[float]: - # NOTE: doing a local import here to avoid reduce memory usage caused by - # processes importing this file despite not using any of this - import tensorflow as tf # type: ignore + intent_request = IntentRequest(query=query) - if self.intent_server_endpoint: - intent_request = IntentRequest(query=query) - - try: - response = requests.post( - self.intent_server_endpoint, json=intent_request.dict() - ) - response.raise_for_status() - - return IntentResponse(**response.json()).class_probs - except requests.RequestException as e: - logger.exception(f"Failed to get Embedding: {e}") - raise - - tokenizer = get_intent_model_tokenizer() - local_model = self.load_model() - - if local_model is None: - raise RuntimeError("Failed to load local Intent Model") - - intent_model = get_local_intent_model() - model_input = tokenizer( - query, return_tensors="tf", truncation=True, padding=True + response = requests.post( + self.intent_server_endpoint, json=intent_request.dict() ) + response.raise_for_status() - predictions = intent_model(model_input)[0] - probabilities = tf.nn.softmax(predictions, axis=-1) - class_percentages = np.round(probabilities.numpy() * 100, 2) - - return list(class_percentages.tolist()[0]) + return IntentResponse(**response.json()).class_probs -def warm_up_models( +def warm_up_encoders( model_name: str, normalize: bool, - skip_cross_encoders: bool = False, - indexer_only: bool = False, + model_server_host: str = MODEL_SERVER_HOST, + model_server_port: int = MODEL_SERVER_PORT, ) -> None: warm_up_str = ( "Danswer is amazing! 
Check out our easy deployment guide at " @@ -373,23 +182,23 @@ def warm_up_models( embed_model = EmbeddingModel( model_name=model_name, normalize=normalize, - # These don't matter, if it's a remote model, this function shouldn't be called + # Not a big deal if prefix is incorrect query_prefix=None, passage_prefix=None, - server_host=None, - server_port=None, + server_host=model_server_host, + server_port=model_server_port, ) - embed_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY) - - if indexer_only: - return - - if not skip_cross_encoders: - CrossEncoderEnsembleModel().predict(query=warm_up_str, passages=[warm_up_str]) - - intent_tokenizer = get_intent_model_tokenizer() - inputs = intent_tokenizer( - warm_up_str, return_tensors="tf", truncation=True, padding=True - ) - get_local_intent_model()(inputs) + # First time downloading the models it may take even longer, but just in case, + # retry the whole server + wait_time = 5 + for attempt in range(20): + try: + embed_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY) + return + except Exception: + logger.exception( + f"Failed to run test embedding, retrying in {wait_time} seconds..." 
+ ) + time.sleep(wait_time) + raise Exception("Failed to run test embedding.") diff --git a/backend/danswer/search/search_runner.py b/backend/danswer/search/search_runner.py deleted file mode 100644 index 943b696ac..000000000 --- a/backend/danswer/search/search_runner.py +++ /dev/null @@ -1,645 +0,0 @@ -import string -from collections.abc import Callable -from collections.abc import Iterator -from typing import cast - -import numpy -from nltk.corpus import stopwords # type:ignore -from nltk.stem import WordNetLemmatizer # type:ignore -from nltk.tokenize import word_tokenize # type:ignore -from sqlalchemy.orm import Session - -from danswer.chat.models import LlmDoc -from danswer.configs.app_configs import MODEL_SERVER_HOST -from danswer.configs.app_configs import MODEL_SERVER_PORT -from danswer.configs.chat_configs import HYBRID_ALPHA -from danswer.configs.chat_configs import MULTILINGUAL_QUERY_EXPANSION -from danswer.configs.chat_configs import NUM_RERANKED_RESULTS -from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MAX -from danswer.configs.model_configs import CROSS_ENCODER_RANGE_MIN -from danswer.configs.model_configs import SIM_SCORE_RANGE_HIGH -from danswer.configs.model_configs import SIM_SCORE_RANGE_LOW -from danswer.db.embedding_model import get_current_db_embedding_model -from danswer.document_index.document_index_utils import ( - translate_boost_count_to_multiplier, -) -from danswer.document_index.interfaces import DocumentIndex -from danswer.indexing.models import InferenceChunk -from danswer.search.models import ChunkMetric -from danswer.search.models import IndexFilters -from danswer.search.models import MAX_METRICS_CONTENT -from danswer.search.models import RerankMetricsContainer -from danswer.search.models import RetrievalMetricsContainer -from danswer.search.models import SearchDoc -from danswer.search.models import SearchQuery -from danswer.search.models import SearchType -from danswer.search.search_nlp_models import 
CrossEncoderEnsembleModel -from danswer.search.search_nlp_models import EmbeddingModel -from danswer.search.search_nlp_models import EmbedTextType -from danswer.secondary_llm_flows.chunk_usefulness import llm_batch_eval_chunks -from danswer.secondary_llm_flows.query_expansion import multilingual_query_expansion -from danswer.utils.logger import setup_logger -from danswer.utils.threadpool_concurrency import FunctionCall -from danswer.utils.threadpool_concurrency import run_functions_in_parallel -from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel -from danswer.utils.timing import log_function_time - - -logger = setup_logger() - - -def _log_top_chunk_links(search_flow: str, chunks: list[InferenceChunk]) -> None: - top_links = [ - c.source_links[0] if c.source_links is not None else "No Link" for c in chunks - ] - logger.info(f"Top links from {search_flow} search: {', '.join(top_links)}") - - -def lemmatize_text(text: str) -> list[str]: - lemmatizer = WordNetLemmatizer() - word_tokens = word_tokenize(text) - return [lemmatizer.lemmatize(word) for word in word_tokens] - - -def remove_stop_words_and_punctuation(text: str) -> list[str]: - stop_words = set(stopwords.words("english")) - word_tokens = word_tokenize(text) - text_trimmed = [ - word - for word in word_tokens - if (word.casefold() not in stop_words and word not in string.punctuation) - ] - return text_trimmed or word_tokens - - -def query_processing( - query: str, -) -> str: - query = " ".join(remove_stop_words_and_punctuation(query)) - query = " ".join(lemmatize_text(query)) - return query - - -def chunks_to_search_docs(chunks: list[InferenceChunk] | None) -> list[SearchDoc]: - search_docs = ( - [ - SearchDoc( - document_id=chunk.document_id, - chunk_ind=chunk.chunk_id, - semantic_identifier=chunk.semantic_identifier or "Unknown", - link=chunk.source_links.get(0) if chunk.source_links else None, - blurb=chunk.blurb, - source_type=chunk.source_type, - boost=chunk.boost, - 
hidden=chunk.hidden, - metadata=chunk.metadata, - score=chunk.score, - match_highlights=chunk.match_highlights, - updated_at=chunk.updated_at, - primary_owners=chunk.primary_owners, - secondary_owners=chunk.secondary_owners, - ) - for chunk in chunks - ] - if chunks - else [] - ) - return search_docs - - -def combine_retrieval_results( - chunk_sets: list[list[InferenceChunk]], -) -> list[InferenceChunk]: - all_chunks = [chunk for chunk_set in chunk_sets for chunk in chunk_set] - - unique_chunks: dict[tuple[str, int], InferenceChunk] = {} - for chunk in all_chunks: - key = (chunk.document_id, chunk.chunk_id) - if key not in unique_chunks: - unique_chunks[key] = chunk - continue - - stored_chunk_score = unique_chunks[key].score or 0 - this_chunk_score = chunk.score or 0 - if stored_chunk_score < this_chunk_score: - unique_chunks[key] = chunk - - sorted_chunks = sorted( - unique_chunks.values(), key=lambda x: x.score or 0, reverse=True - ) - - return sorted_chunks - - -@log_function_time(print_only=True) -def doc_index_retrieval( - query: SearchQuery, - document_index: DocumentIndex, - db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, -) -> list[InferenceChunk]: - if query.search_type == SearchType.KEYWORD: - top_chunks = document_index.keyword_retrieval( - query=query.query, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, - ) - else: - db_embedding_model = get_current_db_embedding_model(db_session) - - model = EmbeddingModel( - model_name=db_embedding_model.model_name, - query_prefix=db_embedding_model.query_prefix, - passage_prefix=db_embedding_model.passage_prefix, - normalize=db_embedding_model.normalize, - # The below are globally set, this flow always uses the indexing one - server_host=MODEL_SERVER_HOST, - server_port=MODEL_SERVER_PORT, - ) - - query_embedding = model.encode([query.query], text_type=EmbedTextType.QUERY)[0] - - if query.search_type == SearchType.SEMANTIC: - top_chunks = 
document_index.semantic_retrieval( - query=query.query, - query_embedding=query_embedding, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, - ) - - elif query.search_type == SearchType.HYBRID: - top_chunks = document_index.hybrid_retrieval( - query=query.query, - query_embedding=query_embedding, - filters=query.filters, - time_decay_multiplier=query.recency_bias_multiplier, - num_to_retrieve=query.num_hits, - offset=query.offset, - hybrid_alpha=hybrid_alpha, - ) - - else: - raise RuntimeError("Invalid Search Flow") - - return top_chunks - - -@log_function_time(print_only=True) -def semantic_reranking( - query: str, - chunks: list[InferenceChunk], - rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, - model_min: int = CROSS_ENCODER_RANGE_MIN, - model_max: int = CROSS_ENCODER_RANGE_MAX, -) -> tuple[list[InferenceChunk], list[int]]: - """Reranks chunks based on cross-encoder models. Additionally provides the original indices - of the chunks in their new sorted order. 
- - Note: this updates the chunks in place, it updates the chunk scores which came from retrieval - """ - cross_encoders = CrossEncoderEnsembleModel() - passages = [chunk.content for chunk in chunks] - sim_scores_floats = cross_encoders.predict(query=query, passages=passages) - - sim_scores = [numpy.array(scores) for scores in sim_scores_floats] - - raw_sim_scores = cast(numpy.ndarray, sum(sim_scores) / len(sim_scores)) - - cross_models_min = numpy.min(sim_scores) - - shifted_sim_scores = sum( - [enc_n_scores - cross_models_min for enc_n_scores in sim_scores] - ) / len(sim_scores) - - boosts = [translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks] - recency_multiplier = [chunk.recency_bias for chunk in chunks] - boosted_sim_scores = shifted_sim_scores * boosts * recency_multiplier - normalized_b_s_scores = (boosted_sim_scores + cross_models_min - model_min) / ( - model_max - model_min - ) - orig_indices = [i for i in range(len(normalized_b_s_scores))] - scored_results = list( - zip(normalized_b_s_scores, raw_sim_scores, chunks, orig_indices) - ) - scored_results.sort(key=lambda x: x[0], reverse=True) - ranked_sim_scores, ranked_raw_scores, ranked_chunks, ranked_indices = zip( - *scored_results - ) - - logger.debug( - f"Reranked (Boosted + Time Weighted) similarity scores: {ranked_sim_scores}" - ) - - # Assign new chunk scores based on reranking - for ind, chunk in enumerate(ranked_chunks): - chunk.score = ranked_sim_scores[ind] - - if rerank_metrics_callback is not None: - chunk_metrics = [ - ChunkMetric( - document_id=chunk.document_id, - chunk_content_start=chunk.content[:MAX_METRICS_CONTENT], - first_link=chunk.source_links[0] if chunk.source_links else None, - score=chunk.score if chunk.score is not None else 0, - ) - for chunk in ranked_chunks - ] - - rerank_metrics_callback( - RerankMetricsContainer( - metrics=chunk_metrics, raw_similarity_scores=ranked_raw_scores - ) - ) - - return list(ranked_chunks), list(ranked_indices) - - -def 
apply_boost_legacy( - chunks: list[InferenceChunk], - norm_min: float = SIM_SCORE_RANGE_LOW, - norm_max: float = SIM_SCORE_RANGE_HIGH, -) -> list[InferenceChunk]: - scores = [chunk.score or 0 for chunk in chunks] - boosts = [translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks] - - logger.debug(f"Raw similarity scores: {scores}") - - score_min = min(scores) - score_max = max(scores) - score_range = score_max - score_min - - if score_range != 0: - boosted_scores = [ - ((score - score_min) / score_range) * boost - for score, boost in zip(scores, boosts) - ] - unnormed_boosted_scores = [ - score * score_range + score_min for score in boosted_scores - ] - else: - unnormed_boosted_scores = [ - score * boost for score, boost in zip(scores, boosts) - ] - - norm_min = min(norm_min, min(scores)) - norm_max = max(norm_max, max(scores)) - # This should never be 0 unless user has done some weird/wrong settings - norm_range = norm_max - norm_min - - # For score display purposes - if norm_range != 0: - re_normed_scores = [ - ((score - norm_min) / norm_range) for score in unnormed_boosted_scores - ] - else: - re_normed_scores = unnormed_boosted_scores - - rescored_chunks = list(zip(re_normed_scores, chunks)) - rescored_chunks.sort(key=lambda x: x[0], reverse=True) - sorted_boosted_scores, boost_sorted_chunks = zip(*rescored_chunks) - - final_chunks = list(boost_sorted_chunks) - final_scores = list(sorted_boosted_scores) - for ind, chunk in enumerate(final_chunks): - chunk.score = final_scores[ind] - - logger.debug(f"Boost sorted similary scores: {list(final_scores)}") - - return final_chunks - - -def apply_boost( - chunks: list[InferenceChunk], - # Need the range of values to not be too spread out for applying boost - # therefore norm across only the top few results - norm_cutoff: int = NUM_RERANKED_RESULTS, - norm_min: float = SIM_SCORE_RANGE_LOW, - norm_max: float = SIM_SCORE_RANGE_HIGH, -) -> list[InferenceChunk]: - scores = [chunk.score or 0.0 for chunk in 
chunks] - logger.debug(f"Raw similarity scores: {scores}") - - boosts = [translate_boost_count_to_multiplier(chunk.boost) for chunk in chunks] - recency_multiplier = [chunk.recency_bias for chunk in chunks] - - norm_min = min(norm_min, min(scores[:norm_cutoff])) - norm_max = max(norm_max, max(scores[:norm_cutoff])) - # This should never be 0 unless user has done some weird/wrong settings - norm_range = norm_max - norm_min - - boosted_scores = [ - max(0, (score - norm_min) * boost * recency / norm_range) - for score, boost, recency in zip(scores, boosts, recency_multiplier) - ] - - rescored_chunks = list(zip(boosted_scores, chunks)) - rescored_chunks.sort(key=lambda x: x[0], reverse=True) - sorted_boosted_scores, boost_sorted_chunks = zip(*rescored_chunks) - - final_chunks = list(boost_sorted_chunks) - final_scores = list(sorted_boosted_scores) - for ind, chunk in enumerate(final_chunks): - chunk.score = final_scores[ind] - - logger.debug( - f"Boosted + Time Weighted sorted similarity scores: {list(final_scores)}" - ) - - return final_chunks - - -def _simplify_text(text: str) -> str: - return "".join( - char for char in text if char not in string.punctuation and not char.isspace() - ).lower() - - -def retrieve_chunks( - query: SearchQuery, - document_index: DocumentIndex, - db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, # Only applicable to hybrid search - multilingual_expansion_str: str | None = MULTILINGUAL_QUERY_EXPANSION, - retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] - | None = None, -) -> list[InferenceChunk]: - """Returns a list of the best chunks from an initial keyword/semantic/ hybrid search.""" - # Don't do query expansion on complex queries, rephrasings likely would not work well - if not multilingual_expansion_str or "\n" in query.query or "\r" in query.query: - top_chunks = doc_index_retrieval( - query=query, - document_index=document_index, - db_session=db_session, - hybrid_alpha=hybrid_alpha, - ) - else: - 
simplified_queries = set() - run_queries: list[tuple[Callable, tuple]] = [] - - # Currently only uses query expansion on multilingual use cases - query_rephrases = multilingual_query_expansion( - query.query, multilingual_expansion_str - ) - # Just to be extra sure, add the original query. - query_rephrases.append(query.query) - for rephrase in set(query_rephrases): - # Sometimes the model rephrases the query in the same language with minor changes - # Avoid doing an extra search with the minor changes as this biases the results - simplified_rephrase = _simplify_text(rephrase) - if simplified_rephrase in simplified_queries: - continue - simplified_queries.add(simplified_rephrase) - - q_copy = query.copy(update={"query": rephrase}, deep=True) - run_queries.append( - ( - doc_index_retrieval, - (q_copy, document_index, db_session, hybrid_alpha), - ) - ) - parallel_search_results = run_functions_tuples_in_parallel(run_queries) - top_chunks = combine_retrieval_results(parallel_search_results) - - if not top_chunks: - logger.info( - f"{query.search_type.value.capitalize()} search returned no results " - f"with filters: {query.filters}" - ) - return [] - - if retrieval_metrics_callback is not None: - chunk_metrics = [ - ChunkMetric( - document_id=chunk.document_id, - chunk_content_start=chunk.content[:MAX_METRICS_CONTENT], - first_link=chunk.source_links[0] if chunk.source_links else None, - score=chunk.score if chunk.score is not None else 0, - ) - for chunk in top_chunks - ] - retrieval_metrics_callback( - RetrievalMetricsContainer( - search_type=query.search_type, metrics=chunk_metrics - ) - ) - - return top_chunks - - -def should_rerank(query: SearchQuery) -> bool: - # Don't re-rank for keyword search - return query.search_type != SearchType.KEYWORD and not query.skip_rerank - - -def should_apply_llm_based_relevance_filter(query: SearchQuery) -> bool: - return not query.skip_llm_chunk_filter - - -def rerank_chunks( - query: SearchQuery, - chunks_to_rerank: 
list[InferenceChunk], - rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, -) -> list[InferenceChunk]: - ranked_chunks, _ = semantic_reranking( - query=query.query, - chunks=chunks_to_rerank[: query.num_rerank], - rerank_metrics_callback=rerank_metrics_callback, - ) - lower_chunks = chunks_to_rerank[query.num_rerank :] - # Scores from rerank cannot be meaningfully combined with scores without rerank - for lower_chunk in lower_chunks: - lower_chunk.score = None - ranked_chunks.extend(lower_chunks) - return ranked_chunks - - -@log_function_time(print_only=True) -def filter_chunks( - query: SearchQuery, - chunks_to_filter: list[InferenceChunk], -) -> list[str]: - """Filters chunks based on whether the LLM thought they were relevant to the query. - - Returns a list of the unique chunk IDs that were marked as relevant""" - chunks_to_filter = chunks_to_filter[: query.max_llm_filter_chunks] - llm_chunk_selection = llm_batch_eval_chunks( - query=query.query, - chunk_contents=[chunk.content for chunk in chunks_to_filter], - ) - return [ - chunk.unique_id - for ind, chunk in enumerate(chunks_to_filter) - if llm_chunk_selection[ind] - ] - - -def full_chunk_search( - query: SearchQuery, - document_index: DocumentIndex, - db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, # Only applicable to hybrid search - multilingual_expansion_str: str | None = MULTILINGUAL_QUERY_EXPANSION, - retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] - | None = None, - rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, -) -> tuple[list[InferenceChunk], list[bool]]: - """A utility which provides an easier interface than `full_chunk_search_generator`. 
- Rather than returning the chunks and llm relevance filter results in two separate - yields, just returns them both at once.""" - search_generator = full_chunk_search_generator( - search_query=query, - document_index=document_index, - db_session=db_session, - hybrid_alpha=hybrid_alpha, - multilingual_expansion_str=multilingual_expansion_str, - retrieval_metrics_callback=retrieval_metrics_callback, - rerank_metrics_callback=rerank_metrics_callback, - ) - top_chunks = cast(list[InferenceChunk], next(search_generator)) - llm_chunk_selection = cast(list[bool], next(search_generator)) - return top_chunks, llm_chunk_selection - - -def empty_search_generator() -> Iterator[list[InferenceChunk] | list[bool]]: - yield cast(list[InferenceChunk], []) - yield cast(list[bool], []) - - -def full_chunk_search_generator( - search_query: SearchQuery, - document_index: DocumentIndex, - db_session: Session, - hybrid_alpha: float = HYBRID_ALPHA, # Only applicable to hybrid search - multilingual_expansion_str: str | None = MULTILINGUAL_QUERY_EXPANSION, - retrieval_metrics_callback: Callable[[RetrievalMetricsContainer], None] - | None = None, - rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None, -) -> Iterator[list[InferenceChunk] | list[bool]]: - """Always yields twice. Once with the selected chunks and once with the LLM relevance filter result. 
- If LLM filter results are turned off, returns a list of False - """ - chunks_yielded = False - - retrieved_chunks = retrieve_chunks( - query=search_query, - document_index=document_index, - db_session=db_session, - hybrid_alpha=hybrid_alpha, - multilingual_expansion_str=multilingual_expansion_str, - retrieval_metrics_callback=retrieval_metrics_callback, - ) - - if not retrieved_chunks: - yield cast(list[InferenceChunk], []) - yield cast(list[bool], []) - return - - post_processing_tasks: list[FunctionCall] = [] - - rerank_task_id = None - if should_rerank(search_query): - post_processing_tasks.append( - FunctionCall( - rerank_chunks, - ( - search_query, - retrieved_chunks, - rerank_metrics_callback, - ), - ) - ) - rerank_task_id = post_processing_tasks[-1].result_id - else: - final_chunks = retrieved_chunks - # NOTE: if we don't rerank, we can return the chunks immediately - # since we know this is the final order - _log_top_chunk_links(search_query.search_type.value, final_chunks) - yield final_chunks - chunks_yielded = True - - llm_filter_task_id = None - if should_apply_llm_based_relevance_filter(search_query): - post_processing_tasks.append( - FunctionCall( - filter_chunks, - (search_query, retrieved_chunks[: search_query.max_llm_filter_chunks]), - ) - ) - llm_filter_task_id = post_processing_tasks[-1].result_id - - post_processing_results = ( - run_functions_in_parallel(post_processing_tasks) - if post_processing_tasks - else {} - ) - reranked_chunks = cast( - list[InferenceChunk] | None, - post_processing_results.get(str(rerank_task_id)) if rerank_task_id else None, - ) - if reranked_chunks: - if chunks_yielded: - logger.error( - "Trying to yield re-ranked chunks, but chunks were already yielded. This should never happen." 
- ) - else: - _log_top_chunk_links(search_query.search_type.value, reranked_chunks) - yield reranked_chunks - - llm_chunk_selection = cast( - list[str] | None, - post_processing_results.get(str(llm_filter_task_id)) - if llm_filter_task_id - else None, - ) - if llm_chunk_selection is not None: - yield [ - chunk.unique_id in llm_chunk_selection - for chunk in reranked_chunks or retrieved_chunks - ] - else: - yield [False for _ in reranked_chunks or retrieved_chunks] - - -def combine_inference_chunks(inf_chunks: list[InferenceChunk]) -> LlmDoc: - if not inf_chunks: - raise ValueError("Cannot combine empty list of chunks") - - # Use the first link of the document - first_chunk = inf_chunks[0] - chunk_texts = [chunk.content for chunk in inf_chunks] - return LlmDoc( - document_id=first_chunk.document_id, - content="\n".join(chunk_texts), - semantic_identifier=first_chunk.semantic_identifier, - source_type=first_chunk.source_type, - metadata=first_chunk.metadata, - updated_at=first_chunk.updated_at, - link=first_chunk.source_links[0] if first_chunk.source_links else None, - ) - - -def inference_documents_from_ids( - doc_identifiers: list[tuple[str, int]], - document_index: DocumentIndex, -) -> list[LlmDoc]: - # Currently only fetches whole docs - doc_ids_set = set(doc_id for doc_id, chunk_id in doc_identifiers) - - # No need for ACL here because the doc ids were validated beforehand - filters = IndexFilters(access_control_list=None) - - functions_with_args: list[tuple[Callable, tuple]] = [ - (document_index.id_based_retrieval, (doc_id, None, filters)) - for doc_id in doc_ids_set - ] - - parallel_results = run_functions_tuples_in_parallel( - functions_with_args, allow_failures=True - ) - - # Any failures to retrieve would give a None, drop the Nones and empty lists - inference_chunks_sets = [res for res in parallel_results if res] - - return [combine_inference_chunks(chunk_set) for chunk_set in inference_chunks_sets] diff --git a/backend/danswer/search/utils.py 
b/backend/danswer/search/utils.py new file mode 100644 index 000000000..fbcb205e3 --- /dev/null +++ b/backend/danswer/search/utils.py @@ -0,0 +1,34 @@ +from collections.abc import Sequence + +from danswer.search.models import InferenceChunk +from danswer.search.models import InferenceSection +from danswer.search.models import SearchDoc + + +def chunks_or_sections_to_search_docs( + chunks: Sequence[InferenceChunk | InferenceSection] | None, +) -> list[SearchDoc]: + search_docs = ( + [ + SearchDoc( + document_id=chunk.document_id, + chunk_ind=chunk.chunk_id, + semantic_identifier=chunk.semantic_identifier or "Unknown", + link=chunk.source_links.get(0) if chunk.source_links else None, + blurb=chunk.blurb, + source_type=chunk.source_type, + boost=chunk.boost, + hidden=chunk.hidden, + metadata=chunk.metadata, + score=chunk.score, + match_highlights=chunk.match_highlights, + updated_at=chunk.updated_at, + primary_owners=chunk.primary_owners, + secondary_owners=chunk.secondary_owners, + ) + for chunk in chunks + ] + if chunks + else [] + ) + return search_docs diff --git a/backend/danswer/secondary_llm_flows/answer_validation.py b/backend/danswer/secondary_llm_flows/answer_validation.py index 88a153da4..2ef3787c1 100644 --- a/backend/danswer/secondary_llm_flows/answer_validation.py +++ b/backend/danswer/secondary_llm_flows/answer_validation.py @@ -1,6 +1,7 @@ from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.answer_validation import ANSWER_VALIDITY_PROMPT from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time @@ -52,7 +53,7 @@ def get_answer_validity( messages = _get_answer_validation_messages(query, answer) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = llm.invoke(filled_llm_prompt) + model_output = 
message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) validity = _extract_validity(model_output) diff --git a/backend/danswer/secondary_llm_flows/chat_session_naming.py b/backend/danswer/secondary_llm_flows/chat_session_naming.py index aa604131b..5f4182e42 100644 --- a/backend/danswer/secondary_llm_flows/chat_session_naming.py +++ b/backend/danswer/secondary_llm_flows/chat_session_naming.py @@ -5,6 +5,7 @@ from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.chat_prompts import CHAT_NAMING from danswer.utils.logger import setup_logger @@ -39,7 +40,7 @@ def get_renamed_conversation_name( prompt_msgs = get_chat_rename_messages(history_str) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(prompt_msgs) - new_name_raw = llm.invoke(filled_llm_prompt) + new_name_raw = message_to_string(llm.invoke(filled_llm_prompt)) new_name = new_name_raw.strip().strip(' "') diff --git a/backend/danswer/secondary_llm_flows/choose_search.py b/backend/danswer/secondary_llm_flows/choose_search.py index 9e07bf647..5016cf055 100644 --- a/backend/danswer/secondary_llm_flows/choose_search.py +++ b/backend/danswer/secondary_llm_flows/choose_search.py @@ -6,10 +6,10 @@ from danswer.chat.chat_utils import combine_message_chain from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF from danswer.db.models import ChatMessage -from danswer.llm.exceptions import GenAIDisabledException -from danswer.llm.factory import get_default_llm +from danswer.llm.answering.models import PreviousMessage from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.llm.utils import 
translate_danswer_msg_to_langchain from danswer.prompts.chat_prompts import AGGRESSIVE_SEARCH_TEMPLATE from danswer.prompts.chat_prompts import NO_SEARCH @@ -27,8 +27,8 @@ def check_if_need_search_multi_message( history: list[ChatMessage], llm: LLM, ) -> bool: - # Always start with a retrieval - if not history: + # Retrieve on start or when choosing is globally disabled + if not history or DISABLE_LLM_CHOOSE_SEARCH: return True prompt_msgs: list[BaseMessage] = [SystemMessage(content=REQUIRE_SEARCH_SYSTEM_MSG)] @@ -38,7 +38,7 @@ def check_if_need_search_multi_message( prompt_msgs.append(HumanMessage(content=f"{last_query}\n\n{REQUIRE_SEARCH_HINT}")) - model_out = llm.invoke(prompt_msgs) + model_out = message_to_string(llm.invoke(prompt_msgs)) if (NO_SEARCH.split()[0] + " ").lower() in model_out.lower(): return False @@ -47,10 +47,9 @@ def check_if_need_search_multi_message( def check_if_need_search( - query_message: ChatMessage, - history: list[ChatMessage], - llm: LLM | None = None, - disable_llm_check: bool = DISABLE_LLM_CHOOSE_SEARCH, + query: str, + history: list[PreviousMessage], + llm: LLM, ) -> bool: def _get_search_messages( question: str, @@ -67,27 +66,18 @@ def check_if_need_search( return messages - if disable_llm_check: + # Choosing is globally disabled, use search + if DISABLE_LLM_CHOOSE_SEARCH: return True - if llm is None: - try: - llm = get_default_llm() - except GenAIDisabledException: - # If Generative AI is turned off the always run Search as Danswer is being used - # as just a search engine - return True - history_str = combine_message_chain( messages=history, token_limit=GEN_AI_HISTORY_CUTOFF ) - prompt_msgs = _get_search_messages( - question=query_message.message, history_str=history_str - ) + prompt_msgs = _get_search_messages(question=query, history_str=history_str) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(prompt_msgs) - require_search_output = llm.invoke(filled_llm_prompt) + require_search_output = 
message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(f"Run search prediction: {require_search_output}") diff --git a/backend/danswer/secondary_llm_flows/chunk_usefulness.py b/backend/danswer/secondary_llm_flows/chunk_usefulness.py index 2db06bdba..d37feb0c0 100644 --- a/backend/danswer/secondary_llm_flows/chunk_usefulness.py +++ b/backend/danswer/secondary_llm_flows/chunk_usefulness.py @@ -3,6 +3,7 @@ from collections.abc import Callable from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.llm_chunk_filter import CHUNK_FILTER_PROMPT from danswer.prompts.llm_chunk_filter import NONUSEFUL_PAT from danswer.utils.logger import setup_logger @@ -44,7 +45,7 @@ def llm_eval_chunk(query: str, chunk_content: str) -> bool: # When running in a batch, it takes as long as the longest thread # And when running a large batch, one may fail and take the whole timeout # instead cap it to 5 seconds - model_output = llm.invoke(filled_llm_prompt) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) return _extract_usefulness(model_output) diff --git a/backend/danswer/secondary_llm_flows/query_expansion.py b/backend/danswer/secondary_llm_flows/query_expansion.py index d0fb19b73..2f221bfa9 100644 --- a/backend/danswer/secondary_llm_flows/query_expansion.py +++ b/backend/danswer/secondary_llm_flows/query_expansion.py @@ -1,13 +1,15 @@ from collections.abc import Callable -from typing import cast from danswer.chat.chat_utils import combine_message_chain +from danswer.configs.chat_configs import DISABLE_LLM_QUERY_REPHRASE from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF from danswer.db.models import ChatMessage +from danswer.llm.answering.models import PreviousMessage from danswer.llm.exceptions import GenAIDisabledException from 
danswer.llm.factory import get_default_llm from danswer.llm.interfaces import LLM from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.chat_prompts import HISTORY_QUERY_REPHRASE from danswer.prompts.miscellaneous_prompts import LANGUAGE_REPHRASE_PROMPT from danswer.utils.logger import setup_logger @@ -40,7 +42,7 @@ def llm_multilingual_query_expansion(query: str, language: str) -> str: messages = _get_rephrase_messages() filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = llm.invoke(filled_llm_prompt) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) return model_output @@ -86,50 +88,42 @@ def get_contextual_rephrase_messages( def history_based_query_rephrase( - query_message: ChatMessage, - history: list[ChatMessage], - llm: LLM | None = None, + query: str, + history: list[ChatMessage] | list[PreviousMessage], + llm: LLM, size_heuristic: int = 200, punctuation_heuristic: int = 10, skip_first_rephrase: bool = False, ) -> str: - user_query = cast(str, query_message.message) - - if not user_query: - raise ValueError("Can't rephrase/search an empty query") - - if llm is None: - try: - llm = get_default_llm() - except GenAIDisabledException: - # If Generative AI is turned off, just return the original query - return user_query + # Globally disabled, just use the exact user query + if DISABLE_LLM_QUERY_REPHRASE: + return query # For some use cases, the first query should be untouched. Later queries must be rephrased # due to needing context but the first query has no context. 
if skip_first_rephrase and not history: - return user_query + return query # If it's a very large query, assume it's a copy paste which we may want to find exactly # or at least very closely, so don't rephrase it - if len(user_query) >= size_heuristic: - return user_query + if len(query) >= size_heuristic: + return query # If there is an unusually high number of punctuations, it's probably not natural language # so don't rephrase it - if count_punctuation(user_query) >= punctuation_heuristic: - return user_query + if count_punctuation(query) >= punctuation_heuristic: + return query history_str = combine_message_chain( messages=history, token_limit=GEN_AI_HISTORY_CUTOFF ) prompt_msgs = get_contextual_rephrase_messages( - question=user_query, history_str=history_str + question=query, history_str=history_str ) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(prompt_msgs) - rephrased_query = llm.invoke(filled_llm_prompt) + rephrased_query = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(f"Rephrased combined query: {rephrased_query}") @@ -164,7 +158,7 @@ def thread_based_query_rephrase( ) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(prompt_msgs) - rephrased_query = llm.invoke(filled_llm_prompt) + rephrased_query = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(f"Rephrased combined query: {rephrased_query}") diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py index 22ba49e68..4130b7ee3 100644 --- a/backend/danswer/secondary_llm_flows/query_validation.py +++ b/backend/danswer/secondary_llm_flows/query_validation.py @@ -7,6 +7,8 @@ from danswer.configs.chat_configs import DISABLE_LLM_QUERY_ANSWERABILITY from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_generator_to_string_generator +from 
danswer.llm.utils import message_to_string from danswer.prompts.constants import ANSWERABLE_PAT from danswer.prompts.constants import THOUGHT_PAT from danswer.prompts.query_validation import ANSWERABLE_PROMPT @@ -56,7 +58,7 @@ def get_query_answerability( messages = get_query_validation_messages(user_query) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = llm.invoke(filled_llm_prompt) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) reasoning = extract_answerability_reasoning(model_output) answerable = extract_answerability_bool(model_output) @@ -86,11 +88,10 @@ def stream_query_answerability( ).dict() ) return - messages = get_query_validation_messages(user_query) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) try: - tokens = llm.stream(filled_llm_prompt) + tokens = message_generator_to_string_generator(llm.stream(filled_llm_prompt)) reasoning_pat_found = False model_output = "" hold_answerable = "" diff --git a/backend/danswer/secondary_llm_flows/source_filter.py b/backend/danswer/secondary_llm_flows/source_filter.py index 969bd9282..6a27963ff 100644 --- a/backend/danswer/secondary_llm_flows/source_filter.py +++ b/backend/danswer/secondary_llm_flows/source_filter.py @@ -9,6 +9,7 @@ from danswer.db.engine import get_sqlalchemy_engine from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.constants import SOURCES_KEY from danswer.prompts.filter_extration import FILE_SOURCE_WARNING from danswer.prompts.filter_extration import SOURCE_FILTER_PROMPT @@ -157,7 +158,7 @@ def extract_source_filter( messages = _get_source_filter_messages(query=query, valid_sources=valid_sources) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = llm.invoke(filled_llm_prompt) + model_output = 
message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) return _extract_source_filters_from_llm_out(model_output) diff --git a/backend/danswer/secondary_llm_flows/time_filter.py b/backend/danswer/secondary_llm_flows/time_filter.py index be2799f8f..9080dc1f9 100644 --- a/backend/danswer/secondary_llm_flows/time_filter.py +++ b/backend/danswer/secondary_llm_flows/time_filter.py @@ -8,6 +8,7 @@ from dateutil.parser import parse from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.llm.utils import message_to_string from danswer.prompts.filter_extration import TIME_FILTER_PROMPT from danswer.prompts.prompt_utils import get_current_llm_day_time from danswer.utils.logger import setup_logger @@ -153,7 +154,7 @@ def extract_time_filter(query: str) -> tuple[datetime | None, bool]: messages = _get_time_filter_messages(query) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) - model_output = llm.invoke(filled_llm_prompt) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) logger.debug(model_output) return _extract_time_filter_from_llm_out(model_output) diff --git a/backend/danswer/server/auth_check.py b/backend/danswer/server/auth_check.py new file mode 100644 index 000000000..53ef572da --- /dev/null +++ b/backend/danswer/server/auth_check.py @@ -0,0 +1,107 @@ +from typing import cast + +from fastapi import FastAPI +from fastapi.dependencies.models import Dependant +from starlette.routing import BaseRoute + +from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user +from danswer.configs.app_configs import APP_API_PREFIX +from danswer.server.danswer_api.ingestion import api_key_dep + + +PUBLIC_ENDPOINT_SPECS = [ + # built-in documentation functions + ("/openapi.json", {"GET", "HEAD"}), + ("/docs", {"GET", "HEAD"}), + ("/docs/oauth2-redirect", {"GET", 
"HEAD"}), + ("/redoc", {"GET", "HEAD"}), + # should always be callable, will just return 401 if not authenticated + ("/me", {"GET"}), + # just returns 200 to validate that the server is up + ("/health", {"GET"}), + # just returns auth type, needs to be accessible before the user is logged + # in to determine what flow to give the user + ("/auth/type", {"GET"}), + # just gets the version of Danswer (e.g. 0.3.11) + ("/version", {"GET"}), + # stuff related to basic auth + ("/auth/register", {"POST"}), + ("/auth/login", {"POST"}), + ("/auth/logout", {"POST"}), + ("/auth/forgot-password", {"POST"}), + ("/auth/reset-password", {"POST"}), + ("/auth/request-verify-token", {"POST"}), + ("/auth/verify", {"POST"}), + ("/users/me", {"GET"}), + ("/users/me", {"PATCH"}), + ("/users/{id}", {"GET"}), + ("/users/{id}", {"PATCH"}), + ("/users/{id}", {"DELETE"}), + # oauth + ("/auth/oauth/authorize", {"GET"}), + ("/auth/oauth/callback", {"GET"}), +] + + +def is_route_in_spec_list( + route: BaseRoute, public_endpoint_specs: list[tuple[str, set[str]]] +) -> bool: + if not hasattr(route, "path") or not hasattr(route, "methods"): + return False + + # try adding the prefix AND not adding the prefix, since some endpoints + # are not prefixed (e.g. 
/openapi.json) + if (route.path, route.methods) in public_endpoint_specs: + return True + + processed_global_prefix = f"/{APP_API_PREFIX.strip('/')}" if APP_API_PREFIX else "" + if not processed_global_prefix: + return False + + for endpoint_spec in public_endpoint_specs: + base_path, methods = endpoint_spec + prefixed_path = f"{processed_global_prefix}/{base_path.strip('/')}" + + if prefixed_path == route.path and route.methods == methods: + return True + + return False + + +def check_router_auth( + application: FastAPI, + public_endpoint_specs: list[tuple[str, set[str]]] = PUBLIC_ENDPOINT_SPECS, +) -> None: + """Ensures that all endpoints on the passed in application either + (1) have auth enabled OR + (2) are explicitly marked as a public endpoint + """ + for route in application.routes: + # explicitly marked as public + if is_route_in_spec_list(route, public_endpoint_specs): + continue + + # check for auth + found_auth = False + route_dependant_obj = cast( + Dependant | None, route.dependant if hasattr(route, "dependant") else None + ) + if route_dependant_obj: + for dependency in route_dependant_obj.dependencies: + depends_fn = dependency.cache_key[0] + if ( + depends_fn == current_user + or depends_fn == current_admin_user + or depends_fn == api_key_dep + ): + found_auth = True + break + + if not found_auth: + # uncomment to print out all route(s) that are missing auth + # print(f"(\"{route.path}\", {set(route.methods)}),") + + raise RuntimeError( + f"Did not find current_user or current_admin_user dependency in route - {route}" + ) diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index 8856e20d6..33a5b4f52 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -1,5 +1,4 @@ import secrets -from typing import cast from fastapi import APIRouter from fastapi import Depends @@ -10,22 +9,21 @@ from sqlalchemy.orm import Session from 
danswer.configs.constants import DocumentSource from danswer.connectors.models import Document from danswer.connectors.models import IndexAttemptMetadata -from danswer.db.connector import fetch_connector_by_id -from danswer.db.connector import fetch_ingestion_connector_by_name -from danswer.db.connector_credential_pair import get_connector_credential_pair -from danswer.db.credentials import fetch_credential_by_id +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.document import get_documents_by_cc_pair +from danswer.db.document import get_ingestion_documents from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_secondary_db_embedding_model from danswer.db.engine import get_session from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.indexing.embedder import DefaultIndexingEmbedder from danswer.indexing.indexing_pipeline import build_indexing_pipeline +from danswer.server.danswer_api.models import DocMinimalInfo from danswer.server.danswer_api.models import IngestionDocument from danswer.server.danswer_api.models import IngestionResult -from danswer.server.models import ApiKey from danswer.utils.logger import setup_logger logger = setup_logger() @@ -48,7 +46,7 @@ def get_danswer_api_key(key_len: int = 30, dont_regenerate: bool = False) -> str logger.info("Generating Danswer API Key") api_key = "dn_" + secrets.token_urlsafe(key_len) - kv_store.store(_DANSWER_API_KEY, api_key) + kv_store.store(_DANSWER_API_KEY, api_key, encrypt=True) return api_key @@ -69,75 +67,45 @@ def api_key_dep(authorization: str = Header(...)) -> str: return token -# Provides a way 
to recover if the api key is deleted for some reason -# Can also just restart the server to regenerate a new one -def api_key_dep_if_exist(authorization: str | None = Header(None)) -> str | None: - token = authorization.removeprefix("Bearer ").strip() if authorization else None - saved_key = get_danswer_api_key(dont_regenerate=True) - if not saved_key: - return None - - if token != saved_key: - raise HTTPException(status_code=401, detail="Invalid API key") - - return token +@router.get("/connector-docs/{cc_pair_id}") +def get_docs_by_connector_credential_pair( + cc_pair_id: int, + _: str = Depends(api_key_dep), + db_session: Session = Depends(get_session), +) -> list[DocMinimalInfo]: + db_docs = get_documents_by_cc_pair(cc_pair_id=cc_pair_id, db_session=db_session) + return [ + DocMinimalInfo( + document_id=doc.id, + semantic_id=doc.semantic_id, + link=doc.link, + ) + for doc in db_docs + ] -@router.post("/regenerate-key") -def regenerate_key(_: str | None = Depends(api_key_dep_if_exist)) -> ApiKey: - delete_danswer_api_key() - return ApiKey(api_key=cast(str, get_danswer_api_key())) +@router.get("/ingestion") +def get_ingestion_docs( + _: str = Depends(api_key_dep), + db_session: Session = Depends(get_session), +) -> list[DocMinimalInfo]: + db_docs = get_ingestion_documents(db_session) + return [ + DocMinimalInfo( + document_id=doc.id, + semantic_id=doc.semantic_id, + link=doc.link, + ) + for doc in db_docs + ] -@router.post("/doc-ingestion") -def document_ingestion( +@router.post("/ingestion") +def upsert_ingestion_doc( doc_info: IngestionDocument, _: str = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> IngestionResult: - """Currently only attaches docs to existing connectors (cc-pairs). 
- Or to the default ingestion connector that is accessible to all users - - Things to note: - - The document id if not provided is automatically generated from the semantic identifier - so if the document source type etc is updated, it won't create a duplicate - """ - if doc_info.credential_id: - credential_id = doc_info.credential_id - credential = fetch_credential_by_id( - credential_id=credential_id, - user=None, - db_session=db_session, - assume_admin=True, - ) - if credential is None: - raise ValueError("Invalid Credential for doc, does not exist.") - else: - credential_id = 0 - - connector_id = doc_info.connector_id - # If user provides id and name, id takes precedence - if connector_id is not None: - connector = fetch_connector_by_id(connector_id, db_session) - if connector is None: - raise ValueError("Invalid Connector for doc, id does not exist.") - elif doc_info.connector_name: - connector = fetch_ingestion_connector_by_name( - doc_info.connector_name, db_session - ) - if connector is None: - raise ValueError("Invalid Connector for doc, name does not exist.") - connector_id = connector.id - else: - connector_id = 0 - - cc_pair = get_connector_credential_pair( - connector_id=connector_id, credential_id=credential_id, db_session=db_session - ) - if cc_pair is None: - raise ValueError("Connector and Credential not associated.") - - # Disregard whatever value is passed, this must be True doc_info.document.from_ingestion_api = True document = Document.from_base(doc_info.document) @@ -146,6 +114,14 @@ def document_ingestion( if document.source == DocumentSource.INGESTION_API: document.source = DocumentSource.FILE + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=doc_info.cc_pair_id or 0, db_session=db_session + ) + if cc_pair is None: + raise HTTPException( + status_code=400, detail="Connector-Credential Pair specified does not exist" + ) + # Need to index for both the primary and secondary index if possible curr_ind_name, sec_ind_name = 
get_both_index_names(db_session) curr_doc_index = get_default_document_index( @@ -165,13 +141,14 @@ def document_ingestion( embedder=index_embedding_model, document_index=curr_doc_index, ignore_time_skip=True, + db_session=db_session, ) new_doc, chunks = indexing_pipeline( documents=[document], index_attempt_metadata=IndexAttemptMetadata( - connector_id=connector_id, - credential_id=credential_id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, ), ) @@ -200,13 +177,14 @@ def document_ingestion( embedder=new_index_embedding_model, document_index=sec_doc_index, ignore_time_skip=True, + db_session=db_session, ) sec_ind_pipeline( documents=[document], index_attempt_metadata=IndexAttemptMetadata( - connector_id=connector_id, - credential_id=credential_id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, ), ) diff --git a/backend/danswer/server/danswer_api/models.py b/backend/danswer/server/danswer_api/models.py index 9b10bef76..8a534c3e3 100644 --- a/backend/danswer/server/danswer_api/models.py +++ b/backend/danswer/server/danswer_api/models.py @@ -5,13 +5,15 @@ from danswer.connectors.models import DocumentBase class IngestionDocument(BaseModel): document: DocumentBase - connector_id: int | None = None # Takes precedence over the name - connector_name: str | None = None - credential_id: int | None = None - create_connector: bool = False # Currently not allowed - public_doc: bool = True # To attach to the cc_pair, currently unused + cc_pair_id: int | None class IngestionResult(BaseModel): document_id: str already_existed: bool + + +class DocMinimalInfo(BaseModel): + document_id: str + semantic_id: str + link: str | None diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index a5fa93d40..e18334680 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -1,3 +1,5 @@ +import os +import uuid from typing 
import cast from fastapi import APIRouter @@ -6,13 +8,15 @@ from fastapi import HTTPException from fastapi import Request from fastapi import Response from fastapi import UploadFile +from pydantic import BaseModel from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.auth.users import current_user from danswer.background.celery.celery_utils import get_deletion_status +from danswer.configs.app_configs import ENABLED_CONNECTOR_TYPES from danswer.configs.constants import DocumentSource -from danswer.connectors.file.utils import write_temp_files +from danswer.configs.constants import FileOrigin from danswer.connectors.gmail.connector_auth import delete_gmail_service_account_key from danswer.connectors.gmail.connector_auth import delete_google_app_gmail_cred from danswer.connectors.gmail.connector_auth import get_gmail_auth_url @@ -63,6 +67,7 @@ from danswer.db.index_attempt import get_index_attempts_for_cc_pair from danswer.db.index_attempt import get_latest_index_attempts from danswer.db.models import User from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.file_store.file_store import get_default_file_store from danswer.server.documents.models import AuthStatus from danswer.server.documents.models import AuthUrl from danswer.server.documents.models import ConnectorBase @@ -334,18 +339,29 @@ def admin_google_drive_auth( @router.post("/admin/connector/file/upload") def upload_files( - files: list[UploadFile], _: User = Depends(current_admin_user) + files: list[UploadFile], + _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> FileUploadResponse: for file in files: if not file.filename: raise HTTPException(status_code=400, detail="File name cannot be empty") try: - file_paths = write_temp_files( - [(cast(str, file.filename), file.file) for file in files] - ) + file_store = get_default_file_store(db_session) + deduped_file_paths = [] + for file in files: + 
file_path = os.path.join(str(uuid.uuid4()), cast(str, file.filename)) + deduped_file_paths.append(file_path) + file_store.save_file( + file_name=file_path, + content=file.file, + display_name=file.filename, + file_origin=FileOrigin.CONNECTOR, + file_type=file.content_type or "text/plain", + ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) - return FileUploadResponse(file_paths=file_paths) + return FileUploadResponse(file_paths=deduped_file_paths) @router.get("/admin/connector/indexing-status") @@ -402,7 +418,9 @@ def get_connector_indexing_status( credential=CredentialSnapshot.from_credential_db_model(credential), public_doc=cc_pair.is_public, owner=credential.user.email if credential.user else "", - last_status=cc_pair.last_attempt_status, + last_status=latest_index_attempt.status + if latest_index_attempt + else None, last_success=cc_pair.last_successful_index_time, docs_indexed=cc_pair_to_document_cnt.get( (connector.id, credential.id), 0 @@ -421,22 +439,43 @@ def get_connector_indexing_status( db_session=db_session, ), is_deletable=check_deletion_attempt_is_allowed( - connector_credential_pair=cc_pair - ), + connector_credential_pair=cc_pair, + db_session=db_session, + # allow scheduled indexing attempts here, since on deletion request we will cancel them + allow_scheduled=True, + ) + is None, ) ) return indexing_statuses +def _validate_connector_allowed(source: DocumentSource) -> None: + valid_connectors = [ + x for x in ENABLED_CONNECTOR_TYPES.replace("_", "").split(",") if x + ] + if not valid_connectors: + return + for connector_type in valid_connectors: + if source.value.lower().replace("_", "") == connector_type: + return + + raise ValueError( + "This connector type has been disabled by your system admin. " + "Please contact them to get it enabled if you wish to use it." 
+ ) + + @router.post("/admin/connector") def create_connector_from_model( - connector_info: ConnectorBase, + connector_data: ConnectorBase, _: User = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ObjectCreationIdResponse: try: - return create_connector(connector_info, db_session) + _validate_connector_allowed(connector_data.source) + return create_connector(connector_data, db_session) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -448,6 +487,11 @@ def update_connector_from_model( _: User = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ConnectorSnapshot | StatusResponse[int]: + try: + _validate_connector_allowed(connector_data.source) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + updated_connector = update_connector(connector_id, connector_data, db_session) if updated_connector is None: raise HTTPException( @@ -689,3 +733,43 @@ def get_connector_by_id( time_updated=connector.time_updated, disabled=connector.disabled, ) + + +class BasicCCPairInfo(BaseModel): + docs_indexed: int + has_successful_run: bool + source: DocumentSource + + +@router.get("/indexing-status") +def get_basic_connector_indexing_status( + _: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[BasicCCPairInfo]: + cc_pairs = get_connector_credential_pairs(db_session) + cc_pair_identifiers = [ + ConnectorCredentialPairIdentifier( + connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id + ) + for cc_pair in cc_pairs + ] + document_count_info = get_document_cnts_for_cc_pairs( + db_session=db_session, + cc_pair_identifiers=cc_pair_identifiers, + ) + cc_pair_to_document_cnt = { + (connector_id, credential_id): cnt + for connector_id, credential_id, cnt in document_count_info + } + return [ + BasicCCPairInfo( + docs_indexed=cc_pair_to_document_cnt.get( + (cc_pair.connector_id, cc_pair.credential_id) + ) + or 0, + 
has_successful_run=cc_pair.last_successful_index_time is not None, + source=cc_pair.connector.source, + ) + for cc_pair in cc_pairs + if cc_pair.connector.source != DocumentSource.INGESTION_API + ] diff --git a/backend/danswer/server/documents/document.py b/backend/danswer/server/documents/document.py index a0ba40254..3b0adea24 100644 --- a/backend/danswer/server/documents/document.py +++ b/backend/danswer/server/documents/document.py @@ -5,14 +5,13 @@ from fastapi import Query from sqlalchemy.orm import Session from danswer.auth.users import current_user -from danswer.chat.chat_utils import build_doc_context_str from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session from danswer.db.models import User from danswer.document_index.factory import get_default_document_index from danswer.llm.utils import get_default_llm_token_encode -from danswer.search.access_filters import build_access_filters_for_user -from danswer.search.models import IndexFilters +from danswer.prompts.prompt_utils import build_doc_context_str +from danswer.search.preprocessing.access_filters import build_access_filters_for_user from danswer.server.documents.models import ChunkInfo from danswer.server.documents.models import DocumentInfo @@ -35,12 +34,11 @@ def get_document_info( ) user_acl_filters = build_access_filters_for_user(user, db_session) - filters = IndexFilters(access_control_list=user_acl_filters) - inference_chunks = document_index.id_based_retrieval( document_id=document_id, - chunk_ind=None, - filters=filters, + min_chunk_ind=None, + max_chunk_ind=None, + user_access_control_list=user_acl_filters, ) if not inference_chunks: @@ -82,12 +80,11 @@ def get_chunk_info( ) user_acl_filters = build_access_filters_for_user(user, db_session) - filters = IndexFilters(access_control_list=user_acl_filters) - inference_chunks = document_index.id_based_retrieval( document_id=document_id, - chunk_ind=chunk_id, - filters=filters, + 
min_chunk_ind=chunk_id, + max_chunk_ind=chunk_id, + user_access_control_list=user_acl_filters, ) if not inference_chunks: diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index cac00578e..d574cc361 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -31,6 +31,7 @@ class IndexAttemptSnapshot(BaseModel): status: IndexingStatus | None new_docs_indexed: int # only includes completely new docs total_docs_indexed: int # includes docs that are updated + docs_removed_from_index: int error_msg: str | None full_exception_trace: str | None time_started: str | None @@ -45,6 +46,7 @@ class IndexAttemptSnapshot(BaseModel): status=index_attempt.status, new_docs_indexed=index_attempt.new_docs_indexed or 0, total_docs_indexed=index_attempt.total_docs_indexed or 0, + docs_removed_from_index=index_attempt.docs_removed_from_index or 0, error_msg=index_attempt.error_msg, full_exception_trace=index_attempt.full_exception_trace, time_started=index_attempt.time_started.isoformat() diff --git a/backend/danswer/server/features/document_set/api.py b/backend/danswer/server/features/document_set/api.py index a5d604092..f939329bf 100644 --- a/backend/danswer/server/features/document_set/api.py +++ b/backend/danswer/server/features/document_set/api.py @@ -6,7 +6,8 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.auth.users import current_user from danswer.db.document_set import check_document_sets_are_public -from danswer.db.document_set import fetch_document_sets +from danswer.db.document_set import fetch_all_document_sets +from danswer.db.document_set import fetch_user_document_sets from danswer.db.document_set import insert_document_set from danswer.db.document_set import mark_document_set_as_to_be_deleted from danswer.db.document_set import update_document_set @@ -71,15 +72,28 @@ def delete_document_set( raise 
HTTPException(status_code=400, detail=str(e)) +@router.get("/admin/document-set") +def list_document_sets_admin( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[DocumentSet]: + return [ + DocumentSet.from_model(ds) + for ds in fetch_all_document_sets(db_session=db_session) + ] + + """Endpoints for non-admins""" @router.get("/document-set") def list_document_sets( - _: User = Depends(current_user), + user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> list[DocumentSet]: - document_set_info = fetch_document_sets(db_session=db_session) + document_set_info = fetch_user_document_sets( + user_id=user.id if user else None, db_session=db_session + ) return [ DocumentSet( id=document_set_db_model.id, @@ -100,6 +114,9 @@ def list_document_sets( for cc_pair in cc_pairs ], is_up_to_date=document_set_db_model.is_up_to_date, + is_public=document_set_db_model.is_public, + users=[user.id for user in document_set_db_model.users], + groups=[group.id for group in document_set_db_model.groups], ) for document_set_db_model, cc_pairs in document_set_info ] diff --git a/backend/danswer/server/features/document_set/models.py b/backend/danswer/server/features/document_set/models.py index 2f5be4587..05ada42c8 100644 --- a/backend/danswer/server/features/document_set/models.py +++ b/backend/danswer/server/features/document_set/models.py @@ -1,3 +1,5 @@ +from uuid import UUID + from pydantic import BaseModel from danswer.db.models import DocumentSet as DocumentSetDBModel @@ -10,15 +12,27 @@ class DocumentSetCreationRequest(BaseModel): name: str description: str cc_pair_ids: list[int] + is_public: bool + # For Private Document Sets, who should be able to access these + users: list[UUID] | None = None + groups: list[int] | None = None class DocumentSetUpdateRequest(BaseModel): id: int description: str cc_pair_ids: list[int] + is_public: bool + # For Private Document Sets, who should be able to 
access these + users: list[UUID] + groups: list[int] class CheckDocSetPublicRequest(BaseModel): + """Note that this does not mean that the Document Set itself is to be viewable by everyone + Rather, this refers to the CC-Pairs in the Document Set, and if every CC-Pair is public + """ + document_set_ids: list[int] @@ -33,6 +47,10 @@ class DocumentSet(BaseModel): cc_pair_descriptors: list[ConnectorCredentialPairDescriptor] is_up_to_date: bool contains_non_public: bool + is_public: bool + # For Private Document Sets, who should be able to access these + users: list[UUID] + groups: list[int] @classmethod def from_model(cls, document_set_model: DocumentSetDBModel) -> "DocumentSet": @@ -60,4 +78,7 @@ class DocumentSet(BaseModel): for cc_pair in document_set_model.connector_credential_pairs ], is_up_to_date=document_set_model.is_up_to_date, + is_public=document_set_model.is_public, + users=[user.id for user in document_set_model.users], + groups=[group.id for group in document_set_model.groups], ) diff --git a/backend/danswer/server/features/folder/__init__.py b/backend/danswer/server/features/folder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/danswer/server/features/folder/api.py b/backend/danswer/server/features/folder/api.py new file mode 100644 index 000000000..000207370 --- /dev/null +++ b/backend/danswer/server/features/folder/api.py @@ -0,0 +1,176 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from fastapi import Path +from sqlalchemy.orm import Session + +from danswer.auth.users import current_user +from danswer.db.chat import get_chat_session_by_id +from danswer.db.engine import get_session +from danswer.db.folder import add_chat_to_folder +from danswer.db.folder import create_folder +from danswer.db.folder import delete_folder +from danswer.db.folder import get_user_folders +from danswer.db.folder import remove_chat_from_folder +from danswer.db.folder import rename_folder 
+from danswer.db.folder import update_folder_display_priority +from danswer.db.models import User +from danswer.server.features.folder.models import DeleteFolderOptions +from danswer.server.features.folder.models import FolderChatSessionRequest +from danswer.server.features.folder.models import FolderCreationRequest +from danswer.server.features.folder.models import FolderResponse +from danswer.server.features.folder.models import FolderUpdateRequest +from danswer.server.features.folder.models import GetUserFoldersResponse +from danswer.server.models import DisplayPriorityRequest +from danswer.server.query_and_chat.models import ChatSessionDetails + +router = APIRouter(prefix="/folder") + + +@router.get("") +def get_folders( + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> GetUserFoldersResponse: + folders = get_user_folders( + user_id=user.id if user else None, + db_session=db_session, + ) + folders.sort() + return GetUserFoldersResponse( + folders=[ + FolderResponse( + folder_id=folder.id, + folder_name=folder.name, + display_priority=folder.display_priority, + chat_sessions=[ + ChatSessionDetails( + id=chat_session.id, + name=chat_session.description, + persona_id=chat_session.persona_id, + time_created=chat_session.time_created.isoformat(), + shared_status=chat_session.shared_status, + folder_id=folder.id, + ) + for chat_session in folder.chat_sessions + if not chat_session.deleted + ], + ) + for folder in folders + ] + ) + + +@router.put("/reorder") +def put_folder_display_priority( + display_priority_request: DisplayPriorityRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + update_folder_display_priority( + user_id=user.id if user else None, + display_priority_map=display_priority_request.display_priority_map, + db_session=db_session, + ) + + +@router.post("") +def create_folder_endpoint( + request: FolderCreationRequest, + user: User = 
Depends(current_user), + db_session: Session = Depends(get_session), +) -> int: + return create_folder( + user_id=user.id if user else None, + folder_name=request.folder_name, + db_session=db_session, + ) + + +@router.patch("/{folder_id}") +def patch_folder_endpoint( + request: FolderUpdateRequest, + folder_id: int = Path(..., description="The ID of the folder to rename"), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + try: + rename_folder( + user_id=user.id if user else None, + folder_id=folder_id, + folder_name=request.folder_name, + db_session=db_session, + ) + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@router.delete("/{folder_id}") +def delete_folder_endpoint( + request: DeleteFolderOptions, + folder_id: int = Path(..., description="The ID of the folder to delete"), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + user_id = user.id if user else None + try: + delete_folder( + user_id=user_id, + folder_id=folder_id, + including_chats=request.including_chats, + db_session=db_session, + ) + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@router.post("/{folder_id}/add-chat-session") +def add_chat_to_folder_endpoint( + request: FolderChatSessionRequest, + folder_id: int = Path( + ..., description="The ID of the folder in which to add the chat session" + ), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + user_id = user.id if user else None + try: + chat_session = get_chat_session_by_id( + chat_session_id=request.chat_session_id, + user_id=user_id, + db_session=db_session, + ) + add_chat_to_folder( + user_id=user.id if user else None, + folder_id=folder_id, + chat_session=chat_session, + db_session=db_session, + ) + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) + + +@router.post("/{folder_id}/remove-chat-session/") 
+def remove_chat_from_folder_endpoint( + request: FolderChatSessionRequest, + folder_id: int = Path( + ..., description="The ID of the folder from which to remove the chat session" + ), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + user_id = user.id if user else None + try: + chat_session = get_chat_session_by_id( + chat_session_id=request.chat_session_id, + user_id=user_id, + db_session=db_session, + ) + remove_chat_from_folder( + user_id=user_id, + folder_id=folder_id, + chat_session=chat_session, + db_session=db_session, + ) + except Exception as e: + raise HTTPException(status_code=400, detail=str(e)) diff --git a/backend/danswer/server/features/folder/models.py b/backend/danswer/server/features/folder/models.py new file mode 100644 index 000000000..d665fd919 --- /dev/null +++ b/backend/danswer/server/features/folder/models.py @@ -0,0 +1,30 @@ +from pydantic import BaseModel + +from danswer.server.query_and_chat.models import ChatSessionDetails + + +class FolderResponse(BaseModel): + folder_id: int + folder_name: str | None + display_priority: int + chat_sessions: list[ChatSessionDetails] + + +class GetUserFoldersResponse(BaseModel): + folders: list[FolderResponse] + + +class FolderCreationRequest(BaseModel): + folder_name: str | None = None + + +class FolderUpdateRequest(BaseModel): + folder_name: str | None + + +class FolderChatSessionRequest(BaseModel): + chat_session_id: int + + +class DeleteFolderOptions(BaseModel): + including_chats: bool = False diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py index 2614a2233..f69550fcd 100644 --- a/backend/danswer/server/features/persona/api.py +++ b/backend/danswer/server/features/persona/api.py @@ -1,27 +1,27 @@ +from uuid import UUID + from fastapi import APIRouter from fastapi import Depends -from fastapi import HTTPException from pydantic import BaseModel from sqlalchemy.orm import Session from 
danswer.auth.users import current_admin_user from danswer.auth.users import current_user -from danswer.configs.model_configs import GEN_AI_MODEL_PROVIDER -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION from danswer.db.chat import get_persona_by_id from danswer.db.chat import get_personas -from danswer.db.chat import get_prompts_by_ids from danswer.db.chat import mark_persona_as_deleted +from danswer.db.chat import mark_persona_as_not_deleted from danswer.db.chat import update_all_personas_display_priority from danswer.db.chat import update_persona_visibility -from danswer.db.chat import upsert_persona -from danswer.db.document_set import get_document_sets_by_ids from danswer.db.engine import get_session from danswer.db.models import User -from danswer.one_shot_answer.qa_block import build_dummy_prompt +from danswer.db.persona import create_update_persona +from danswer.db.persona import update_persona_shared_users +from danswer.llm.answering.prompts.utils import build_dummy_prompt from danswer.server.features.persona.models import CreatePersonaRequest from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.features.persona.models import PromptTemplateResponse +from danswer.server.models import DisplayPriorityRequest from danswer.utils.logger import setup_logger logger = setup_logger() @@ -31,79 +31,6 @@ admin_router = APIRouter(prefix="/admin/persona") basic_router = APIRouter(prefix="/persona") -def create_update_persona( - persona_id: int | None, - create_persona_request: CreatePersonaRequest, - user: User | None, - db_session: Session, -) -> PersonaSnapshot: - user_id = user.id if user is not None else None - - # Permission to actually use these is checked later - document_sets = list( - get_document_sets_by_ids( - document_set_ids=create_persona_request.document_set_ids, - db_session=db_session, - ) - ) - prompts = list( - get_prompts_by_ids( - prompt_ids=create_persona_request.prompt_ids, - db_session=db_session, - 
) - ) - - try: - persona = upsert_persona( - persona_id=persona_id, - user_id=user_id, - name=create_persona_request.name, - description=create_persona_request.description, - num_chunks=create_persona_request.num_chunks, - llm_relevance_filter=create_persona_request.llm_relevance_filter, - llm_filter_extraction=create_persona_request.llm_filter_extraction, - recency_bias=create_persona_request.recency_bias, - prompts=prompts, - document_sets=document_sets, - llm_model_version_override=create_persona_request.llm_model_version_override, - shared=create_persona_request.shared, - db_session=db_session, - ) - except ValueError as e: - logger.exception("Failed to create persona") - raise HTTPException(status_code=400, detail=str(e)) - return PersonaSnapshot.from_model(persona) - - -@admin_router.post("") -def create_persona( - create_persona_request: CreatePersonaRequest, - user: User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> PersonaSnapshot: - return create_update_persona( - persona_id=None, - create_persona_request=create_persona_request, - user=user, - db_session=db_session, - ) - - -@admin_router.patch("/{persona_id}") -def update_persona( - persona_id: int, - update_persona_request: CreatePersonaRequest, - user: User | None = Depends(current_admin_user), - db_session: Session = Depends(get_session), -) -> PersonaSnapshot: - return create_update_persona( - persona_id=persona_id, - create_persona_request=update_persona_request, - user=user, - db_session=db_session, - ) - - class IsVisibleRequest(BaseModel): is_visible: bool @@ -122,11 +49,6 @@ def patch_persona_visibility( ) -class DisplayPriorityRequest(BaseModel): - # maps persona id to display priority - display_priority_map: dict[int, int] - - @admin_router.put("/display-priority") def patch_persona_display_priority( display_priority_request: DisplayPriorityRequest, @@ -139,15 +61,95 @@ def patch_persona_display_priority( ) -@admin_router.delete("/{persona_id}") -def 
delete_persona( +@admin_router.get("") +def list_personas_admin( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), + include_deleted: bool = False, +) -> list[PersonaSnapshot]: + return [ + PersonaSnapshot.from_model(persona) + for persona in get_personas( + db_session=db_session, + user_id=None, # user_id = None -> give back all personas + include_deleted=include_deleted, + ) + ] + + +@admin_router.patch("/{persona_id}/undelete") +def undelete_persona( persona_id: int, user: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), +) -> None: + mark_persona_as_not_deleted( + persona_id=persona_id, + user=user, + db_session=db_session, + ) + + +"""Endpoints for all""" + + +@basic_router.post("") +def create_persona( + create_persona_request: CreatePersonaRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> PersonaSnapshot: + return create_update_persona( + persona_id=None, + create_persona_request=create_persona_request, + user=user, + db_session=db_session, + ) + + +@basic_router.patch("/{persona_id}") +def update_persona( + persona_id: int, + update_persona_request: CreatePersonaRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> PersonaSnapshot: + return create_update_persona( + persona_id=persona_id, + create_persona_request=update_persona_request, + user=user, + db_session=db_session, + ) + + +class PersonaShareRequest(BaseModel): + user_ids: list[UUID] + + +@basic_router.patch("/{persona_id}/share") +def share_persona( + persona_id: int, + persona_share_request: PersonaShareRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + update_persona_shared_users( + persona_id=persona_id, + user_ids=persona_share_request.user_ids, + user=user, + db_session=db_session, + ) + + +@basic_router.delete("/{persona_id}") +def 
delete_persona( + persona_id: int, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), ) -> None: mark_persona_as_deleted( persona_id=persona_id, - user_id=user.id if user is not None else None, + user=user, db_session=db_session, ) @@ -156,11 +158,14 @@ def delete_persona( def list_personas( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), + include_deleted: bool = False, ) -> list[PersonaSnapshot]: user_id = user.id if user is not None else None return [ PersonaSnapshot.from_model(persona) - for persona in get_personas(user_id=user_id, db_session=db_session) + for persona in get_personas( + user_id=user_id, include_deleted=include_deleted, db_session=db_session + ) ] @@ -173,7 +178,7 @@ def get_persona( return PersonaSnapshot.from_model( get_persona_by_id( persona_id=persona_id, - user_id=user.id if user is not None else None, + user=user, db_session=db_session, ) ) @@ -193,47 +198,3 @@ def build_final_template_prompt( retrieval_disabled=retrieval_disabled, ) ) - - -"""Utility endpoints for selecting which model to use for a persona. 
-Putting here for now, since we have no other flows which use this.""" - -GPT_4_MODEL_VERSIONS = [ - "gpt-4-1106-preview", - "gpt-4", - "gpt-4-32k", - "gpt-4-0613", - "gpt-4-32k-0613", - "gpt-4-0314", - "gpt-4-32k-0314", -] -GPT_3_5_TURBO_MODEL_VERSIONS = [ - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-16k-0613", - "gpt-3.5-turbo-0301", -] - - -@admin_router.get("/utils/list-available-models") -def list_available_model_versions( - _: User | None = Depends(current_admin_user), -) -> list[str]: - # currently only support selecting different models for OpenAI - if GEN_AI_MODEL_PROVIDER != "openai": - return [] - - return GPT_4_MODEL_VERSIONS + GPT_3_5_TURBO_MODEL_VERSIONS - - -@admin_router.get("/utils/default-model") -def get_default_model( - _: User | None = Depends(current_admin_user), -) -> str: - # currently only support selecting different models for OpenAI - if GEN_AI_MODEL_PROVIDER != "openai": - return "" - - return GEN_AI_MODEL_VERSION diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py index 1eca57f5a..aee39e72a 100644 --- a/backend/danswer/server/features/persona/models.py +++ b/backend/danswer/server/features/persona/models.py @@ -1,61 +1,102 @@ +from uuid import UUID + from pydantic import BaseModel from danswer.db.models import Persona -from danswer.search.models import RecencyBiasSetting +from danswer.db.models import StarterMessage +from danswer.search.enums import RecencyBiasSetting from danswer.server.features.document_set.models import DocumentSet from danswer.server.features.prompt.models import PromptSnapshot +from danswer.server.features.tool.api import ToolSnapshot +from danswer.server.models import MinimalUserSnapshot +from danswer.utils.logger import setup_logger + + +logger = setup_logger() class CreatePersonaRequest(BaseModel): name: str description: str - shared: bool num_chunks: float llm_relevance_filter: bool + 
is_public: bool llm_filter_extraction: bool recency_bias: RecencyBiasSetting prompt_ids: list[int] document_set_ids: list[int] + # e.g. ID of SearchTool or ImageGenerationTool or + tool_ids: list[int] + llm_model_provider_override: str | None = None llm_model_version_override: str | None = None + starter_messages: list[StarterMessage] | None = None + # For Private Personas, who should be able to access these + users: list[UUID] | None = None + groups: list[int] | None = None class PersonaSnapshot(BaseModel): id: int + owner: MinimalUserSnapshot | None name: str - shared: bool is_visible: bool + is_public: bool display_priority: int | None description: str num_chunks: float | None llm_relevance_filter: bool llm_filter_extraction: bool + llm_model_provider_override: str | None llm_model_version_override: str | None + starter_messages: list[StarterMessage] | None default_persona: bool prompts: list[PromptSnapshot] + tools: list[ToolSnapshot] document_sets: list[DocumentSet] + users: list[MinimalUserSnapshot] + groups: list[int] @classmethod - def from_model(cls, persona: Persona) -> "PersonaSnapshot": + def from_model( + cls, persona: Persona, allow_deleted: bool = False + ) -> "PersonaSnapshot": if persona.deleted: - raise ValueError("Persona has been deleted") + error_msg = f"Persona with ID {persona.id} has been deleted" + if not allow_deleted: + raise ValueError(error_msg) + else: + logger.warning(error_msg) return PersonaSnapshot( id=persona.id, name=persona.name, - shared=persona.user_id is None, + owner=( + MinimalUserSnapshot(id=persona.user.id, email=persona.user.email) + if persona.user + else None + ), is_visible=persona.is_visible, + is_public=persona.is_public, display_priority=persona.display_priority, description=persona.description, num_chunks=persona.num_chunks, llm_relevance_filter=persona.llm_relevance_filter, llm_filter_extraction=persona.llm_filter_extraction, + llm_model_provider_override=persona.llm_model_provider_override, 
llm_model_version_override=persona.llm_model_version_override, + starter_messages=persona.starter_messages, default_persona=persona.default_persona, prompts=[PromptSnapshot.from_model(prompt) for prompt in persona.prompts], + tools=[ToolSnapshot.from_model(tool) for tool in persona.tools], document_sets=[ DocumentSet.from_model(document_set_model) for document_set_model in persona.document_sets ], + users=[ + MinimalUserSnapshot(id=user.id, email=user.email) + for user in persona.users + ], + groups=[user_group.id for user_group in persona.groups], ) diff --git a/backend/danswer/server/features/prompt/api.py b/backend/danswer/server/features/prompt/api.py index b9f27675d..24c886ab9 100644 --- a/backend/danswer/server/features/prompt/api.py +++ b/backend/danswer/server/features/prompt/api.py @@ -4,7 +4,6 @@ from fastapi import HTTPException from sqlalchemy.orm import Session from starlette import status -from danswer.auth.users import current_admin_user from danswer.auth.users import current_user from danswer.db.chat import get_personas_by_ids from danswer.db.chat import get_prompt_by_id @@ -32,8 +31,6 @@ def create_update_prompt( user: User | None, db_session: Session, ) -> PromptSnapshot: - user_id = user.id if user is not None else None - personas = ( list( get_personas_by_ids( @@ -47,7 +44,7 @@ def create_update_prompt( prompt = upsert_prompt( prompt_id=prompt_id, - user_id=user_id, + user=user, name=create_prompt_request.name, description=create_prompt_request.description, system_prompt=create_prompt_request.system_prompt, @@ -55,7 +52,6 @@ def create_update_prompt( include_citations=create_prompt_request.include_citations, datetime_aware=create_prompt_request.datetime_aware, personas=personas, - shared=create_prompt_request.shared, db_session=db_session, ) return PromptSnapshot.from_model(prompt) @@ -64,7 +60,7 @@ def create_update_prompt( @basic_router.post("") def create_prompt( create_prompt_request: CreatePromptRequest, - user: User | None = 
Depends(current_admin_user), + user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> PromptSnapshot: try: @@ -124,7 +120,7 @@ def delete_prompt( ) -> None: mark_prompt_as_deleted( prompt_id=prompt_id, - user_id=user.id if user is not None else None, + user=user, db_session=db_session, ) @@ -150,7 +146,7 @@ def get_prompt( return PromptSnapshot.from_model( get_prompt_by_id( prompt_id=prompt_id, - user_id=user.id if user is not None else None, + user=user, db_session=db_session, ) ) diff --git a/backend/danswer/server/features/prompt/models.py b/backend/danswer/server/features/prompt/models.py index 0ae70c58d..1cc9452f4 100644 --- a/backend/danswer/server/features/prompt/models.py +++ b/backend/danswer/server/features/prompt/models.py @@ -6,7 +6,6 @@ from danswer.db.models import Prompt class CreatePromptRequest(BaseModel): name: str description: str - shared: bool system_prompt: str task_prompt: str include_citations: bool = False @@ -17,7 +16,6 @@ class CreatePromptRequest(BaseModel): class PromptSnapshot(BaseModel): id: int name: str - shared: bool description: str system_prompt: str task_prompt: str @@ -34,7 +32,6 @@ class PromptSnapshot(BaseModel): return PromptSnapshot( id=prompt.id, name=prompt.name, - shared=prompt.user_id is None, description=prompt.description, system_prompt=prompt.system_prompt, task_prompt=prompt.task_prompt, diff --git a/backend/danswer/server/features/tool/api.py b/backend/danswer/server/features/tool/api.py new file mode 100644 index 000000000..0a9666646 --- /dev/null +++ b/backend/danswer/server/features/tool/api.py @@ -0,0 +1,38 @@ +from fastapi import APIRouter +from fastapi import Depends +from pydantic import BaseModel +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.auth.users import current_user +from danswer.db.engine import get_session +from danswer.db.models import Tool +from danswer.db.models import User + + +router = APIRouter(prefix="/tool") + + +class 
ToolSnapshot(BaseModel): + id: int + name: str + description: str + in_code_tool_id: str | None + + @classmethod + def from_model(cls, tool: Tool) -> "ToolSnapshot": + return cls( + id=tool.id, + name=tool.name, + description=tool.description, + in_code_tool_id=tool.in_code_tool_id, + ) + + +@router.get("") +def list_tools( + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> list[ToolSnapshot]: + tools = db_session.execute(select(Tool)).scalars().all() + return [ToolSnapshot.from_model(tool) for tool in tools] diff --git a/backend/danswer/server/gpts/api.py b/backend/danswer/server/gpts/api.py index 980003252..ca6978b57 100644 --- a/backend/danswer/server/gpts/api.py +++ b/backend/danswer/server/gpts/api.py @@ -6,13 +6,9 @@ from fastapi import Depends from pydantic import BaseModel from sqlalchemy.orm import Session -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_session -from danswer.document_index.factory import get_default_document_index -from danswer.search.access_filters import build_access_filters_for_user -from danswer.search.models import IndexFilters -from danswer.search.models import SearchQuery -from danswer.search.search_runner import full_chunk_search +from danswer.search.models import SearchRequest +from danswer.search.pipeline import SearchPipeline from danswer.server.danswer_api.ingestion import api_key_dep from danswer.utils.logger import setup_logger @@ -70,27 +66,13 @@ def gpt_search( _: str | None = Depends(api_key_dep), db_session: Session = Depends(get_session), ) -> GptSearchResponse: - query = search_request.query - - user_acl_filters = build_access_filters_for_user(None, db_session) - final_filters = IndexFilters(access_control_list=user_acl_filters) - - search_query = SearchQuery( - query=query, - filters=final_filters, - recency_bias_multiplier=1.0, - skip_llm_chunk_filter=True, - ) - - embedding_model = 
get_current_db_embedding_model(db_session) - - document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None - ) - - top_chunks, __ = full_chunk_search( - query=search_query, document_index=document_index, db_session=db_session - ) + top_chunks = SearchPipeline( + search_request=SearchRequest( + query=search_request.query, + ), + user=None, + db_session=db_session, + ).reranked_chunks return GptSearchResponse( matching_document_chunks=[ diff --git a/backend/danswer/server/manage/administrative.py b/backend/danswer/server/manage/administrative.py index 2d12ca148..c60206ca3 100644 --- a/backend/danswer/server/manage/administrative.py +++ b/backend/danswer/server/manage/administrative.py @@ -1,41 +1,48 @@ +import json from datetime import datetime from datetime import timedelta from datetime import timezone from typing import cast from fastapi import APIRouter +from fastapi import Body from fastapi import Depends from fastapi import HTTPException from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ -from danswer.configs.constants import GEN_AI_API_KEY_STORAGE_KEY +from danswer.configs.app_configs import TOKEN_BUDGET_GLOBALLY_ENABLED +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import ENABLE_TOKEN_BUDGET +from danswer.configs.constants import TOKEN_BUDGET +from danswer.configs.constants import TOKEN_BUDGET_SETTINGS +from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.engine import get_session from danswer.db.feedback import fetch_docs_ranked_by_boost from danswer.db.feedback import update_document_boost from danswer.db.feedback import update_document_hidden +from danswer.db.index_attempt 
import cancel_indexing_attempts_for_connector from danswer.db.models import User from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.llm.exceptions import GenAIDisabledException +from danswer.file_store.file_store import get_default_file_store from danswer.llm.factory import get_default_llm -from danswer.llm.utils import get_gen_ai_api_key from danswer.llm.utils import test_llm from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.manage.models import BoostDoc from danswer.server.manage.models import BoostUpdateRequest from danswer.server.manage.models import HiddenUpdateRequest -from danswer.server.models import ApiKey from danswer.utils.logger import setup_logger router = APIRouter(prefix="/manage") logger = setup_logger() +GEN_AI_KEY_CHECK_TIME = "genai_api_key_last_check_time" """Admin only API endpoints""" @@ -107,17 +114,16 @@ def document_hidden_update( raise HTTPException(status_code=400, detail=str(e)) -@router.head("/admin/genai-api-key/validate") +@router.get("/admin/genai-api-key/validate") def validate_existing_genai_api_key( _: User = Depends(current_admin_user), ) -> None: # Only validate every so often - check_key_time = "genai_api_key_last_check_time" kv_store = get_dynamic_config_store() curr_time = datetime.now(tz=timezone.utc) try: last_check = datetime.fromtimestamp( - cast(float, kv_store.load(check_key_time)), tz=timezone.utc + cast(float, kv_store.load(GEN_AI_KEY_CHECK_TIME)), tz=timezone.utc ) check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ) if curr_time - last_check < check_freq_sec: @@ -126,70 +132,18 @@ def validate_existing_genai_api_key( # First time checking the key, 
nothing unusual pass - genai_api_key = get_gen_ai_api_key() - try: - llm = get_default_llm(api_key=genai_api_key, timeout=10) - except GenAIDisabledException: - return + llm = get_default_llm(timeout=10) + except ValueError: + raise HTTPException(status_code=404, detail="LLM not setup") - is_valid = test_llm(llm) - - if not is_valid: - if genai_api_key is None: - raise HTTPException(status_code=404, detail="Key not found") - raise HTTPException(status_code=400, detail="Invalid API key provided") + error = test_llm(llm) + if error: + raise HTTPException(status_code=400, detail=error) # Mark check as successful - get_dynamic_config_store().store(check_key_time, curr_time.timestamp()) - - -@router.get("/admin/genai-api-key", response_model=ApiKey) -def get_gen_ai_api_key_from_dynamic_config_store( - _: User = Depends(current_admin_user), -) -> ApiKey: - """ - NOTE: Only gets value from dynamic config store as to not expose env variables. - """ - try: - # only get last 4 characters of key to not expose full key - return ApiKey( - api_key=cast( - str, get_dynamic_config_store().load(GEN_AI_API_KEY_STORAGE_KEY) - )[-4:] - ) - except ConfigNotFoundError: - raise HTTPException(status_code=404, detail="Key not found") - - -@router.put("/admin/genai-api-key") -def store_genai_api_key( - request: ApiKey, - _: User = Depends(current_admin_user), -) -> None: - try: - if not request.api_key: - raise HTTPException(400, "No API key provided") - - llm = get_default_llm(api_key=request.api_key, timeout=10) - is_valid = test_llm(llm) - - if not is_valid: - raise HTTPException(400, "Invalid API key provided") - - get_dynamic_config_store().store(GEN_AI_API_KEY_STORAGE_KEY, request.api_key) - except GenAIDisabledException: - # If Disable Generative AI is set, no need to verify, just store the key for later use - get_dynamic_config_store().store(GEN_AI_API_KEY_STORAGE_KEY, request.api_key) - except RuntimeError as e: - raise HTTPException(400, str(e)) - - 
-@router.delete("/admin/genai-api-key") -def delete_genai_api_key( - _: User = Depends(current_admin_user), -) -> None: - get_dynamic_config_store().delete(GEN_AI_API_KEY_STORAGE_KEY) + curr_time = datetime.now(tz=timezone.utc) + kv_store.store(GEN_AI_KEY_CHECK_TIME, curr_time.timestamp()) @router.post("/admin/deletion-attempt") @@ -215,14 +169,65 @@ def create_deletion_attempt_for_connector_id( f"'{credential_id}' does not exist. Has it already been deleted?", ) - if not check_deletion_attempt_is_allowed(connector_credential_pair=cc_pair): + # Cancel any scheduled indexing attempts + cancel_indexing_attempts_for_connector( + connector_id=connector_id, db_session=db_session, include_secondary_index=True + ) + + # Check if the deletion attempt should be allowed + deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed( + connector_credential_pair=cc_pair, db_session=db_session + ) + if deletion_attempt_disallowed_reason: raise HTTPException( status_code=400, - detail=f"Connector with ID '{connector_id}' and credential ID " - f"'{credential_id}' is not deletable. It must be both disabled AND have " - "no ongoing / planned indexing attempts.", + detail=deletion_attempt_disallowed_reason, ) cleanup_connector_credential_pair_task.apply_async( kwargs=dict(connector_id=connector_id, credential_id=credential_id), ) + + if cc_pair.connector.source == DocumentSource.FILE: + connector = cc_pair.connector + file_store = get_default_file_store(db_session) + for file_name in connector.connector_specific_config["file_locations"]: + file_store.delete_file(file_name) + + +@router.get("/admin/token-budget-settings") +def get_token_budget_settings(_: User = Depends(current_admin_user)) -> dict: + if not TOKEN_BUDGET_GLOBALLY_ENABLED: + raise HTTPException( + status_code=400, detail="Token budget is not enabled in the application." 
+ ) + + try: + settings_json = cast( + str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS) + ) + settings = json.loads(settings_json) + return settings + except ConfigNotFoundError: + raise HTTPException(status_code=404, detail="Token budget settings not found.") + + +@router.put("/admin/token-budget-settings") +def update_token_budget_settings( + _: User = Depends(current_admin_user), + enable_token_budget: bool = Body(..., embed=True), + token_budget: int = Body(..., ge=0, embed=True), # Ensure non-negative + token_budget_time_period: int = Body(..., ge=1, embed=True), # Ensure positive +) -> dict[str, str]: + # Prepare the settings as a JSON string + settings_json = json.dumps( + { + ENABLE_TOKEN_BUDGET: enable_token_budget, + TOKEN_BUDGET: token_budget, + TOKEN_BUDGET_TIME_PERIOD: token_budget_time_period, + } + ) + + # Store the settings in the dynamic config store + get_dynamic_config_store().store(TOKEN_BUDGET_SETTINGS, settings_json) + return {"message": "Token budget settings updated successfully."} diff --git a/backend/danswer/server/manage/llm/api.py b/backend/danswer/server/manage/llm/api.py new file mode 100644 index 000000000..7bc4efe63 --- /dev/null +++ b/backend/danswer/server/manage/llm/api.py @@ -0,0 +1,157 @@ +from collections.abc import Callable + +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user +from danswer.db.engine import get_session +from danswer.db.llm import fetch_existing_llm_providers +from danswer.db.llm import remove_llm_provider +from danswer.db.llm import update_default_provider +from danswer.db.llm import upsert_llm_provider +from danswer.db.models import User +from danswer.llm.factory import get_default_llm +from danswer.llm.factory import get_llm +from danswer.llm.llm_provider_options import fetch_available_well_known_llms +from 
danswer.llm.llm_provider_options import WellKnownLLMProviderDescriptor +from danswer.llm.utils import test_llm +from danswer.server.manage.llm.models import FullLLMProvider +from danswer.server.manage.llm.models import LLMProviderDescriptor +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from danswer.server.manage.llm.models import TestLLMRequest +from danswer.utils.logger import setup_logger +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + +logger = setup_logger() + + +admin_router = APIRouter(prefix="/admin/llm") +basic_router = APIRouter(prefix="/llm") + + +@admin_router.get("/built-in/options") +def fetch_llm_options( + _: User | None = Depends(current_admin_user), +) -> list[WellKnownLLMProviderDescriptor]: + return fetch_available_well_known_llms() + + +@admin_router.post("/test") +def test_llm_configuration( + test_llm_request: TestLLMRequest, + _: User | None = Depends(current_admin_user), +) -> None: + llm = get_llm( + provider=test_llm_request.provider, + model=test_llm_request.default_model_name, + api_key=test_llm_request.api_key, + api_base=test_llm_request.api_base, + api_version=test_llm_request.api_version, + custom_config=test_llm_request.custom_config, + ) + functions_with_args: list[tuple[Callable, tuple]] = [(test_llm, (llm,))] + + if ( + test_llm_request.default_fast_model_name + and test_llm_request.default_fast_model_name + != test_llm_request.default_model_name + ): + fast_llm = get_llm( + provider=test_llm_request.provider, + model=test_llm_request.default_fast_model_name, + api_key=test_llm_request.api_key, + api_base=test_llm_request.api_base, + api_version=test_llm_request.api_version, + custom_config=test_llm_request.custom_config, + ) + functions_with_args.append((test_llm, (fast_llm,))) + + parallel_results = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=False + ) + error = parallel_results[0] or ( + parallel_results[1] if len(parallel_results) > 1 
else None + ) + + if error: + raise HTTPException(status_code=400, detail=error) + + +@admin_router.post("/test/default") +def test_default_provider( + _: User | None = Depends(current_admin_user), +) -> None: + try: + llm = get_default_llm() + fast_llm = get_default_llm(use_fast_llm=True) + except ValueError: + logger.exception("Failed to fetch default LLM Provider") + raise HTTPException(status_code=400, detail="No LLM Provider setup") + + functions_with_args: list[tuple[Callable, tuple]] = [ + (test_llm, (llm,)), + (test_llm, (fast_llm,)), + ] + parallel_results = run_functions_tuples_in_parallel( + functions_with_args, allow_failures=False + ) + error = parallel_results[0] or ( + parallel_results[1] if len(parallel_results) > 1 else None + ) + if error: + raise HTTPException(status_code=400, detail=error) + + +@admin_router.get("/provider") +def list_llm_providers( + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> list[FullLLMProvider]: + return [ + FullLLMProvider.from_model(llm_provider_model) + for llm_provider_model in fetch_existing_llm_providers(db_session) + ] + + +@admin_router.put("/provider") +def put_llm_provider( + llm_provider: LLMProviderUpsertRequest, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> FullLLMProvider: + return upsert_llm_provider(db_session, llm_provider) + + +@admin_router.delete("/provider/{provider_id}") +def delete_llm_provider( + provider_id: int, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + remove_llm_provider(db_session, provider_id) + + +@admin_router.post("/provider/{provider_id}/default") +def set_provider_as_default( + provider_id: int, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + update_default_provider(db_session, provider_id) + + +"""Endpoints for all""" + + +@basic_router.get("/provider") +def 
list_llm_provider_basics( + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[LLMProviderDescriptor]: + return [ + LLMProviderDescriptor.from_model(llm_provider_model) + for llm_provider_model in fetch_existing_llm_providers(db_session) + ] diff --git a/backend/danswer/server/manage/llm/models.py b/backend/danswer/server/manage/llm/models.py new file mode 100644 index 000000000..0e791696a --- /dev/null +++ b/backend/danswer/server/manage/llm/models.py @@ -0,0 +1,93 @@ +from typing import TYPE_CHECKING + +from pydantic import BaseModel + +from danswer.llm.llm_provider_options import fetch_models_for_provider + +if TYPE_CHECKING: + from danswer.db.models import LLMProvider as LLMProviderModel + + +class TestLLMRequest(BaseModel): + # provider level + provider: str + api_key: str | None = None + api_base: str | None = None + api_version: str | None = None + custom_config: dict[str, str] | None = None + + # model level + default_model_name: str + default_fast_model_name: str | None = None + + +class LLMProviderDescriptor(BaseModel): + """A descriptor for an LLM provider that can be safely viewed by + non-admin users. 
Used when giving a list of available LLMs.""" + + name: str + provider: str + model_names: list[str] + default_model_name: str + fast_default_model_name: str | None + is_default_provider: bool | None + + @classmethod + def from_model( + cls, llm_provider_model: "LLMProviderModel" + ) -> "LLMProviderDescriptor": + return cls( + name=llm_provider_model.name, + provider=llm_provider_model.provider, + default_model_name=llm_provider_model.default_model_name, + fast_default_model_name=llm_provider_model.fast_default_model_name, + is_default_provider=llm_provider_model.is_default_provider, + model_names=( + llm_provider_model.model_names + or fetch_models_for_provider(llm_provider_model.provider) + or [llm_provider_model.default_model_name] + ), + ) + + +class LLMProvider(BaseModel): + name: str + provider: str + api_key: str | None + api_base: str | None + api_version: str | None + custom_config: dict[str, str] | None + default_model_name: str + fast_default_model_name: str | None + + +class LLMProviderUpsertRequest(LLMProvider): + # should only be used for a "custom" provider + # for default providers, the built-in model names are used + model_names: list[str] | None + + +class FullLLMProvider(LLMProvider): + id: int + is_default_provider: bool | None + model_names: list[str] + + @classmethod + def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider": + return cls( + id=llm_provider_model.id, + name=llm_provider_model.name, + provider=llm_provider_model.provider, + api_key=llm_provider_model.api_key, + api_base=llm_provider_model.api_base, + api_version=llm_provider_model.api_version, + custom_config=llm_provider_model.custom_config, + default_model_name=llm_provider_model.default_model_name, + fast_default_model_name=llm_provider_model.fast_default_model_name, + is_default_provider=llm_provider_model.is_default_provider, + model_names=( + llm_provider_model.model_names + or fetch_models_for_provider(llm_provider_model.provider) + or 
[llm_provider_model.default_model_name] + ), + ) diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index a22060511..8797913f4 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -1,4 +1,5 @@ from typing import Any +from typing import TYPE_CHECKING from pydantic import BaseModel from pydantic import root_validator @@ -9,8 +10,14 @@ from danswer.configs.constants import AuthType from danswer.danswerbot.slack.config import VALID_SLACK_FILTERS from danswer.db.models import AllowedAnswerFilters from danswer.db.models import ChannelConfig +from danswer.db.models import SlackBotConfig as SlackBotConfigModel +from danswer.db.models import SlackBotResponseType +from danswer.indexing.models import EmbeddingModelDetail from danswer.server.features.persona.models import PersonaSnapshot +if TYPE_CHECKING: + from danswer.db.models import User as UserModel + class VersionResponse(BaseModel): backend_version: str @@ -23,6 +30,10 @@ class AuthTypeResponse(BaseModel): requires_verification: bool +class UserPreferences(BaseModel): + chosen_assistants: list[int] | None + + class UserInfo(BaseModel): id: str email: str @@ -30,6 +41,19 @@ class UserInfo(BaseModel): is_superuser: bool is_verified: bool role: UserRole + preferences: UserPreferences + + @classmethod + def from_model(cls, user: "UserModel") -> "UserInfo": + return cls( + id=str(user.id), + email=user.email, + is_active=user.is_active, + is_superuser=user.is_superuser, + is_verified=user.is_verified, + role=user.role, + preferences=(UserPreferences(chosen_assistants=user.chosen_assistants)), + ) class UserByEmail(BaseModel): @@ -81,6 +105,7 @@ class SlackBotConfigCreationRequest(BaseModel): answer_filters: list[AllowedAnswerFilters] = [] # list of user emails follow_up_tags: list[str] | None = None + response_type: SlackBotResponseType @validator("answer_filters", pre=True) def validate_filters(cls, value: list[str]) -> list[str]: 
@@ -104,12 +129,26 @@ class SlackBotConfig(BaseModel): id: int persona: PersonaSnapshot | None channel_config: ChannelConfig + response_type: SlackBotResponseType - -class ModelVersionResponse(BaseModel): - model_name: str | None # None only applicable to secondary index + @classmethod + def from_model( + cls, slack_bot_config_model: SlackBotConfigModel + ) -> "SlackBotConfig": + return cls( + id=slack_bot_config_model.id, + persona=( + PersonaSnapshot.from_model( + slack_bot_config_model.persona, allow_deleted=True + ) + if slack_bot_config_model.persona + else None + ), + channel_config=slack_bot_config_model.channel_config, + response_type=slack_bot_config_model.response_type, + ) class FullModelVersionResponse(BaseModel): - current_model_name: str - secondary_model_name: str | None + current_model: EmbeddingModelDetail + secondary_model: EmbeddingModelDetail | None diff --git a/backend/danswer/server/manage/secondary_index.py b/backend/danswer/server/manage/secondary_index.py index 2013cfc5b..6f5adf752 100644 --- a/backend/danswer/server/manage/secondary_index.py +++ b/backend/danswer/server/manage/secondary_index.py @@ -6,6 +6,9 @@ from sqlalchemy.orm import Session from danswer.auth.users import current_admin_user from danswer.auth.users import current_user +from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.connector_credential_pair import resync_cc_pair from danswer.db.embedding_model import create_embedding_model from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.embedding_model import get_secondary_db_embedding_model @@ -17,7 +20,6 @@ from danswer.db.models import User from danswer.document_index.factory import get_default_document_index from danswer.indexing.models import EmbeddingModelDetail from danswer.server.manage.models import FullModelVersionResponse -from danswer.server.manage.models import 
ModelVersionResponse from danswer.server.models import IdReturn from danswer.utils.logger import setup_logger @@ -78,6 +80,14 @@ def set_new_embedding_model( secondary_index_embedding_dim=new_model.model_dim, ) + # Pause index attempts for the currently in use index to preserve resources + if DISABLE_INDEX_UPDATE_ON_SWAP: + expire_index_attempts( + embedding_model_id=current_model.id, db_session=db_session + ) + for cc_pair in get_connector_credential_pairs(db_session): + resync_cc_pair(cc_pair, db_session=db_session) + return IdReturn(id=new_model.id) @@ -104,21 +114,21 @@ def cancel_new_embedding( def get_current_embedding_model( _: User | None = Depends(current_user), db_session: Session = Depends(get_session), -) -> ModelVersionResponse: +) -> EmbeddingModelDetail: current_model = get_current_db_embedding_model(db_session) - return ModelVersionResponse(model_name=current_model.model_name) + return EmbeddingModelDetail.from_model(current_model) @router.get("/get-secondary-embedding-model") def get_secondary_embedding_model( _: User | None = Depends(current_user), db_session: Session = Depends(get_session), -) -> ModelVersionResponse: +) -> EmbeddingModelDetail | None: next_model = get_secondary_db_embedding_model(db_session) + if not next_model: + return None - return ModelVersionResponse( - model_name=next_model.model_name if next_model else None - ) + return EmbeddingModelDetail.from_model(next_model) @router.get("/get-embedding-models") @@ -129,6 +139,8 @@ def get_embedding_models( current_model = get_current_db_embedding_model(db_session) next_model = get_secondary_db_embedding_model(db_session) return FullModelVersionResponse( - current_model_name=current_model.model_name, - secondary_model_name=next_model.model_name if next_model else None, + current_model=EmbeddingModelDetail.from_model(current_model), + secondary_model=EmbeddingModelDetail.from_model(next_model) + if next_model + else None, ) diff --git a/backend/danswer/server/manage/slack_bot.py 
b/backend/danswer/server/manage/slack_bot.py index 9720f1f5a..40e8663b0 100644 --- a/backend/danswer/server/manage/slack_bot.py +++ b/backend/danswer/server/manage/slack_bot.py @@ -19,7 +19,6 @@ from danswer.db.slack_bot_config import insert_slack_bot_config from danswer.db.slack_bot_config import remove_slack_bot_config from danswer.db.slack_bot_config import update_slack_bot_config from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.manage.models import SlackBotConfig from danswer.server.manage.models import SlackBotConfigCreationRequest from danswer.server.manage.models import SlackBotTokens @@ -108,17 +107,10 @@ def create_slack_bot_config( slack_bot_config_model = insert_slack_bot_config( persona_id=persona_id, channel_config=channel_config, + response_type=slack_bot_config_creation_request.response_type, db_session=db_session, ) - return SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - channel_config=slack_bot_config_model.channel_config, - ) + return SlackBotConfig.from_model(slack_bot_config_model) @router.patch("/admin/slack-bot/config/{slack_bot_config_id}") @@ -148,7 +140,7 @@ def patch_slack_bot_config( existing_persona_id = existing_slack_bot_config.persona_id if existing_persona_id is not None: persona = get_persona_by_id( - persona_id=existing_persona_id, user_id=None, db_session=db_session + persona_id=existing_persona_id, user=None, db_session=db_session ) if not persona.name.startswith(SLACK_BOT_PERSONA_PREFIX): @@ -170,17 +162,10 @@ def patch_slack_bot_config( slack_bot_config_id=slack_bot_config_id, persona_id=persona_id, channel_config=channel_config, + response_type=slack_bot_config_creation_request.response_type, db_session=db_session, ) - return SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - 
PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - channel_config=slack_bot_config_model.channel_config, - ) + return SlackBotConfig.from_model(slack_bot_config_model) @router.delete("/admin/slack-bot/config/{slack_bot_config_id}") @@ -201,26 +186,21 @@ def list_slack_bot_configs( ) -> list[SlackBotConfig]: slack_bot_config_models = fetch_slack_bot_configs(db_session=db_session) return [ - SlackBotConfig( - id=slack_bot_config_model.id, - persona=( - PersonaSnapshot.from_model(slack_bot_config_model.persona) - if slack_bot_config_model.persona - else None - ), - channel_config=slack_bot_config_model.channel_config, - ) + SlackBotConfig.from_model(slack_bot_config_model) for slack_bot_config_model in slack_bot_config_models ] @router.put("/admin/slack-bot/tokens") -def put_tokens(tokens: SlackBotTokens) -> None: +def put_tokens( + tokens: SlackBotTokens, + _: User | None = Depends(current_admin_user), +) -> None: save_tokens(tokens=tokens) @router.get("/admin/slack-bot/tokens") -def get_tokens() -> SlackBotTokens: +def get_tokens(_: User | None = Depends(current_admin_user)) -> SlackBotTokens: try: return fetch_tokens() except ConfigNotFoundError: diff --git a/backend/danswer/server/manage/users.py b/backend/danswer/server/manage/users.py index 539d7212f..a643f4e75 100644 --- a/backend/danswer/server/manage/users.py +++ b/backend/danswer/server/manage/users.py @@ -2,45 +2,72 @@ from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException from fastapi import status -from fastapi_users.db import SQLAlchemyUserDatabase -from fastapi_users_db_sqlalchemy import UUID_ID -from sqlalchemy.ext.asyncio import AsyncSession +from pydantic import BaseModel +from sqlalchemy import update from sqlalchemy.orm import Session +from danswer.auth.noauth_user import fetch_no_auth_user +from danswer.auth.noauth_user import set_no_auth_user_preferences from danswer.auth.schemas import UserRead 
from danswer.auth.schemas import UserRole from danswer.auth.users import current_admin_user from danswer.auth.users import current_user -from danswer.auth.users import optional_valid_user +from danswer.auth.users import optional_user +from danswer.configs.app_configs import AUTH_TYPE +from danswer.configs.constants import AuthType from danswer.db.engine import get_session -from danswer.db.engine import get_sqlalchemy_async_engine from danswer.db.models import User +from danswer.db.users import get_user_by_email from danswer.db.users import list_users +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.server.manage.models import UserByEmail from danswer.server.manage.models import UserInfo from danswer.server.manage.models import UserRoleResponse +from danswer.server.models import MinimalUserSnapshot -router = APIRouter(prefix="/manage") +router = APIRouter() -@router.patch("/promote-user-to-admin") -async def promote_admin( - user_email: UserByEmail, user: User = Depends(current_admin_user) +@router.patch("/manage/promote-user-to-admin") +def promote_admin( + user_email: UserByEmail, + _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> None: - if user.role != UserRole.ADMIN: - raise HTTPException(status_code=401, detail="Unauthorized") - async with AsyncSession(get_sqlalchemy_async_engine()) as asession: - user_db = SQLAlchemyUserDatabase[User, UUID_ID](asession, User) - user_to_promote = await user_db.get_by_email(user_email.user_email) - if not user_to_promote: - raise HTTPException(status_code=404, detail="User not found") - user_to_promote.role = UserRole.ADMIN - asession.add(user_to_promote) - await asession.commit() - return + user_to_promote = get_user_by_email( + email=user_email.user_email, db_session=db_session + ) + if not user_to_promote: + raise HTTPException(status_code=404, detail="User not found") + + user_to_promote.role = UserRole.ADMIN + db_session.add(user_to_promote) + 
db_session.commit() -@router.get("/users") +@router.patch("/manage/demote-admin-to-basic") +async def demote_admin( + user_email: UserByEmail, + user: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> None: + user_to_demote = get_user_by_email( + email=user_email.user_email, db_session=db_session + ) + if not user_to_demote: + raise HTTPException(status_code=404, detail="User not found") + + if user_to_demote.id == user.id: + raise HTTPException( + status_code=400, detail="Cannot demote yourself from admin role!" + ) + + user_to_demote.role = UserRole.BASIC + db_session.add(user_to_demote) + db_session.commit() + + +@router.get("/manage/users") def list_all_users( _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), @@ -49,7 +76,19 @@ def list_all_users( return [UserRead.from_orm(user) for user in users] -@router.get("/get-user-role", response_model=UserRoleResponse) +"""Endpoints for all""" + + +@router.get("/users") +def list_all_users_basic_info( + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[MinimalUserSnapshot]: + users = list_users(db_session) + return [MinimalUserSnapshot(id=user.id, email=user.email) for user in users] + + +@router.get("/get-user-role") async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse: if user is None: raise ValueError("Invalid or missing user.") @@ -57,20 +96,53 @@ async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse: @router.get("/me") -def verify_user_logged_in(user: User | None = Depends(optional_valid_user)) -> UserInfo: +def verify_user_logged_in( + user: User | None = Depends(optional_user), +) -> UserInfo: # NOTE: this does not use `current_user` / `current_admin_user` because we don't want # to enforce user verification here - the frontend always wants to get the info about # the current user regardless of if they are currently verified if user is 
None: + # if auth type is disabled, return a dummy user with preferences from + # the key-value store + if AUTH_TYPE == AuthType.DISABLED: + store = get_dynamic_config_store() + return fetch_no_auth_user(store) + raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="User Not Authenticated" ) - return UserInfo( - id=str(user.id), - email=user.email, - is_active=user.is_active, - is_superuser=user.is_superuser, - is_verified=user.is_verified, - role=user.role, + return UserInfo.from_model(user) + + +"""APIs to adjust user preferences""" + + +class ChosenAssistantsRequest(BaseModel): + chosen_assistants: list[int] + + +@router.patch("/user/assistant-list") +def update_user_assistant_list( + request: ChosenAssistantsRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + if user is None: + if AUTH_TYPE == AuthType.DISABLED: + store = get_dynamic_config_store() + + no_auth_user = fetch_no_auth_user(store) + no_auth_user.preferences.chosen_assistants = request.chosen_assistants + set_no_auth_user_preferences(store, no_auth_user.preferences) + return + else: + raise RuntimeError("This should never happen") + + db_session.execute( + update(User) + .where(User.id == user.id) # type: ignore + .values(chosen_assistants=request.chosen_assistants) ) + db_session.commit() diff --git a/backend/danswer/server/middleware/latency_logging.py b/backend/danswer/server/middleware/latency_logging.py new file mode 100644 index 000000000..f2bc3127a --- /dev/null +++ b/backend/danswer/server/middleware/latency_logging.py @@ -0,0 +1,23 @@ +import logging +import time +from collections.abc import Awaitable +from collections.abc import Callable + +from fastapi import FastAPI +from fastapi import Request +from fastapi import Response + + +def add_latency_logging_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> None: + @app.middleware("http") + async def log_latency( + request: Request, call_next: 
Callable[[Request], Awaitable[Response]] + ) -> Response: + start_time = time.monotonic() + response = await call_next(request) + process_time = time.monotonic() - start_time + logger.info( + f"Path: {request.url.path} - Method: {request.method} - " + f"Status Code: {response.status_code} - Time: {process_time:.4f} secs" + ) + return response diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index d616edd4f..21349ae07 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -1,6 +1,7 @@ from typing import Generic from typing import Optional from typing import TypeVar +from uuid import UUID from pydantic import BaseModel from pydantic.generics import GenericModel @@ -21,3 +22,12 @@ class ApiKey(BaseModel): class IdReturn(BaseModel): id: int + + +class MinimalUserSnapshot(BaseModel): + id: UUID + email: str + + +class DisplayPriorityRequest(BaseModel): + display_priority_map: dict[int, int] diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index 66c69fa87..97dcc62d0 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -1,20 +1,29 @@ +import io +import uuid + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Response +from fastapi import UploadFile from fastapi.responses import StreamingResponse from pydantic import BaseModel from sqlalchemy.orm import Session from danswer.auth.users import current_user -from danswer.chat.chat_utils import compute_max_document_tokens from danswer.chat.chat_utils import create_chat_chain from danswer.chat.process_message import stream_chat_message +from danswer.configs.app_configs import WEB_DOMAIN +from danswer.configs.constants import FileOrigin +from danswer.configs.constants import MessageType from danswer.db.chat import create_chat_session +from danswer.db.chat 
import create_new_chat_message from danswer.db.chat import delete_chat_session from danswer.db.chat import get_chat_message from danswer.db.chat import get_chat_messages_by_session from danswer.db.chat import get_chat_session_by_id from danswer.db.chat import get_chat_sessions_by_user +from danswer.db.chat import get_or_create_root_message from danswer.db.chat import get_persona_by_id from danswer.db.chat import set_as_latest_chat_message from danswer.db.chat import translate_db_message_to_chat_message_detail @@ -25,6 +34,14 @@ from danswer.db.feedback import create_doc_retrieval_feedback from danswer.db.models import User from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index +from danswer.file_processing.extract_file_text import extract_file_text +from danswer.file_store.file_store import get_default_file_store +from danswer.file_store.models import ChatFileType +from danswer.file_store.models import FileDescriptor +from danswer.llm.answering.prompts.citations_prompt import ( + compute_max_document_tokens_for_persona, +) +from danswer.llm.utils import get_default_llm_tokenizer from danswer.secondary_llm_flows.chat_session_naming import ( get_renamed_conversation_name, ) @@ -35,8 +52,11 @@ from danswer.server.query_and_chat.models import ChatSessionCreationRequest from danswer.server.query_and_chat.models import ChatSessionDetailResponse from danswer.server.query_and_chat.models import ChatSessionDetails from danswer.server.query_and_chat.models import ChatSessionsResponse +from danswer.server.query_and_chat.models import ChatSessionUpdateRequest from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.query_and_chat.models import CreateChatSessionID +from danswer.server.query_and_chat.models import LLMOverride +from danswer.server.query_and_chat.models import PromptOverride from danswer.server.query_and_chat.models import 
RenameChatSessionResponse from danswer.server.query_and_chat.models import SearchFeedbackRequest from danswer.utils.logger import setup_logger @@ -64,6 +84,8 @@ def get_user_chat_sessions( name=chat.description, persona_id=chat.persona_id, time_created=chat.time_created.isoformat(), + shared_status=chat.shared_status, + folder_id=chat.folder_id, ) for chat in chat_sessions ] @@ -73,6 +95,7 @@ def get_user_chat_sessions( @router.get("/get-chat-session/{session_id}") def get_chat_session( session_id: int, + is_shared: bool = False, user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> ChatSessionDetailResponse: @@ -80,22 +103,43 @@ def get_chat_session( try: chat_session = get_chat_session_by_id( - chat_session_id=session_id, user_id=user_id, db_session=db_session + chat_session_id=session_id, + user_id=user_id, + db_session=db_session, + is_shared=is_shared, ) except ValueError: raise ValueError("Chat session does not exist or has been deleted") + # for chat-seeding: if the session is unassigned, assign it now. 
This is done here + # to avoid another back and forth between FE -> BE before starting the first + # message generation + if chat_session.user_id is None and user_id is not None: + chat_session.user_id = user_id + db_session.commit() + session_messages = get_chat_messages_by_session( - chat_session_id=session_id, user_id=user_id, db_session=db_session + chat_session_id=session_id, + user_id=user_id, + db_session=db_session, + # we already did a permission check above with the call to + # `get_chat_session_by_id`, so we can skip it here + skip_permission_check=True, ) return ChatSessionDetailResponse( chat_session_id=session_id, description=chat_session.description, persona_id=chat_session.persona_id, + persona_name=chat_session.persona.name, messages=[ - translate_db_message_to_chat_message_detail(msg) for msg in session_messages + translate_db_message_to_chat_message_detail( + msg, remove_doc_content=is_shared # if shared, don't leak doc content + ) + for msg in session_messages ], + time_created=chat_session.time_created, + shared_status=chat_session.shared_status, ) @@ -109,7 +153,8 @@ def create_new_chat_session( try: new_chat_session = create_chat_session( db_session=db_session, - description="", # Leave the naming till later to prevent delay + description=chat_session_creation_request.description + or "", # Leave the naming till later to prevent delay user_id=user_id, persona_id=chat_session_creation_request.persona_id, ) @@ -133,7 +178,12 @@ def rename_chat_session( logger.info(f"Received rename request for chat session: {chat_session_id}") if name: - update_chat_session(user_id, chat_session_id, name, db_session) + update_chat_session( + db_session=db_session, + user_id=user_id, + chat_session_id=chat_session_id, + description=name, + ) return RenameChatSessionResponse(new_name=name) final_msg, history_msgs = create_chat_chain( @@ -143,11 +193,33 @@ def rename_chat_session( new_name = get_renamed_conversation_name(full_history=full_history) - 
update_chat_session(user_id, chat_session_id, new_name, db_session) + update_chat_session( + db_session=db_session, + user_id=user_id, + chat_session_id=chat_session_id, + description=new_name, + ) return RenameChatSessionResponse(new_name=new_name) +@router.patch("/chat-session/{session_id}") +def patch_chat_session( + session_id: int, + chat_session_update_req: ChatSessionUpdateRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + user_id = user.id if user is not None else None + update_chat_session( + db_session=db_session, + user_id=user_id, + chat_session_id=session_id, + sharing_status=chat_session_update_req.sharing_status, + ) + return None + + @router.delete("/delete-chat-session/{session_id}") def delete_chat_session_by_id( session_id: int, @@ -162,24 +234,28 @@ def delete_chat_session_by_id( def handle_new_chat_message( chat_message_req: CreateChatMessageRequest, user: User | None = Depends(current_user), - db_session: Session = Depends(get_session), ) -> StreamingResponse: """This endpoint is both used for all the following purposes: - Sending a new message in the session - Regenerating a message in the session (just send the same one again) - Editing a message (similar to regenerating but sending a different message) + - Kicking off a seeded chat session (set `use_existing_user_message`) To avoid extra overhead/latency, this assumes (and checks) that previous messages on the path have already been set as latest""" - logger.info(f"Received new chat message: {chat_message_req.message}") + logger.debug(f"Received new chat message: {chat_message_req.message}") - if not chat_message_req.message and chat_message_req.prompt_id is not None: + if ( + not chat_message_req.message + and chat_message_req.prompt_id is not None + and not chat_message_req.use_existing_user_message + ): raise HTTPException(status_code=400, detail="Empty chat message is invalid") packets = stream_chat_message( 
new_msg_req=chat_message_req, user=user, - db_session=db_session, + use_existing_user_message=chat_message_req.use_existing_user_message, ) return StreamingResponse(packets, media_type="application/json") @@ -217,6 +293,7 @@ def create_chat_feedback( create_chat_message_feedback( is_positive=feedback.is_positive, feedback_text=feedback.feedback_text, + predefined_feedback=feedback.predefined_feedback, chat_message_id=feedback.chat_message_id, user_id=user_id, db_session=db_session, @@ -262,12 +339,199 @@ def get_max_document_tokens( try: persona = get_persona_by_id( persona_id=persona_id, - user_id=user.id if user else None, + user=user, db_session=db_session, ) except ValueError: raise HTTPException(status_code=404, detail="Persona not found") return MaxSelectedDocumentTokens( - max_tokens=compute_max_document_tokens(persona), + max_tokens=compute_max_document_tokens_for_persona(persona), ) + + +"""Endpoints for chat seeding""" + + +class ChatSeedRequest(BaseModel): + # standard chat session stuff + persona_id: int + prompt_id: int | None = None + + # overrides / seeding + llm_override: LLMOverride | None = None + prompt_override: PromptOverride | None = None + description: str | None = None + message: str | None = None + + # TODO: support this + # initial_message_retrieval_options: RetrievalDetails | None = None + + +class ChatSeedResponse(BaseModel): + redirect_url: str + + +@router.post("/seed-chat-session") +def seed_chat( + chat_seed_request: ChatSeedRequest, + # NOTE: realistically, this will be an API key not an actual user + _: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> ChatSeedResponse: + try: + new_chat_session = create_chat_session( + db_session=db_session, + description=chat_seed_request.description or "", + user_id=None, # this chat session is "unassigned" until a user visits the web UI + persona_id=chat_seed_request.persona_id, + llm_override=chat_seed_request.llm_override, + 
prompt_override=chat_seed_request.prompt_override, + ) + except Exception as e: + logger.exception(e) + raise HTTPException(status_code=400, detail="Invalid Persona provided.") + + if chat_seed_request.message is not None: + root_message = get_or_create_root_message( + chat_session_id=new_chat_session.id, db_session=db_session + ) + create_new_chat_message( + chat_session_id=new_chat_session.id, + parent_message=root_message, + prompt_id=chat_seed_request.prompt_id + or ( + new_chat_session.persona.prompts[0].id + if new_chat_session.persona.prompts + else None + ), + message=chat_seed_request.message, + token_count=len( + get_default_llm_tokenizer().encode(chat_seed_request.message) + ), + message_type=MessageType.USER, + db_session=db_session, + ) + + return ChatSeedResponse( + redirect_url=f"{WEB_DOMAIN}/chat?chatId={new_chat_session.id}&seeded=true" + ) + + +"""File upload""" + + +@router.post("/file") +def upload_files_for_chat( + files: list[UploadFile], + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> dict[str, list[FileDescriptor]]: + image_content_types = {"image/jpeg", "image/png", "image/webp"} + text_content_types = { + "text/plain", + "text/csv", + "text/markdown", + "text/x-markdown", + "text/x-config", + "text/tab-separated-values", + "application/json", + "application/xml", + "application/x-yaml", + } + document_content_types = { + "application/pdf", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "message/rfc822", + "application/epub+zip", + } + + allowed_content_types = image_content_types.union(text_content_types).union( + document_content_types + ) + + for file in files: + if file.content_type not in allowed_content_types: + if file.content_type in image_content_types: + error_detail = "Unsupported image file type. 
Supported image types include .jpg, .jpeg, .png, .webp." + elif file.content_type in text_content_types: + error_detail = "Unsupported text file type. Supported text types include .txt, .csv, .md, .mdx, .conf, " + ".log, .tsv." + else: + error_detail = ( + "Unsupported document file type. Supported document types include .pdf, .docx, .pptx, .xlsx, " + ".json, .xml, .yml, .yaml, .eml, .epub." + ) + raise HTTPException(status_code=400, detail=error_detail) + + if ( + file.content_type in image_content_types + and file.size + and file.size > 20 * 1024 * 1024 + ): + raise HTTPException( + status_code=400, + detail="File size must be less than 20MB", + ) + + file_store = get_default_file_store(db_session) + + file_info: list[tuple[str, str | None, ChatFileType]] = [] + for file in files: + if file.content_type in image_content_types: + file_type = ChatFileType.IMAGE + elif file.content_type in document_content_types: + file_type = ChatFileType.DOC + else: + file_type = ChatFileType.PLAIN_TEXT + + # store the raw file + file_id = str(uuid.uuid4()) + file_store.save_file( + file_name=file_id, + content=file.file, + display_name=file.filename, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type=file.content_type or file_type.value, + ) + + # if the file is a doc, extract text and store that so we don't need + # to re-extract it every time we send a message + if file_type == ChatFileType.DOC: + extracted_text = extract_file_text(file_name=file.filename, file=file.file) + text_file_id = str(uuid.uuid4()) + file_store.save_file( + file_name=text_file_id, + content=io.BytesIO(extracted_text.encode()), + display_name=file.filename, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type="text/plain", + ) + # for DOC type, just return this for the FileDescriptor + # as we would always use this as the ID to attach to the + # message + file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT)) + else: + file_info.append((file_id, file.filename, file_type)) + + return { + 
"files": [ + {"id": file_id, "type": file_type, "name": file_name} + for file_id, file_name, file_type in file_info + ] + } + + +@router.get("/file/{file_id}") +def fetch_chat_file( + file_id: str, + db_session: Session = Depends(get_session), + _: User | None = Depends(current_user), +) -> Response: + file_store = get_default_file_store(db_session) + file_io = file_store.read_file(file_id, mode="b") + # NOTE: specifying "image/jpeg" here, but it still works for pngs + # TODO: do this properly + return Response(content=file_io.read(), media_type="image/jpeg") diff --git a/backend/danswer/server/query_and_chat/models.py b/backend/danswer/server/query_and_chat/models.py index b37d89130..44e8ab846 100644 --- a/backend/danswer/server/query_and_chat/models.py +++ b/backend/danswer/server/query_and_chat/models.py @@ -8,10 +8,14 @@ from danswer.chat.models import RetrievalDocs from danswer.configs.constants import DocumentSource from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType +from danswer.db.enums import ChatSessionSharedStatus +from danswer.file_store.models import FileDescriptor +from danswer.llm.override_models import LLMOverride +from danswer.llm.override_models import PromptOverride from danswer.search.models import BaseFilters +from danswer.search.models import ChunkContext from danswer.search.models import RetrievalDetails from danswer.search.models import SearchDoc -from danswer.search.models import SearchType from danswer.search.models import Tag @@ -30,6 +34,7 @@ class SimpleQueryRequest(BaseModel): class ChatSessionCreationRequest(BaseModel): # If not specified, use Danswer default persona persona_id: int = 0 + description: str | None = None class HelperResponse(BaseModel): @@ -45,6 +50,7 @@ class ChatFeedbackRequest(BaseModel): chat_message_id: int is_positive: bool | None = None feedback_text: str | None = None + predefined_feedback: str | None = None @root_validator def 
check_is_positive_or_feedback_text(cls: BaseModel, values: dict) -> dict: @@ -58,14 +64,6 @@ class ChatFeedbackRequest(BaseModel): return values -class DocumentSearchRequest(BaseModel): - message: str - search_type: SearchType - retrieval_options: RetrievalDetails - recency_bias_multiplier: float = 1.0 - skip_rerank: bool = False - - """ Currently the different branches are generated by changing the search query @@ -77,7 +75,7 @@ Currently the different branches are generated by changing the search query """ -class CreateChatMessageRequest(BaseModel): +class CreateChatMessageRequest(ChunkContext): """Before creating messages, be sure to create a chat_session and get an id""" chat_session_id: int @@ -85,7 +83,10 @@ class CreateChatMessageRequest(BaseModel): parent_message_id: int | None # New message contents message: str - # If no prompt provided, provide canned retrieval answer, no actually LLM flow + # file's that we should attach to this message + file_descriptors: list[FileDescriptor] + # If no prompt provided, uses the largest prompt of the chat session + # but really this should be explicitly specified, only in the simplified APIs is this inferred # Use prompt_id 0 to use the system default prompt which is Answer-Question prompt_id: int | None # If search_doc_ids provided, then retrieval options are unused @@ -95,6 +96,13 @@ class CreateChatMessageRequest(BaseModel): # will disable Query Rewording if specified query_override: str | None = None + # allows the caller to override the Persona / Prompt + llm_override: LLMOverride | None = None + prompt_override: PromptOverride | None = None + + # used for seeded chats to kick off the generation of an AI answer + use_existing_user_message: bool = False + @root_validator def check_search_doc_ids_or_retrieval_options(cls: BaseModel, values: dict) -> dict: search_doc_ids, retrieval_options = values.get("search_doc_ids"), values.get( @@ -118,6 +126,10 @@ class ChatRenameRequest(BaseModel): name: str | None = None 
+class ChatSessionUpdateRequest(BaseModel): + sharing_status: ChatSessionSharedStatus + + class RenameChatSessionResponse(BaseModel): new_name: str # This is only really useful if the name is generated @@ -127,6 +139,8 @@ class ChatSessionDetails(BaseModel): name: str persona_id: int time_created: str + shared_status: ChatSessionSharedStatus + folder_id: int | None class ChatSessionsResponse(BaseModel): @@ -161,6 +175,7 @@ class ChatMessageDetail(BaseModel): time_sent: datetime # Dict mapping citation number to db_doc_id citations: dict[int, int] | None + files: list[FileDescriptor] def dict(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore initial_dict = super().dict(*args, **kwargs) # type: ignore @@ -172,7 +187,10 @@ class ChatSessionDetailResponse(BaseModel): chat_session_id: int description: str persona_id: int + persona_name: str messages: list[ChatMessageDetail] + time_created: datetime + shared_status: ChatSessionSharedStatus class QueryValidationResponse(BaseModel): diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index 0f0e540c6..b8c6945dc 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -15,11 +15,11 @@ from danswer.document_index.factory import get_default_document_index from danswer.document_index.vespa.index import VespaIndex from danswer.one_shot_answer.answer_question import stream_search_answer from danswer.one_shot_answer.models import DirectQARequest -from danswer.search.access_filters import build_access_filters_for_user -from danswer.search.danswer_helper import recommend_search_flow from danswer.search.models import IndexFilters from danswer.search.models import SearchDoc -from danswer.search.search_runner import chunks_to_search_docs +from danswer.search.preprocessing.access_filters import build_access_filters_for_user +from 
danswer.search.preprocessing.danswer_helper import recommend_search_flow +from danswer.search.utils import chunks_or_sections_to_search_docs from danswer.secondary_llm_flows.query_validation import get_query_answerability from danswer.secondary_llm_flows.query_validation import stream_query_answerability from danswer.server.query_and_chat.models import AdminSearchRequest @@ -29,6 +29,7 @@ from danswer.server.query_and_chat.models import QueryValidationResponse from danswer.server.query_and_chat.models import SimpleQueryRequest from danswer.server.query_and_chat.models import SourceTag from danswer.server.query_and_chat.models import TagResponse +from danswer.server.query_and_chat.token_budget import check_token_budget from danswer.utils.logger import setup_logger logger = setup_logger() @@ -68,7 +69,7 @@ def admin_search( matching_chunks = document_index.admin_retrieval(query=query, filters=final_filters) - documents = chunks_to_search_docs(matching_chunks) + documents = chunks_or_sections_to_search_docs(matching_chunks) # Deduplicate documents by id deduplicated_documents: list[SearchDoc] = [] @@ -148,7 +149,7 @@ def stream_query_validation( def get_answer_with_quote( query_request: DirectQARequest, user: User = Depends(current_user), - db_session: Session = Depends(get_session), + _: bool = Depends(check_token_budget), ) -> StreamingResponse: query = query_request.messages[0].message logger.info(f"Received query for one shot answer with quotes: {query}") @@ -157,6 +158,5 @@ def get_answer_with_quote( user=user, max_document_tokens=None, max_history_tokens=0, - db_session=db_session, ) return StreamingResponse(packets, media_type="application/json") diff --git a/backend/danswer/server/query_and_chat/token_budget.py b/backend/danswer/server/query_and_chat/token_budget.py new file mode 100644 index 000000000..49a84f5b0 --- /dev/null +++ b/backend/danswer/server/query_and_chat/token_budget.py @@ -0,0 +1,79 @@ +import json +from datetime import datetime +from datetime 
import timedelta +from typing import cast + +from fastapi import HTTPException +from sqlalchemy import func +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import TOKEN_BUDGET_GLOBALLY_ENABLED +from danswer.configs.constants import ENABLE_TOKEN_BUDGET +from danswer.configs.constants import TOKEN_BUDGET +from danswer.configs.constants import TOKEN_BUDGET_SETTINGS +from danswer.configs.constants import TOKEN_BUDGET_TIME_PERIOD +from danswer.db.engine import get_session_context_manager +from danswer.db.models import ChatMessage +from danswer.dynamic_configs.factory import get_dynamic_config_store + +BUDGET_LIMIT_DEFAULT = -1 # Default to no limit +TIME_PERIOD_HOURS_DEFAULT = 12 + + +def is_under_token_budget(db_session: Session) -> bool: + try: + settings_json = cast( + str, get_dynamic_config_store().load(TOKEN_BUDGET_SETTINGS) + ) + except Exception: + return True + + settings = json.loads(settings_json) + + is_enabled = settings.get(ENABLE_TOKEN_BUDGET, False) + + if not is_enabled: + return True + + budget_limit = settings.get(TOKEN_BUDGET, -1) + + if budget_limit < 0: + return True + + period_hours = settings.get(TOKEN_BUDGET_TIME_PERIOD, TIME_PERIOD_HOURS_DEFAULT) + period_start_time = datetime.now() - timedelta(hours=period_hours) + + # Fetch the sum of all tokens used within the period + token_sum = ( + db_session.query(func.sum(ChatMessage.token_count)) + .filter(ChatMessage.time_sent >= period_start_time) + .scalar() + or 0 + ) + + print( + "token_sum:", + token_sum, + "budget_limit:", + budget_limit, + "period_hours:", + period_hours, + "period_start_time:", + period_start_time, + ) + + return token_sum < ( + budget_limit * 1000 + ) # Budget limit is expressed in thousands of tokens + + +def check_token_budget() -> None: + if not TOKEN_BUDGET_GLOBALLY_ENABLED: + return None + + with get_session_context_manager() as db_session: + # Perform the token budget check here, possibly using `user` and `db_session` for database access if 
needed + if not is_under_token_budget(db_session): + raise HTTPException( + status_code=429, detail="Sorry, token budget exceeded. Try again later." + ) diff --git a/backend/danswer/server/settings/api.py b/backend/danswer/server/settings/api.py new file mode 100644 index 000000000..422e268c1 --- /dev/null +++ b/backend/danswer/server/settings/api.py @@ -0,0 +1,30 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException + +from danswer.auth.users import current_admin_user +from danswer.auth.users import current_user +from danswer.db.models import User +from danswer.server.settings.models import Settings +from danswer.server.settings.store import load_settings +from danswer.server.settings.store import store_settings + + +admin_router = APIRouter(prefix="/admin/settings") +basic_router = APIRouter(prefix="/settings") + + +@admin_router.put("") +def put_settings( + settings: Settings, _: User | None = Depends(current_admin_user) +) -> None: + try: + settings.check_validity() + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + store_settings(settings) + + +@basic_router.get("") +def fetch_settings(_: User | None = Depends(current_user)) -> Settings: + return load_settings() diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py new file mode 100644 index 000000000..041e360d7 --- /dev/null +++ b/backend/danswer/server/settings/models.py @@ -0,0 +1,36 @@ +from enum import Enum + +from pydantic import BaseModel + + +class PageType(str, Enum): + CHAT = "chat" + SEARCH = "search" + + +class Settings(BaseModel): + """General settings""" + + chat_page_enabled: bool = True + search_page_enabled: bool = True + default_page: PageType = PageType.SEARCH + + def check_validity(self) -> None: + chat_page_enabled = self.chat_page_enabled + search_page_enabled = self.search_page_enabled + default_page = self.default_page + + if chat_page_enabled is False and 
search_page_enabled is False: + raise ValueError( + "One of `search_page_enabled` and `chat_page_enabled` must be True." + ) + + if default_page == PageType.CHAT and chat_page_enabled is False: + raise ValueError( + "The default page cannot be 'chat' if the chat page is disabled." + ) + + if default_page == PageType.SEARCH and search_page_enabled is False: + raise ValueError( + "The default page cannot be 'search' if the search page is disabled." + ) diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py new file mode 100644 index 000000000..ead1e3652 --- /dev/null +++ b/backend/danswer/server/settings/store.py @@ -0,0 +1,23 @@ +from typing import cast + +from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.server.settings.models import Settings + + +_SETTINGS_KEY = "danswer_settings" + + +def load_settings() -> Settings: + dynamic_config_store = get_dynamic_config_store() + try: + settings = Settings(**cast(dict, dynamic_config_store.load(_SETTINGS_KEY))) + except ConfigNotFoundError: + settings = Settings() + dynamic_config_store.store(_SETTINGS_KEY, settings.dict()) + + return settings + + +def store_settings(settings: Settings) -> None: + get_dynamic_config_store().store(_SETTINGS_KEY, settings.dict()) diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py new file mode 100644 index 000000000..94ffb0857 --- /dev/null +++ b/backend/danswer/tools/built_in_tools.py @@ -0,0 +1,168 @@ +from typing import Type +from typing import TypedDict + +from sqlalchemy import not_ +from sqlalchemy import or_ +from sqlalchemy import select +from sqlalchemy.orm import Session + +from danswer.db.models import Persona +from danswer.db.models import Tool as ToolDBModel +from danswer.tools.images.image_generation_tool import ImageGenerationTool +from danswer.tools.search.search_tool import SearchTool +from 
danswer.tools.tool import Tool +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class InCodeToolInfo(TypedDict): + cls: Type[Tool] + description: str + in_code_tool_id: str + + +BUILT_IN_TOOLS: list[InCodeToolInfo] = [ + { + "cls": SearchTool, + "description": "The Search Tool allows the Assistant to search through connected knowledge to help build an answer.", + "in_code_tool_id": SearchTool.__name__, + }, + { + "cls": ImageGenerationTool, + "description": ( + "The Image Generation Tool allows the assistant to use DALL-E 3 to generate images. " + "The tool will be used when the user asks the assistant to generate an image." + ), + "in_code_tool_id": ImageGenerationTool.__name__, + }, +] + + +def load_builtin_tools(db_session: Session) -> None: + existing_in_code_tools = db_session.scalars( + select(ToolDBModel).where(not_(ToolDBModel.in_code_tool_id.is_(None))) + ).all() + in_code_tool_id_to_tool = { + tool.in_code_tool_id: tool for tool in existing_in_code_tools + } + + # Add or update existing tools + for tool_info in BUILT_IN_TOOLS: + tool_name = tool_info["cls"].__name__ + tool = in_code_tool_id_to_tool.get(tool_info["in_code_tool_id"]) + if tool: + # Update existing tool + tool.name = tool_name + tool.description = tool_info["description"] + logger.info(f"Updated tool: {tool_name}") + else: + # Add new tool + new_tool = ToolDBModel( + name=tool_name, + description=tool_info["description"], + in_code_tool_id=tool_info["in_code_tool_id"], + ) + db_session.add(new_tool) + logger.info(f"Added new tool: {tool_name}") + + # Remove tools that are no longer in BUILT_IN_TOOLS + built_in_ids = {tool_info["in_code_tool_id"] for tool_info in BUILT_IN_TOOLS} + for tool_id, tool in list(in_code_tool_id_to_tool.items()): + if tool_id not in built_in_ids: + db_session.delete(tool) + logger.info(f"Removed tool no longer in built-in list: {tool.name}") + + db_session.commit() + logger.info("All built-in tools are loaded/verified.") + + +def 
auto_add_search_tool_to_personas(db_session: Session) -> None: + """ + Automatically adds the SearchTool to all Persona objects in the database that have + `num_chunks` either unset or set to a value that isn't 0. This is done to migrate + Persona objects that were created before the concept of Tools were added. + """ + # Fetch the SearchTool from the database based on in_code_tool_id from BUILT_IN_TOOLS + search_tool_id = next( + ( + tool["in_code_tool_id"] + for tool in BUILT_IN_TOOLS + if tool["cls"].__name__ == SearchTool.__name__ + ), + None, + ) + if not search_tool_id: + raise RuntimeError("SearchTool not found in the BUILT_IN_TOOLS list.") + + search_tool = db_session.execute( + select(ToolDBModel).where(ToolDBModel.in_code_tool_id == search_tool_id) + ).scalar_one_or_none() + + if not search_tool: + raise RuntimeError("SearchTool not found in the database.") + + # Fetch all Personas that need the SearchTool added + personas_to_update = ( + db_session.execute( + select(Persona).where( + or_(Persona.num_chunks.is_(None), Persona.num_chunks != 0) + ) + ) + .scalars() + .all() + ) + + # Add the SearchTool to each relevant Persona + for persona in personas_to_update: + if search_tool not in persona.tools: + persona.tools.append(search_tool) + logger.info(f"Added SearchTool to Persona ID: {persona.id}") + + # Commit changes to the database + db_session.commit() + logger.info("Completed adding SearchTool to relevant Personas.") + + +_built_in_tools_cache: dict[int, Type[Tool]] | None = None + + +def refresh_built_in_tools_cache(db_session: Session) -> None: + global _built_in_tools_cache + _built_in_tools_cache = {} + all_tool_built_in_tools = ( + db_session.execute( + select(ToolDBModel).where(not_(ToolDBModel.in_code_tool_id.is_(None))) + ) + .scalars() + .all() + ) + for tool in all_tool_built_in_tools: + tool_info = next( + ( + item + for item in BUILT_IN_TOOLS + if item["in_code_tool_id"] == tool.in_code_tool_id + ), + None, + ) + if tool_info: + 
_built_in_tools_cache[tool.id] = tool_info["cls"] + + +def get_built_in_tool_by_id( + tool_id: int, db_session: Session, force_refresh: bool = False +) -> Type[Tool]: + global _built_in_tools_cache + if _built_in_tools_cache is None or force_refresh: + refresh_built_in_tools_cache(db_session) + + if _built_in_tools_cache is None: + raise RuntimeError( + "Built-in tools cache is None despite being refreshed. Should never happen." + ) + + if tool_id in _built_in_tools_cache: + return _built_in_tools_cache[tool_id] + else: + raise ValueError(f"No built-in tool found in the cache with ID {tool_id}") diff --git a/backend/danswer/tools/factory.py b/backend/danswer/tools/factory.py new file mode 100644 index 000000000..197bdd661 --- /dev/null +++ b/backend/danswer/tools/factory.py @@ -0,0 +1,12 @@ +from typing import Type + +from sqlalchemy.orm import Session + +from danswer.db.models import Tool as ToolDBModel +from danswer.tools.built_in_tools import get_built_in_tool_by_id +from danswer.tools.tool import Tool + + +def get_tool_cls(tool: ToolDBModel, db_session: Session) -> Type[Tool]: + # Currently only support built-in tools + return get_built_in_tool_by_id(tool.id, db_session) diff --git a/backend/danswer/tools/force.py b/backend/danswer/tools/force.py new file mode 100644 index 000000000..1c3f0a220 --- /dev/null +++ b/backend/danswer/tools/force.py @@ -0,0 +1,40 @@ +from typing import Any + +from langchain_core.messages import AIMessage +from langchain_core.messages import BaseMessage +from pydantic import BaseModel + +from danswer.tools.tool import Tool + + +class ForceUseTool(BaseModel): + tool_name: str + args: dict[str, Any] | None = None + + def build_openai_tool_choice_dict(self) -> dict[str, Any]: + """Build dict in the format that OpenAI expects which tells them to use this tool.""" + return {"type": "function", "function": {"name": self.tool_name}} + + +def modify_message_chain_for_force_use_tool( + messages: list[BaseMessage], force_use_tool: ForceUseTool 
| None = None +) -> list[BaseMessage]: + """NOTE: modifies `messages` in place.""" + if not force_use_tool: + return messages + + for message in messages: + if isinstance(message, AIMessage) and message.tool_calls: + for tool_call in message.tool_calls: + tool_call["args"] = force_use_tool.args or {} + + return messages + + +def filter_tools_for_force_tool_use( + tools: list[Tool], force_use_tool: ForceUseTool | None = None +) -> list[Tool]: + if not force_use_tool: + return tools + + return [tool for tool in tools if tool.name() == force_use_tool.tool_name] diff --git a/backend/danswer/tools/images/image_generation_tool.py b/backend/danswer/tools/images/image_generation_tool.py new file mode 100644 index 000000000..da6627132 --- /dev/null +++ b/backend/danswer/tools/images/image_generation_tool.py @@ -0,0 +1,164 @@ +import json +from collections.abc import Generator +from typing import Any +from typing import cast + +from litellm import image_generation # type: ignore +from pydantic import BaseModel + +from danswer.chat.chat_utils import combine_message_chain +from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.interfaces import LLM +from danswer.llm.utils import build_content_with_imgs +from danswer.llm.utils import message_to_string +from danswer.prompts.constants import GENERAL_SEP_PAT +from danswer.tools.tool import Tool +from danswer.tools.tool import ToolResponse +from danswer.utils.logger import setup_logger +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + +logger = setup_logger() + + +IMAGE_GENERATION_RESPONSE_ID = "image_generation_response" + +YES_IMAGE_GENERATION = "Yes Image Generation" +SKIP_IMAGE_GENERATION = "Skip Image Generation" + +IMAGE_GENERATION_TEMPLATE = f""" +Given the conversation history and a follow up query, determine if the system should call \ +an external image generation tool to better answer the latest user 
input. +Your default response is {SKIP_IMAGE_GENERATION}. + +Respond "{YES_IMAGE_GENERATION}" if: +- The user is asking for an image to be generated. + +Conversation History: +{GENERAL_SEP_PAT} +{{chat_history}} +{GENERAL_SEP_PAT} + +If you are at all unsure, respond with {SKIP_IMAGE_GENERATION}. +Respond with EXACTLY and ONLY "{YES_IMAGE_GENERATION}" or "{SKIP_IMAGE_GENERATION}" + +Follow Up Input: +{{final_query}} +""".strip() + + +class ImageGenerationResponse(BaseModel): + revised_prompt: str + url: str + + +class ImageGenerationTool(Tool): + def __init__( + self, api_key: str, model: str = "dall-e-3", num_imgs: int = 2 + ) -> None: + self.api_key = api_key + self.model = model + self.num_imgs = num_imgs + + @classmethod + def name(self) -> str: + return "run_image_generation" + + @classmethod + def tool_definition(cls) -> dict: + return { + "type": "function", + "function": { + "name": cls.name(), + "description": "Generate an image from a prompt", + "parameters": { + "type": "object", + "properties": { + "prompt": { + "type": "string", + "description": "Prompt used to generate the image", + }, + }, + "required": ["prompt"], + }, + }, + } + + def get_args_for_non_tool_calling_llm( + self, + query: str, + history: list[PreviousMessage], + llm: LLM, + force_run: bool = False, + ) -> dict[str, Any] | None: + args = {"prompt": query} + if force_run: + return args + + history_str = combine_message_chain( + messages=history, token_limit=GEN_AI_HISTORY_CUTOFF + ) + prompt = IMAGE_GENERATION_TEMPLATE.format( + chat_history=history_str, + final_query=query, + ) + use_image_generation_tool_output = message_to_string(llm.invoke(prompt)) + + logger.debug( + f"Evaluated if should use ImageGenerationTool: {use_image_generation_tool_output}" + ) + if ( + YES_IMAGE_GENERATION.split()[0] + ).lower() in use_image_generation_tool_output.lower(): + return args + + return None + + def build_tool_message_content( + self, *args: ToolResponse + ) -> str | list[str | dict[str, Any]]: 
+ generation_response = args[0] + image_generations = cast( + list[ImageGenerationResponse], generation_response.response + ) + + return build_content_with_imgs( + json.dumps( + [ + { + "revised_prompt": image_generation.revised_prompt, + "url": image_generation.url, + } + for image_generation in image_generations + ] + ), + img_urls=[image_generation.url for image_generation in image_generations], + ) + + def _generate_image(self, prompt: str) -> ImageGenerationResponse: + response = image_generation( + prompt=prompt, + model=self.model, + api_key=self.api_key, + n=1, + ) + return ImageGenerationResponse( + revised_prompt=response.data[0]["revised_prompt"], + url=response.data[0]["url"], + ) + + def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: + prompt = cast(str, kwargs["prompt"]) + + # dalle3 only supports 1 image at a time, which is why we have to + # parallelize this via threading + results = cast( + list[ImageGenerationResponse], + run_functions_tuples_in_parallel( + [(self._generate_image, (prompt,)) for _ in range(self.num_imgs)] + ), + ) + yield ToolResponse( + id=IMAGE_GENERATION_RESPONSE_ID, + response=results, + ) diff --git a/backend/danswer/tools/images/prompt.py b/backend/danswer/tools/images/prompt.py new file mode 100644 index 000000000..dee28b49c --- /dev/null +++ b/backend/danswer/tools/images/prompt.py @@ -0,0 +1,33 @@ +from langchain_core.messages import HumanMessage + +from danswer.llm.utils import build_content_with_imgs + + +NON_TOOL_CALLING_PROMPT = """ +You have just created the attached images in response to the following query: "{{query}}". + +Can you please summarize them in a sentence or two? +""" + +TOOL_CALLING_PROMPT = """ +Can you please summarize the two images you generate in a sentence or two? 
+""" + + +def build_image_generation_user_prompt( + query: str, img_urls: list[str] | None = None +) -> HumanMessage: + if img_urls: + return HumanMessage( + content=build_content_with_imgs( + message=NON_TOOL_CALLING_PROMPT.format(query=query).strip(), + img_urls=img_urls, + ) + ) + + return HumanMessage( + content=build_content_with_imgs( + message=TOOL_CALLING_PROMPT.strip(), + img_urls=img_urls, + ) + ) diff --git a/backend/danswer/tools/message.py b/backend/danswer/tools/message.py new file mode 100644 index 000000000..cdf86a23b --- /dev/null +++ b/backend/danswer/tools/message.py @@ -0,0 +1,39 @@ +import json +from typing import Any + +from langchain_core.messages.ai import AIMessage +from langchain_core.messages.tool import ToolCall +from langchain_core.messages.tool import ToolMessage +from pydantic import BaseModel + +from danswer.llm.utils import get_default_llm_tokenizer + + +def build_tool_message( + tool_call: ToolCall, tool_content: str | list[str | dict[str, Any]] +) -> ToolMessage: + return ToolMessage( + tool_call_id=tool_call["id"] or "", + name=tool_call["name"], + content=tool_content, + ) + + +class ToolCallSummary(BaseModel): + tool_call_request: AIMessage + tool_call_result: ToolMessage + + +def tool_call_tokens(tool_call_summary: ToolCallSummary) -> int: + llm_tokenizer = get_default_llm_tokenizer() + + request_tokens = len( + llm_tokenizer.encode( + json.dumps(tool_call_summary.tool_call_request.tool_calls[0]["args"]) + ) + ) + result_tokens = len( + llm_tokenizer.encode(json.dumps(tool_call_summary.tool_call_result.content)) + ) + + return request_tokens + result_tokens diff --git a/backend/danswer/tools/search/search_tool.py b/backend/danswer/tools/search/search_tool.py new file mode 100644 index 000000000..968c17f5a --- /dev/null +++ b/backend/danswer/tools/search/search_tool.py @@ -0,0 +1,240 @@ +import json +from collections.abc import Generator +from typing import Any +from typing import cast + +from pydantic import BaseModel +from 
sqlalchemy.orm import Session + +from danswer.chat.chat_utils import llm_doc_from_inference_section +from danswer.chat.models import LlmDoc +from danswer.db.models import Persona +from danswer.db.models import User +from danswer.llm.answering.doc_pruning import prune_documents +from danswer.llm.answering.models import DocumentPruningConfig +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.models import PromptConfig +from danswer.llm.interfaces import LLM +from danswer.llm.interfaces import LLMConfig +from danswer.search.enums import QueryFlow +from danswer.search.enums import SearchType +from danswer.search.models import IndexFilters +from danswer.search.models import InferenceSection +from danswer.search.models import RetrievalDetails +from danswer.search.models import SearchRequest +from danswer.search.pipeline import SearchPipeline +from danswer.secondary_llm_flows.choose_search import check_if_need_search +from danswer.secondary_llm_flows.query_expansion import history_based_query_rephrase +from danswer.tools.search.search_utils import llm_doc_to_dict +from danswer.tools.tool import Tool +from danswer.tools.tool import ToolResponse + +SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary" +SECTION_RELEVANCE_LIST_ID = "section_relevance_list" +FINAL_CONTEXT_DOCUMENTS = "final_context_documents" + + +class SearchResponseSummary(BaseModel): + top_sections: list[InferenceSection] + rephrased_query: str | None = None + predicted_flow: QueryFlow | None + predicted_search: SearchType | None + final_filters: IndexFilters + recency_bias_multiplier: float + + +search_tool_description = """ +Runs a semantic search over the user's knowledge base. The default behavior is to use this tool. \ +The only scenario where you should not use this tool is if: + +- There is sufficient information in chat history to FULLY and ACCURATELY answer the query AND \ +additional information or details would provide little or no value. 
+- The query is some form of request that does not require additional information to handle. + +HINT: if you are unfamiliar with the user input OR think the user input is a typo, use this tool. +""" + + +class SearchTool(Tool): + def __init__( + self, + db_session: Session, + user: User | None, + persona: Persona, + retrieval_options: RetrievalDetails | None, + prompt_config: PromptConfig, + llm_config: LLMConfig, + pruning_config: DocumentPruningConfig, + # if specified, will not actually run a search and will instead return these + # sections. Used when the user selects specific docs to talk to + selected_docs: list[LlmDoc] | None = None, + chunks_above: int = 0, + chunks_below: int = 0, + full_doc: bool = False, + ) -> None: + self.user = user + self.persona = persona + self.retrieval_options = retrieval_options + self.prompt_config = prompt_config + self.llm_config = llm_config + self.pruning_config = pruning_config + + self.selected_docs = selected_docs + + self.chunks_above = chunks_above + self.chunks_below = chunks_below + self.full_doc = full_doc + self.db_session = db_session + + @classmethod + def name(cls) -> str: + return "run_search" + + """For explicit tool calling""" + + @classmethod + def tool_definition(cls) -> dict: + return { + "type": "function", + "function": { + "name": cls.name(), + "description": search_tool_description, + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "What to search for", + }, + }, + "required": ["query"], + }, + }, + } + + def build_tool_message_content( + self, *args: ToolResponse + ) -> str | list[str | dict[str, Any]]: + final_context_docs_response = args[2] + final_context_docs = cast(list[LlmDoc], final_context_docs_response.response) + + return json.dumps( + { + "search_results": [ + llm_doc_to_dict(doc, ind) + for ind, doc in enumerate(final_context_docs) + ] + } + ) + + """For LLMs that don't support tool calling""" + + def 
get_args_for_non_tool_calling_llm( + self, + query: str, + history: list[PreviousMessage], + llm: LLM, + force_run: bool = False, + ) -> dict[str, Any] | None: + if not force_run and not check_if_need_search( + query=query, history=history, llm=llm + ): + return None + + rephrased_query = history_based_query_rephrase( + query=query, history=history, llm=llm + ) + return {"query": rephrased_query} + + """Actual tool execution""" + + def _build_response_for_specified_sections( + self, query: str + ) -> Generator[ToolResponse, None, None]: + if self.selected_docs is None: + raise ValueError("sections must be specified") + + yield ToolResponse( + id=SEARCH_RESPONSE_SUMMARY_ID, + response=SearchResponseSummary( + rephrased_query=None, + top_sections=[], + predicted_flow=None, + predicted_search=None, + final_filters=IndexFilters(access_control_list=None), # dummy filters + recency_bias_multiplier=1.0, + ), + ) + yield ToolResponse( + id=SECTION_RELEVANCE_LIST_ID, + response=[i for i in range(len(self.selected_docs))], + ) + yield ToolResponse( + id=FINAL_CONTEXT_DOCUMENTS, + response=prune_documents( + docs=self.selected_docs, + doc_relevance_list=None, + prompt_config=self.prompt_config, + llm_config=self.llm_config, + question=query, + document_pruning_config=self.pruning_config, + ), + ) + + def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: + query = cast(str, kwargs["query"]) + + if self.selected_docs: + yield from self._build_response_for_specified_sections(query) + return + + search_pipeline = SearchPipeline( + search_request=SearchRequest( + query=query, + human_selected_filters=self.retrieval_options.filters + if self.retrieval_options + else None, + persona=self.persona, + offset=self.retrieval_options.offset + if self.retrieval_options + else None, + limit=self.retrieval_options.limit if self.retrieval_options else None, + chunks_above=self.chunks_above, + chunks_below=self.chunks_below, + full_doc=self.full_doc, + ), + user=self.user, + 
db_session=self.db_session, + ) + yield ToolResponse( + id=SEARCH_RESPONSE_SUMMARY_ID, + response=SearchResponseSummary( + rephrased_query=query, + top_sections=search_pipeline.reranked_sections, + predicted_flow=search_pipeline.predicted_flow, + predicted_search=search_pipeline.predicted_search_type, + final_filters=search_pipeline.search_query.filters, + recency_bias_multiplier=search_pipeline.search_query.recency_bias_multiplier, + ), + ) + yield ToolResponse( + id=SECTION_RELEVANCE_LIST_ID, + response=search_pipeline.relevant_chunk_indices, + ) + + llm_docs = [ + llm_doc_from_inference_section(section) + for section in search_pipeline.reranked_sections + ] + final_context_documents = prune_documents( + docs=llm_docs, + doc_relevance_list=[ + True if ind in search_pipeline.relevant_chunk_indices else False + for ind in range(len(llm_docs)) + ], + prompt_config=self.prompt_config, + llm_config=self.llm_config, + question=query, + document_pruning_config=self.pruning_config, + ) + yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS, response=final_context_documents) diff --git a/backend/danswer/tools/search/search_utils.py b/backend/danswer/tools/search/search_utils.py new file mode 100644 index 000000000..7e5151bb5 --- /dev/null +++ b/backend/danswer/tools/search/search_utils.py @@ -0,0 +1,15 @@ +from danswer.chat.models import LlmDoc +from danswer.prompts.prompt_utils import clean_up_source + + +def llm_doc_to_dict(llm_doc: LlmDoc, doc_num: int) -> dict: + doc_dict = { + "document_number": doc_num + 1, + "title": llm_doc.semantic_identifier, + "content": llm_doc.content, + "source": clean_up_source(llm_doc.source_type), + "metadata": llm_doc.metadata, + } + if llm_doc.updated_at: + doc_dict["updated_at"] = llm_doc.updated_at.strftime("%B %d, %Y %H:%M") + return doc_dict diff --git a/backend/danswer/tools/tool.py b/backend/danswer/tools/tool.py new file mode 100644 index 000000000..dd443757e --- /dev/null +++ b/backend/danswer/tools/tool.py @@ -0,0 +1,51 @@ +import abc 
+from collections.abc import Generator +from typing import Any + +from pydantic import BaseModel + +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.interfaces import LLM + + +class ToolResponse(BaseModel): + id: str | None = None + response: Any + + +class Tool(abc.ABC): + @classmethod + @abc.abstractmethod + def name(self) -> str: + raise NotImplementedError + + """For LLMs which support explicit tool calling""" + + @classmethod + @abc.abstractmethod + def tool_definition(self) -> dict: + raise NotImplementedError + + @abc.abstractmethod + def build_tool_message_content( + self, *args: ToolResponse + ) -> str | list[str | dict[str, Any]]: + raise NotImplementedError + + """For LLMs which do NOT support explicit tool calling""" + + @abc.abstractmethod + def get_args_for_non_tool_calling_llm( + self, + query: str, + history: list[PreviousMessage], + llm: LLM, + force_run: bool = False, + ) -> dict[str, Any] | None: + raise NotImplementedError + + """Actual execution of the tool""" + + @abc.abstractmethod + def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: + raise NotImplementedError diff --git a/backend/danswer/tools/tool_runner.py b/backend/danswer/tools/tool_runner.py new file mode 100644 index 000000000..46f247b06 --- /dev/null +++ b/backend/danswer/tools/tool_runner.py @@ -0,0 +1,73 @@ +from collections.abc import Generator +from typing import Any + +from pydantic import BaseModel +from pydantic import root_validator + +from danswer.llm.answering.models import PreviousMessage +from danswer.llm.interfaces import LLM +from danswer.tools.tool import Tool +from danswer.tools.tool import ToolResponse +from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel + + +class ToolRunKickoff(BaseModel): + tool_name: str + tool_args: dict[str, Any] + + +class ToolRunnerResponse(BaseModel): + tool_run_kickoff: ToolRunKickoff | None = None + tool_response: ToolResponse | None = None + tool_message_content: 
str | list[str | dict[str, Any]] | None = None + + @root_validator + def validate_tool_runner_response( + cls, values: dict[str, ToolResponse | str] + ) -> dict[str, ToolResponse | str]: + fields = ["tool_response", "tool_message_content", "tool_run_kickoff"] + provided = sum(1 for field in fields if values.get(field) is not None) + + if provided != 1: + raise ValueError( + "Exactly one of 'tool_response', 'tool_message_content', " + "or 'tool_run_kickoff' must be provided" + ) + + return values + + +class ToolRunner: + def __init__(self, tool: Tool, args: dict[str, Any]): + self.tool = tool + self.args = args + + self._tool_responses: list[ToolResponse] | None = None + + def kickoff(self) -> ToolRunKickoff: + return ToolRunKickoff(tool_name=self.tool.name(), tool_args=self.args) + + def tool_responses(self) -> Generator[ToolResponse, None, None]: + if self._tool_responses is not None: + yield from self._tool_responses + + tool_responses: list[ToolResponse] = [] + for tool_response in self.tool.run(**self.args): + yield tool_response + tool_responses.append(tool_response) + + self._tool_responses = tool_responses + + def tool_message_content(self) -> str | list[str | dict[str, Any]]: + tool_responses = list(self.tool_responses()) + return self.tool.build_tool_message_content(*tool_responses) + + +def check_which_tools_should_run_for_non_tool_calling_llm( + tools: list[Tool], query: str, history: list[PreviousMessage], llm: LLM +) -> list[dict[str, Any] | None]: + tool_args_list = [ + (tool.get_args_for_non_tool_calling_llm, (query, history, llm)) + for tool in tools + ] + return run_functions_tuples_in_parallel(tool_args_list) diff --git a/backend/danswer/tools/utils.py b/backend/danswer/tools/utils.py new file mode 100644 index 000000000..831021cda --- /dev/null +++ b/backend/danswer/tools/utils.py @@ -0,0 +1,31 @@ +import json +from typing import Type + +from tiktoken import Encoding + +from danswer.llm.utils import get_default_llm_tokenizer +from 
danswer.tools.tool import Tool + + +OPEN_AI_TOOL_CALLING_MODELS = {"gpt-3.5-turbo", "gpt-4-turbo", "gpt-4"} + + +def explicit_tool_calling_supported(model_provider: str, model_name: str) -> bool: + if model_provider == "openai" and model_name in OPEN_AI_TOOL_CALLING_MODELS: + return True + + return False + + +def compute_tool_tokens( + tool: Tool | Type[Tool], llm_tokenizer: Encoding | None = None +) -> int: + if not llm_tokenizer: + llm_tokenizer = get_default_llm_tokenizer() + return len(llm_tokenizer.encode(json.dumps(tool.tool_definition()))) + + +def compute_all_tool_tokens( + tools: list[Tool] | list[Type[Tool]], llm_tokenizer: Encoding | None = None +) -> int: + return sum(compute_tool_tokens(tool, llm_tokenizer) for tool in tools) diff --git a/backend/danswer/utils/acl.py b/backend/danswer/utils/acl.py index 268457bfd..8fbadb300 100644 --- a/backend/danswer/utils/acl.py +++ b/backend/danswer/utils/acl.py @@ -11,7 +11,7 @@ from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index from danswer.document_index.interfaces import UpdateRequest from danswer.document_index.vespa.index import VespaIndex -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.utils.logger import setup_logger diff --git a/backend/danswer/utils/batching.py b/backend/danswer/utils/batching.py index 0200f7225..2ea436e11 100644 --- a/backend/danswer/utils/batching.py +++ b/backend/danswer/utils/batching.py @@ -21,3 +21,10 @@ def batch_generator( if pre_batch_yield: pre_batch_yield(batch) yield batch + + +def batch_list( + lst: list[T], + batch_size: int, +) -> list[list[T]]: + return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] diff --git a/backend/danswer/utils/encryption.py b/backend/danswer/utils/encryption.py new file mode 
100644 index 000000000..0f21d84d0 --- /dev/null +++ b/backend/danswer/utils/encryption.py @@ -0,0 +1,31 @@ +from danswer.configs.app_configs import ENCRYPTION_KEY_SECRET +from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import fetch_versioned_implementation + +logger = setup_logger() + + +def _encrypt_string(input_str: str) -> bytes: + if ENCRYPTION_KEY_SECRET: + logger.warning("MIT version of Danswer does not support encryption of secrets.") + return input_str.encode() + + +def _decrypt_bytes(input_bytes: bytes) -> str: + # No need to double warn. If you wish to learn more about encryption features + # refer to the Danswer EE code + return input_bytes.decode() + + +def encrypt_string_to_bytes(intput_str: str) -> bytes: + versioned_encryption_fn = fetch_versioned_implementation( + "danswer.utils.encryption", "_encrypt_string" + ) + return versioned_encryption_fn(intput_str) + + +def decrypt_bytes_to_string(intput_bytes: bytes) -> str: + versioned_decryption_fn = fetch_versioned_implementation( + "danswer.utils.encryption", "_decrypt_bytes" + ) + return versioned_decryption_fn(intput_bytes) diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py index c4dd59742..38e24a367 100644 --- a/backend/danswer/utils/logger.py +++ b/backend/danswer/utils/logger.py @@ -3,7 +3,7 @@ import os from collections.abc import MutableMapping from typing import Any -from danswer.configs.app_configs import LOG_LEVEL +from shared_configs.configs import LOG_LEVEL class IndexAttemptSingleton: diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index 65e9f4709..f2dbb8915 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -6,7 +6,7 @@ from typing import cast import requests from danswer.configs.app_configs import DISABLE_TELEMETRY -from danswer.dynamic_configs import get_dynamic_config_store +from danswer.dynamic_configs.factory import 
get_dynamic_config_store from danswer.dynamic_configs.interface import ConfigNotFoundError CUSTOMER_UUID_KEY = "customer_uuid" @@ -27,7 +27,7 @@ def get_or_generate_uuid() -> str: return cast(str, kv_store.load(CUSTOMER_UUID_KEY)) except ConfigNotFoundError: customer_id = str(uuid.uuid4()) - kv_store.store(CUSTOMER_UUID_KEY, customer_id) + kv_store.store(CUSTOMER_UUID_KEY, customer_id, encrypt=True) return customer_id diff --git a/backend/danswer/utils/timing.py b/backend/danswer/utils/timing.py index 664656aa7..2aa150955 100644 --- a/backend/danswer/utils/timing.py +++ b/backend/danswer/utils/timing.py @@ -18,7 +18,10 @@ FG = TypeVar("FG", bound=Callable[..., Generator | Iterator]) def log_function_time( - func_name: str | None = None, print_only: bool = False + func_name: str | None = None, + print_only: bool = False, + debug_only: bool = False, + include_args: bool = False, ) -> Callable[[F], F]: def decorator(func: F) -> F: @wraps(func) @@ -28,7 +31,12 @@ def log_function_time( result = func(*args, **kwargs) elapsed_time_str = str(time.time() - start_time) log_name = func_name or func.__name__ - logger.info(f"{log_name} took {elapsed_time_str} seconds") + args_str = f" args={args} kwargs={kwargs}" if include_args else "" + final_log = f"{log_name}{args_str} took {elapsed_time_str} seconds" + if debug_only: + logger.debug(final_log) + else: + logger.info(final_log) if not print_only: optional_telemetry( diff --git a/backend/danswer/utils/variable_functionality.py b/backend/danswer/utils/variable_functionality.py index 934b526a5..61414effd 100644 --- a/backend/danswer/utils/variable_functionality.py +++ b/backend/danswer/utils/variable_functionality.py @@ -1,6 +1,12 @@ +import functools import importlib from typing import Any +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + class DanswerVersion: def __init__(self) -> None: @@ -16,6 +22,17 @@ class DanswerVersion: global_version = DanswerVersion() +@functools.lru_cache(maxsize=128) def 
fetch_versioned_implementation(module: str, attribute: str) -> Any: - module_full = f"ee.{module}" if global_version.get_is_ee_version() else module - return getattr(importlib.import_module(module_full), attribute) + logger.debug("Fetching versioned implementation for %s.%s", module, attribute) + is_ee = global_version.get_is_ee_version() + + module_full = f"ee.{module}" if is_ee else module + try: + return getattr(importlib.import_module(module_full), attribute) + except ModuleNotFoundError: + # try the non-ee version as a fallback + if is_ee: + return getattr(importlib.import_module(module), attribute) + + raise diff --git a/backend/model_server/constants.py b/backend/model_server/constants.py new file mode 100644 index 000000000..bc842f546 --- /dev/null +++ b/backend/model_server/constants.py @@ -0,0 +1 @@ +MODEL_WARM_UP_STRING = "hi " * 512 diff --git a/backend/model_server/custom_models.py b/backend/model_server/custom_models.py index 9faea17ba..ee97ded78 100644 --- a/backend/model_server/custom_models.py +++ b/backend/model_server/custom_models.py @@ -1,19 +1,58 @@ -import numpy as np -from fastapi import APIRouter +from typing import Optional + +import numpy as np +import tensorflow as tf # type: ignore +from fastapi import APIRouter +from transformers import AutoTokenizer # type: ignore +from transformers import TFDistilBertForSequenceClassification + +from model_server.constants import MODEL_WARM_UP_STRING +from model_server.utils import simple_log_function_time +from shared_configs.configs import INDEXING_ONLY +from shared_configs.configs import INTENT_MODEL_CONTEXT_SIZE +from shared_configs.configs import INTENT_MODEL_VERSION +from shared_configs.model_server_models import IntentRequest +from shared_configs.model_server_models import IntentResponse -from danswer.search.search_nlp_models import get_intent_model_tokenizer -from danswer.search.search_nlp_models import get_local_intent_model -from danswer.utils.timing import log_function_time -from 
shared_models.model_server_models import IntentRequest -from shared_models.model_server_models import IntentResponse router = APIRouter(prefix="/custom") +_INTENT_TOKENIZER: Optional[AutoTokenizer] = None +_INTENT_MODEL: Optional[TFDistilBertForSequenceClassification] = None -@log_function_time(print_only=True) + +def get_intent_model_tokenizer( + model_name: str = INTENT_MODEL_VERSION, +) -> "AutoTokenizer": + global _INTENT_TOKENIZER + if _INTENT_TOKENIZER is None: + _INTENT_TOKENIZER = AutoTokenizer.from_pretrained(model_name) + return _INTENT_TOKENIZER + + +def get_local_intent_model( + model_name: str = INTENT_MODEL_VERSION, + max_context_length: int = INTENT_MODEL_CONTEXT_SIZE, +) -> TFDistilBertForSequenceClassification: + global _INTENT_MODEL + if _INTENT_MODEL is None or max_context_length != _INTENT_MODEL.max_seq_length: + _INTENT_MODEL = TFDistilBertForSequenceClassification.from_pretrained( + model_name + ) + _INTENT_MODEL.max_seq_length = max_context_length + return _INTENT_MODEL + + +def warm_up_intent_model() -> None: + intent_tokenizer = get_intent_model_tokenizer() + inputs = intent_tokenizer( + MODEL_WARM_UP_STRING, return_tensors="tf", truncation=True, padding=True + ) + get_local_intent_model()(inputs) + + +@simple_log_function_time() def classify_intent(query: str) -> list[float]: - import tensorflow as tf # type:ignore - tokenizer = get_intent_model_tokenizer() intent_model = get_local_intent_model() model_input = tokenizer(query, return_tensors="tf", truncation=True, padding=True) @@ -26,16 +65,11 @@ def classify_intent(query: str) -> list[float]: @router.post("/intent-model") -def process_intent_request( +async def process_intent_request( intent_request: IntentRequest, ) -> IntentResponse: + if INDEXING_ONLY: + raise RuntimeError("Indexing model server should not call intent endpoint") + class_percentages = classify_intent(intent_request.query) return IntentResponse(class_probs=class_percentages) - - -def warm_up_intent_model() -> None: - 
intent_tokenizer = get_intent_model_tokenizer() - inputs = intent_tokenizer( - "danswer", return_tensors="tf", truncation=True, padding=True - ) - get_local_intent_model()(inputs) diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py index 1220736de..705386a8c 100644 --- a/backend/model_server/encoders.py +++ b/backend/model_server/encoders.py @@ -1,34 +1,33 @@ -from typing import TYPE_CHECKING +import gc +from typing import Optional from fastapi import APIRouter from fastapi import HTTPException +from sentence_transformers import CrossEncoder # type: ignore +from sentence_transformers import SentenceTransformer # type: ignore -from danswer.configs.model_configs import CROSS_ENCODER_MODEL_ENSEMBLE -from danswer.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE -from danswer.search.search_nlp_models import get_local_reranking_model_ensemble from danswer.utils.logger import setup_logger -from danswer.utils.timing import log_function_time -from shared_models.model_server_models import EmbedRequest -from shared_models.model_server_models import EmbedResponse -from shared_models.model_server_models import RerankRequest -from shared_models.model_server_models import RerankResponse - -if TYPE_CHECKING: - from sentence_transformers import SentenceTransformer # type: ignore - +from model_server.constants import MODEL_WARM_UP_STRING +from model_server.utils import simple_log_function_time +from shared_configs.configs import CROSS_EMBED_CONTEXT_SIZE +from shared_configs.configs import CROSS_ENCODER_MODEL_ENSEMBLE +from shared_configs.configs import INDEXING_ONLY +from shared_configs.model_server_models import EmbedRequest +from shared_configs.model_server_models import EmbedResponse +from shared_configs.model_server_models import RerankRequest +from shared_configs.model_server_models import RerankResponse logger = setup_logger() -WARM_UP_STRING = "Danswer is amazing" - router = APIRouter(prefix="/encoder") _GLOBAL_MODELS_DICT: dict[str, 
"SentenceTransformer"] = {} +_RERANK_MODELS: Optional[list["CrossEncoder"]] = None def get_embedding_model( model_name: str, - max_context_length: int = DOC_EMBEDDING_CONTEXT_SIZE, + max_context_length: int, ) -> "SentenceTransformer": from sentence_transformers import SentenceTransformer # type: ignore @@ -48,11 +47,44 @@ def get_embedding_model( return _GLOBAL_MODELS_DICT[model_name] -@log_function_time(print_only=True) +def get_local_reranking_model_ensemble( + model_names: list[str] = CROSS_ENCODER_MODEL_ENSEMBLE, + max_context_length: int = CROSS_EMBED_CONTEXT_SIZE, +) -> list[CrossEncoder]: + global _RERANK_MODELS + if _RERANK_MODELS is None or max_context_length != _RERANK_MODELS[0].max_length: + del _RERANK_MODELS + gc.collect() + + _RERANK_MODELS = [] + for model_name in model_names: + logger.info(f"Loading {model_name}") + model = CrossEncoder(model_name) + model.max_length = max_context_length + _RERANK_MODELS.append(model) + return _RERANK_MODELS + + +def warm_up_cross_encoders() -> None: + logger.info(f"Warming up Cross-Encoders: {CROSS_ENCODER_MODEL_ENSEMBLE}") + + cross_encoders = get_local_reranking_model_ensemble() + [ + cross_encoder.predict((MODEL_WARM_UP_STRING, MODEL_WARM_UP_STRING)) + for cross_encoder in cross_encoders + ] + + +@simple_log_function_time() def embed_text( - texts: list[str], model_name: str, normalize_embeddings: bool + texts: list[str], + model_name: str, + max_context_length: int, + normalize_embeddings: bool, ) -> list[list[float]]: - model = get_embedding_model(model_name=model_name) + model = get_embedding_model( + model_name=model_name, max_context_length=max_context_length + ) embeddings = model.encode(texts, normalize_embeddings=normalize_embeddings) if not isinstance(embeddings, list): @@ -61,7 +93,7 @@ def embed_text( return embeddings -@log_function_time(print_only=True) +@simple_log_function_time() def calc_sim_scores(query: str, docs: list[str]) -> list[list[float]]: cross_encoders = 
get_local_reranking_model_ensemble() sim_scores = [ @@ -72,13 +104,14 @@ def calc_sim_scores(query: str, docs: list[str]) -> list[list[float]]: @router.post("/bi-encoder-embed") -def process_embed_request( +async def process_embed_request( embed_request: EmbedRequest, ) -> EmbedResponse: try: embeddings = embed_text( texts=embed_request.texts, model_name=embed_request.model_name, + max_context_length=embed_request.max_context_length, normalize_embeddings=embed_request.normalize_embeddings, ) return EmbedResponse(embeddings=embeddings) @@ -87,7 +120,11 @@ def process_embed_request( @router.post("/cross-encoder-scores") -def process_rerank_request(embed_request: RerankRequest) -> RerankResponse: +async def process_rerank_request(embed_request: RerankRequest) -> RerankResponse: + """Cross encoders can be purely black box from the app perspective""" + if INDEXING_ONLY: + raise RuntimeError("Indexing model server should not call intent endpoint") + try: sim_scores = calc_sim_scores( query=embed_request.query, docs=embed_request.documents @@ -95,13 +132,3 @@ def process_rerank_request(embed_request: RerankRequest) -> RerankResponse: return RerankResponse(scores=sim_scores) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) - - -def warm_up_cross_encoders() -> None: - logger.info(f"Warming up Cross-Encoders: {CROSS_ENCODER_MODEL_ENSEMBLE}") - - cross_encoders = get_local_reranking_model_ensemble() - [ - cross_encoder.predict((WARM_UP_STRING, WARM_UP_STRING)) - for cross_encoder in cross_encoders - ] diff --git a/backend/model_server/main.py b/backend/model_server/main.py index dead931dc..1aaf95678 100644 --- a/backend/model_server/main.py +++ b/backend/model_server/main.py @@ -1,40 +1,63 @@ +import os +from collections.abc import AsyncGenerator +from contextlib import asynccontextmanager + import torch import uvicorn from fastapi import FastAPI +from transformers import logging as transformer_logging # type:ignore from danswer import __version__ 
-from danswer.configs.app_configs import MODEL_SERVER_ALLOWED_HOST -from danswer.configs.app_configs import MODEL_SERVER_PORT -from danswer.configs.model_configs import MIN_THREADS_ML_MODELS from danswer.utils.logger import setup_logger from model_server.custom_models import router as custom_models_router from model_server.custom_models import warm_up_intent_model from model_server.encoders import router as encoders_router from model_server.encoders import warm_up_cross_encoders +from model_server.management_endpoints import router as management_router +from shared_configs.configs import ENABLE_RERANKING_ASYNC_FLOW +from shared_configs.configs import ENABLE_RERANKING_REAL_TIME_FLOW +from shared_configs.configs import INDEXING_ONLY +from shared_configs.configs import MIN_THREADS_ML_MODELS +from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST +from shared_configs.configs import MODEL_SERVER_PORT +os.environ["TOKENIZERS_PARALLELISM"] = "false" +os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" + +transformer_logging.set_verbosity_error() logger = setup_logger() -def get_model_app() -> FastAPI: - application = FastAPI(title="Danswer Model Server", version=__version__) +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncGenerator: + if torch.cuda.is_available(): + logger.info("GPU is available") + else: + logger.info("GPU is not available") + torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads())) + logger.info(f"Torch Threads: {torch.get_num_threads()}") + + if not INDEXING_ONLY: + warm_up_intent_model() + if ENABLE_RERANKING_REAL_TIME_FLOW or ENABLE_RERANKING_ASYNC_FLOW: + warm_up_cross_encoders() + else: + logger.info("This model server should only run document indexing.") + + yield + + +def get_model_app() -> FastAPI: + application = FastAPI( + title="Danswer Model Server", version=__version__, lifespan=lifespan + ) + + application.include_router(management_router) application.include_router(encoders_router) 
application.include_router(custom_models_router) - @application.on_event("startup") - def startup_event() -> None: - if torch.cuda.is_available(): - logger.info("GPU is available") - else: - logger.info("GPU is not available") - - torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads())) - logger.info(f"Torch Threads: {torch.get_num_threads()}") - - warm_up_cross_encoders() - warm_up_intent_model() - return application diff --git a/backend/model_server/management_endpoints.py b/backend/model_server/management_endpoints.py new file mode 100644 index 000000000..fc1b8901e --- /dev/null +++ b/backend/model_server/management_endpoints.py @@ -0,0 +1,9 @@ +from fastapi import APIRouter +from fastapi import Response + +router = APIRouter(prefix="/api") + + +@router.get("/health") +def healthcheck() -> Response: + return Response(status_code=200) diff --git a/backend/model_server/utils.py b/backend/model_server/utils.py new file mode 100644 index 000000000..3ebae26e5 --- /dev/null +++ b/backend/model_server/utils.py @@ -0,0 +1,41 @@ +import time +from collections.abc import Callable +from collections.abc import Generator +from collections.abc import Iterator +from functools import wraps +from typing import Any +from typing import cast +from typing import TypeVar + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +F = TypeVar("F", bound=Callable) +FG = TypeVar("FG", bound=Callable[..., Generator | Iterator]) + + +def simple_log_function_time( + func_name: str | None = None, + debug_only: bool = False, + include_args: bool = False, +) -> Callable[[F], F]: + def decorator(func: F) -> F: + @wraps(func) + def wrapped_func(*args: Any, **kwargs: Any) -> Any: + start_time = time.time() + result = func(*args, **kwargs) + elapsed_time_str = str(time.time() - start_time) + log_name = func_name or func.__name__ + args_str = f" args={args} kwargs={kwargs}" if include_args else "" + final_log = f"{log_name}{args_str} took {elapsed_time_str} 
seconds" + if debug_only: + logger.debug(final_log) + else: + logger.info(final_log) + + return result + + return cast(F, wrapped_func) + + return decorator diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 6feb8ed27..6052624ad 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -1,13 +1,16 @@ +aiohttp==3.9.4 alembic==1.10.4 asyncpg==0.27.0 atlassian-python-api==3.37.0 beautifulsoup4==4.12.2 +boto3==1.34.84 celery==5.3.4 chardet==5.2.0 dask==2023.8.1 +ddtrace==2.6.5 distributed==2023.8.1 -fastapi==0.103.0 -fastapi-users==11.0.0 +fastapi==0.109.2 +fastapi-users==12.1.3 fastapi-users-db-sqlalchemy==5.0.0 filelock==3.12.0 google-api-python-client==2.86.0 @@ -21,22 +24,24 @@ httpx[http2]==0.23.3 httpx-oauth==0.11.2 huggingface-hub==0.20.1 jira==3.5.1 -langchain==0.0.340 -litellm==1.7.5 +langchain==0.1.17 +langchain-community==0.0.36 +langchain-core==0.1.50 +langchain-text-splitters==0.0.1 +litellm==1.37.7 llama-index==0.9.45 Mako==1.2.4 msal==1.26.0 nltk==3.8.1 -docx2txt==0.8 -Office365-REST-Python-Client==2.5.4 +Office365-REST-Python-Client==2.5.9 oauthlib==3.2.2 -openai==1.3.5 +openai==1.14.3 openpyxl==3.1.2 playwright==1.41.2 psutil==5.9.5 psycopg2-binary==2.9.9 pycryptodome==3.19.1 -pydantic==1.10.7 +pydantic==1.10.13 PyGithub==1.58.2 python-dateutil==2.8.2 python-gitlab==3.9.0 @@ -46,25 +51,20 @@ pytest-mock==3.12.0 pytest-playwright==0.3.2 python-docx==1.1.0 python-dotenv==1.0.0 -python-multipart==0.0.6 +python-multipart==0.0.7 +pywikibot==9.0.0 requests==2.31.0 requests-oauthlib==1.3.1 retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from image rfc3986==1.5.0 -# need to pin `safetensors` version, since the latest versions requires -# building from source using Rust rt==3.1.2 -safetensors==0.3.1 -sentence-transformers==2.2.2 slack-sdk==3.20.2 SQLAlchemy[mypy]==2.0.15 +starlette==0.36.3 supervisor==4.2.5 -tensorflow==2.14.0 tiktoken==0.4.0 timeago==1.0.16 
-torch==2.0.1 -torchvision==0.15.2 -transformers==4.36.2 +transformers==4.39.2 uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 diff --git a/backend/requirements/dev.txt b/backend/requirements/dev.txt index e855436db..4a9bd21d3 100644 --- a/backend/requirements/dev.txt +++ b/backend/requirements/dev.txt @@ -1,7 +1,7 @@ black==23.3.0 celery-types==0.19.0 mypy-extensions==1.0.0 -mypy==1.1.1 +mypy==1.8.0 pre-commit==3.2.2 reorder-python-imports==3.9.0 ruff==0.0.286 diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt index 3da3e3e59..4ef8ffa5b 100644 --- a/backend/requirements/model_server.txt +++ b/backend/requirements/model_server.txt @@ -1,8 +1,9 @@ -fastapi==0.103.0 -pydantic==1.10.7 -safetensors==0.3.1 -sentence-transformers==2.2.2 -tensorflow==2.13.0 +fastapi==0.109.2 +h5py==3.9.0 +pydantic==1.10.13 +safetensors==0.4.2 +sentence-transformers==2.6.1 +tensorflow==2.15.0 torch==2.0.1 -transformers==4.36.2 +transformers==4.39.2 uvicorn==0.21.1 diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py index 30fb4bf6f..c9b91b00c 100644 --- a/backend/scripts/dev_run_background_jobs.py +++ b/backend/scripts/dev_run_background_jobs.py @@ -49,8 +49,6 @@ def run_jobs(exclude_indexing: bool) -> None: if not exclude_indexing: update_env = os.environ.copy() update_env["PYTHONPATH"] = "." 
- update_env["DYNAMIC_CONFIG_DIR_PATH"] = "./dynamic_config_storage" - update_env["FILE_CONNECTOR_TMP_STORAGE_PATH"] = "./dynamic_config_storage" cmd_indexing = ["python", "danswer/background/update.py"] indexing_process = subprocess.Popen( diff --git a/backend/scripts/simulate_chat_frontend.py b/backend/scripts/simulate_chat_frontend.py deleted file mode 100644 index 2344c166d..000000000 --- a/backend/scripts/simulate_chat_frontend.py +++ /dev/null @@ -1,92 +0,0 @@ -# This file is purely for development use, not included in any builds -# Use this to test the chat feature -# This script does not allow for branching logic that is supported by the backend APIs -# This script also does not allow for editing/regeneration of user/model messages -# Have Danswer API server running to use this. -import json - -import requests - -from danswer.configs.app_configs import APP_PORT - -LOCAL_CHAT_ENDPOINT = f"http://127.0.0.1:{APP_PORT}/chat/" - - -def create_new_session() -> int: - data = {"persona_id": 0} # Global default Persona - response = requests.post(LOCAL_CHAT_ENDPOINT + "create-chat-session", json=data) - response.raise_for_status() - new_session_id = response.json()["chat_session_id"] - return new_session_id - - -def send_chat_message( - message: str, - chat_session_id: int, - parent_message: int | None, -) -> int: - data = { - "message": message, - "chat_session_id": chat_session_id, - "parent_message_id": parent_message, - "prompt_id": 0, # Global default Prompt - "retrieval_options": { - "run_search": "always", - "real_time": True, - "filters": {"tags": []}, - }, - } - - docs: list[dict] | None = None - message_id: int | None = None - with requests.post( - LOCAL_CHAT_ENDPOINT + "send-message", json=data, stream=True - ) as r: - for json_response in r.iter_lines(): - response_text = json.loads(json_response.decode()) - new_token = response_text.get("answer_piece") - if docs is None: - docs = response_text.get("top_documents") - if message_id is None: - message_id = 
response_text.get("message_id") - if new_token: - print(new_token, end="", flush=True) - print() - - if docs: - docs.sort(key=lambda x: x["score"], reverse=True) # type: ignore - print("\nReference Docs:") - for ind, doc in enumerate(docs, start=1): - print(f"\t - Doc {ind}: {doc.get('semantic_identifier')}") - - if message_id is None: - raise ValueError("Couldn't get latest message id") - - return message_id - - -def run_chat() -> None: - try: - new_session_id = create_new_session() - print(f"Chat Session ID: {new_session_id}") - except requests.exceptions.ConnectionError: - print( - "Looks like you haven't started the Danswer Backend server, please run the FastAPI server" - ) - exit() - return - - parent_message = None - while True: - new_message = input( - "\n\n----------------------------------\n" - "Please provide a new chat message:\n> " - ) - - parent_message = send_chat_message( - new_message, new_session_id, parent_message=parent_message - ) - - -if __name__ == "__main__": - run_chat() diff --git a/backend/scripts/simulate_frontend.py b/backend/scripts/simulate_frontend.py deleted file mode 100644 index 253c6e6a1..000000000 --- a/backend/scripts/simulate_frontend.py +++ /dev/null @@ -1,95 +0,0 @@ -# This file is purely for development use, not included in any builds -import argparse -import json -from pprint import pprint - -import requests - -from danswer.configs.app_configs import APP_PORT -from danswer.configs.app_configs import DOCUMENT_INDEX_NAME -from danswer.configs.constants import SOURCE_TYPE - - -if __name__ == "__main__": - previous_query = None - parser = argparse.ArgumentParser() - - parser.add_argument( - "-t", - "--type", - type=str, - default="hybrid", - help='"hybrid" "semantic" or "keyword", defaults to "hybrid"', - ) - - parser.add_argument( - "-s", - "--stream", - action="store_true", - help="Enable streaming response", - ) - - parser.add_argument( - "--filters", - type=str, - help="Comma separated list of source types to filter by (no 
spaces)", - ) - - parser.add_argument("query", nargs="*", help="The query to process") - - previous_input = None - while True: - try: - user_input = input( - "\n\nAsk any question:\n" - " - Use -t (hybrid/semantic/keyword) flag to choose search flow.\n" - " - prefix with -s to stream answer, --filters web,slack etc. for filters.\n" - " - input an empty string to rerun last query.\n\t" - ) - - if user_input: - previous_input = user_input - else: - if not previous_input: - print("No previous input") - continue - print(f"Re-executing previous question:\n\t{previous_input}") - user_input = previous_input - - args = parser.parse_args(user_input.split()) - - search_type = str(args.type).lower() - stream = args.stream - source_types = args.filters.split(",") if args.filters else None - - query = " ".join(args.query) - - if search_type not in ["hybrid", "semantic", "keyword"]: - raise ValueError("Invalid Search") - - elif stream: - path = "stream-direct-qa" - else: - path = "direct-qa" - - endpoint = f"http://127.0.0.1:{APP_PORT}/{path}" - - query_json = { - "query": query, - "collection": DOCUMENT_INDEX_NAME, - "filters": {SOURCE_TYPE: source_types}, - "enable_auto_detect_filters": True, - "search_type": search_type, - } - - if args.stream: - with requests.post(endpoint, json=query_json, stream=True) as r: - for json_response in r.iter_lines(): - pprint(json.loads(json_response.decode())) - else: - response = requests.post(endpoint, json=query_json) - contents = json.loads(response.content) - pprint(contents) - - except Exception as e: - print(f"Failed due to {e}, retrying") diff --git a/backend/shared_configs/__init__.py b/backend/shared_configs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py new file mode 100644 index 000000000..aeeb9cf2a --- /dev/null +++ b/backend/shared_configs/configs.py @@ -0,0 +1,43 @@ +import os + + +MODEL_SERVER_HOST = 
os.environ.get("MODEL_SERVER_HOST") or "localhost" +MODEL_SERVER_ALLOWED_HOST = os.environ.get("MODEL_SERVER_HOST") or "0.0.0.0" +MODEL_SERVER_PORT = int(os.environ.get("MODEL_SERVER_PORT") or "9000") +# Model server for indexing should use a separate one to not allow indexing to introduce delay +# for inference +INDEXING_MODEL_SERVER_HOST = ( + os.environ.get("INDEXING_MODEL_SERVER_HOST") or MODEL_SERVER_HOST +) +INDEXING_MODEL_SERVER_PORT = int( + os.environ.get("INDEXING_MODEL_SERVER_PORT") or MODEL_SERVER_PORT +) + +# Danswer custom Deep Learning Models +INTENT_MODEL_VERSION = "danswer/intent-model" +INTENT_MODEL_CONTEXT_SIZE = 256 + +# Bi-Encoder, other details +DOC_EMBEDDING_CONTEXT_SIZE = 512 + +# Cross Encoder Settings +ENABLE_RERANKING_ASYNC_FLOW = ( + os.environ.get("ENABLE_RERANKING_ASYNC_FLOW", "").lower() == "true" +) +ENABLE_RERANKING_REAL_TIME_FLOW = ( + os.environ.get("ENABLE_RERANKING_REAL_TIME_FLOW", "").lower() == "true" +) +# Only using one cross-encoder for now +CROSS_ENCODER_MODEL_ENSEMBLE = ["mixedbread-ai/mxbai-rerank-xsmall-v1"] +CROSS_EMBED_CONTEXT_SIZE = 512 + +# This controls the minimum number of pytorch "threads" to allocate to the embedding +# model. If torch finds more threads on its own, this value is not used. 
+MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1) + +# Model server that has indexing only set will throw exception if used for reranking +# or intent classification +INDEXING_ONLY = os.environ.get("INDEXING_ONLY", "").lower() == "true" + +# notset, debug, info, warning, error, or critical +LOG_LEVEL = os.environ.get("LOG_LEVEL", "info") diff --git a/backend/shared_models/model_server_models.py b/backend/shared_configs/model_server_models.py similarity index 79% rename from backend/shared_models/model_server_models.py rename to backend/shared_configs/model_server_models.py index e3b04557d..020a24a30 100644 --- a/backend/shared_models/model_server_models.py +++ b/backend/shared_configs/model_server_models.py @@ -2,8 +2,10 @@ from pydantic import BaseModel class EmbedRequest(BaseModel): + # This already includes any prefixes, the text is just passed directly to the model texts: list[str] model_name: str + max_context_length: int normalize_embeddings: bool diff --git a/backend/slackbot_images/Confluence.png b/backend/slackbot_images/Confluence.png new file mode 100644 index 000000000..b201fb616 Binary files /dev/null and b/backend/slackbot_images/Confluence.png differ diff --git a/backend/slackbot_images/File.png b/backend/slackbot_images/File.png new file mode 100644 index 000000000..563d74939 Binary files /dev/null and b/backend/slackbot_images/File.png differ diff --git a/backend/slackbot_images/Guru.png b/backend/slackbot_images/Guru.png new file mode 100644 index 000000000..adfa459d6 Binary files /dev/null and b/backend/slackbot_images/Guru.png differ diff --git a/backend/slackbot_images/Jira.png b/backend/slackbot_images/Jira.png new file mode 100644 index 000000000..2056e6b93 Binary files /dev/null and b/backend/slackbot_images/Jira.png differ diff --git a/backend/slackbot_images/README.md b/backend/slackbot_images/README.md new file mode 100644 index 000000000..bb527d676 --- /dev/null +++ b/backend/slackbot_images/README.md @@ -0,0 
+1,3 @@ + +This folder contains images needed by the Danswer Slack Bot. When possible, we use the images +within `web/public`, but sometimes those images do not work for the Slack Bot. diff --git a/backend/slackbot_images/Web.png b/backend/slackbot_images/Web.png new file mode 100644 index 000000000..33320416b Binary files /dev/null and b/backend/slackbot_images/Web.png differ diff --git a/backend/slackbot_images/Zendesk.png b/backend/slackbot_images/Zendesk.png new file mode 100644 index 000000000..689c14264 Binary files /dev/null and b/backend/slackbot_images/Zendesk.png differ diff --git a/backend/tests/regression/answer_quality/eval_direct_qa.py b/backend/tests/regression/answer_quality/eval_direct_qa.py index bd2f70010..d32f27547 100644 --- a/backend/tests/regression/answer_quality/eval_direct_qa.py +++ b/backend/tests/regression/answer_quality/eval_direct_qa.py @@ -77,7 +77,6 @@ def get_answer_for_question( str | None, RetrievalMetricsContainer | None, RerankMetricsContainer | None, - LLMMetricsContainer | None, ]: filters = IndexFilters( source_type=None, @@ -103,7 +102,6 @@ def get_answer_for_question( retrieval_metrics = MetricsHander[RetrievalMetricsContainer]() rerank_metrics = MetricsHander[RerankMetricsContainer]() - llm_metrics = MetricsHander[LLMMetricsContainer]() answer = get_search_answer( query_req=new_message_request, @@ -116,14 +114,12 @@ def get_answer_for_question( bypass_acl=True, retrieval_metrics_callback=retrieval_metrics.record_metric, rerank_metrics_callback=rerank_metrics.record_metric, - llm_metrics_callback=llm_metrics.record_metric, ) return ( answer.answer, retrieval_metrics.metrics, rerank_metrics.metrics, - llm_metrics.metrics, ) @@ -221,7 +217,6 @@ if __name__ == "__main__": answer, retrieval_metrics, rerank_metrics, - llm_metrics, ) = get_answer_for_question(sample["question"], db_session) end_time = datetime.now() @@ -237,12 +232,6 @@ if __name__ == "__main__": else "\tFailed, either crashed or refused to answer." 
) if not args.discard_metrics: - print("\nLLM Tokens Usage:") - if llm_metrics is None: - print("No LLM Metrics Available") - else: - _print_llm_metrics(llm_metrics) - print("\nRetrieval Metrics:") if retrieval_metrics is None: print("No Retrieval Metrics Available") diff --git a/backend/tests/regression/search_quality/eval_search.py b/backend/tests/regression/search_quality/eval_search.py index 7cd3e6068..23eefc45c 100644 --- a/backend/tests/regression/search_quality/eval_search.py +++ b/backend/tests/regression/search_quality/eval_search.py @@ -7,16 +7,13 @@ from typing import TextIO from sqlalchemy.orm import Session -from danswer.chat.chat_utils import get_chunks_for_qa -from danswer.db.embedding_model import get_current_db_embedding_model from danswer.db.engine import get_sqlalchemy_engine -from danswer.document_index.factory import get_default_document_index -from danswer.indexing.models import InferenceChunk -from danswer.search.models import IndexFilters +from danswer.llm.answering.doc_pruning import reorder_docs +from danswer.search.models import InferenceChunk from danswer.search.models import RerankMetricsContainer from danswer.search.models import RetrievalMetricsContainer -from danswer.search.models import SearchQuery -from danswer.search.search_runner import full_chunk_search +from danswer.search.models import SearchRequest +from danswer.search.pipeline import SearchPipeline from danswer.utils.callbacks import MetricsHander @@ -81,46 +78,25 @@ def get_search_results( RetrievalMetricsContainer | None, RerankMetricsContainer | None, ]: - filters = IndexFilters( - source_type=None, - document_set=None, - time_cutoff=None, - access_control_list=None, - ) - search_query = SearchQuery( - query=query, - filters=filters, - recency_bias_multiplier=1.0, - ) - retrieval_metrics = MetricsHander[RetrievalMetricsContainer]() rerank_metrics = MetricsHander[RerankMetricsContainer]() with Session(get_sqlalchemy_engine()) as db_session: - embedding_model = 
get_current_db_embedding_model(db_session) + search_pipeline = SearchPipeline( + search_request=SearchRequest( + query=query, + ), + user=None, + db_session=db_session, + retrieval_metrics_callback=retrieval_metrics.record_metric, + rerank_metrics_callback=rerank_metrics.record_metric, + ) - document_index = get_default_document_index( - primary_index_name=embedding_model.index_name, secondary_index_name=None - ) - - top_chunks, llm_chunk_selection = full_chunk_search( - query=search_query, - document_index=document_index, - db_session=db_session, - retrieval_metrics_callback=retrieval_metrics.record_metric, - rerank_metrics_callback=rerank_metrics.record_metric, - ) - - llm_chunks_indices = get_chunks_for_qa( - chunks=top_chunks, - llm_chunk_selection=llm_chunk_selection, - token_limit=None, - ) - - llm_chunks = [top_chunks[i] for i in llm_chunks_indices] + top_chunks = search_pipeline.reranked_chunks + llm_chunk_selection = search_pipeline.chunk_relevance_list return ( - llm_chunks, + reorder_docs(top_chunks, llm_chunk_selection), retrieval_metrics.metrics, rerank_metrics.metrics, ) diff --git a/backend/tests/unit/danswer/connectors/cross_connector_utils/test_html_utils.py b/backend/tests/unit/danswer/connectors/cross_connector_utils/test_html_utils.py index 8c502269d..860001e15 100644 --- a/backend/tests/unit/danswer/connectors/cross_connector_utils/test_html_utils.py +++ b/backend/tests/unit/danswer/connectors/cross_connector_utils/test_html_utils.py @@ -1,7 +1,7 @@ import pathlib import unittest -from danswer.connectors.cross_connector_utils.html_utils import parse_html_page_basic +from danswer.file_processing.html_utils import parse_html_page_basic class TestQAPostprocessing(unittest.TestCase): diff --git a/backend/tests/unit/danswer/connectors/mediawiki/__init__.py b/backend/tests/unit/danswer/connectors/mediawiki/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py 
b/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py new file mode 100644 index 000000000..8f053e9f3 --- /dev/null +++ b/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py @@ -0,0 +1,75 @@ +from typing import Final +from unittest import mock + +import pytest +from pywikibot.families.wikipedia_family import Family as WikipediaFamily # type: ignore[import-untyped] +from pywikibot.family import Family # type: ignore[import-untyped] + +from danswer.connectors.mediawiki import family + +NON_BUILTIN_WIKIS: Final[list[tuple[str, str]]] = [ + ("https://fallout.fandom.com", "falloutwiki"), + ("https://harrypotter.fandom.com/wiki/", "harrypotterwiki"), + ("https://artofproblemsolving.com/wiki", "artofproblemsolving"), + ("https://www.bogleheads.org/wiki/Main_Page", "bogleheadswiki"), + ("https://bogleheads.org/wiki/Main_Page", "bogleheadswiki"), + ("https://www.dandwiki.com/wiki/", "dungeonsanddragons"), + ("https://wiki.factorio.com/", "factoriowiki"), +] + + +# TODO: Add support for more builtin family types from `pywikibot.families`. 
+@pytest.mark.parametrize( + "url, name, expected", + [ + ( + "https://en.wikipedia.org", + "wikipedia", + WikipediaFamily, + ), # Support urls with protocol + ( + "wikipedia.org", + "wikipedia", + WikipediaFamily, + ), # Support urls without subdomain + ( + "en.wikipedia.org", + "wikipedia", + WikipediaFamily, + ), # Support urls with subdomain + ("m.wikipedia.org", "wikipedia", WikipediaFamily), + ("de.wikipedia.org", "wikipedia", WikipediaFamily), + ], +) +def test_family_class_dispatch_builtins( + url: str, name: str, expected: type[Family] +) -> None: + """Test that the family class dispatch function returns the correct family class in several scenarios.""" + assert family.family_class_dispatch(url, name) == expected + + +@pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS) +def test_family_class_dispatch_on_non_builtins_generates_new_class_fast( + url: str, name: str +) -> None: + """Test that using the family class dispatch function on an unknown url generates a new family class.""" + with mock.patch.object( + family, "generate_family_class" + ) as mock_generate_family_class: + family.family_class_dispatch(url, name) + mock_generate_family_class.assert_called_once_with(url, name) + + +@pytest.mark.slow +@pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS) +def test_family_class_dispatch_on_non_builtins_generates_new_class_slow( + url: str, name: str +) -> None: + """Test that using the family class dispatch function on an unknown url generates a new family class. + + This test is slow because it actually performs the network calls to generate the family classes. 
+ """ + generated_family_class = family.generate_family_class(url, name) + assert issubclass(generated_family_class, Family) + dispatch_family_class = family.family_class_dispatch(url, name) + assert dispatch_family_class == generated_family_class diff --git a/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py new file mode 100644 index 000000000..260a5619f --- /dev/null +++ b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py @@ -0,0 +1,146 @@ +from __future__ import annotations + +import datetime +from collections.abc import Iterable + +import pytest +import pywikibot # type: ignore[import-untyped] +from pytest_mock import MockFixture + +from danswer.connectors.mediawiki import wiki + + +@pytest.fixture +def site() -> pywikibot.Site: + return pywikibot.Site("en", "wikipedia") + + +def test_pywikibot_timestamp_to_utc_datetime() -> None: + timestamp_without_tzinfo = pywikibot.Timestamp(2023, 12, 27, 15, 38, 49) + timestamp_min_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.min) + timestamp_max_timezone = timestamp_without_tzinfo.astimezone(datetime.timezone.max) + assert timestamp_min_timezone.tzinfo == datetime.timezone.min + assert timestamp_max_timezone.tzinfo == datetime.timezone.max + for timestamp in [ + timestamp_without_tzinfo, + timestamp_min_timezone, + timestamp_max_timezone, + ]: + dt = wiki.pywikibot_timestamp_to_utc_datetime(timestamp) + assert dt.tzinfo == datetime.timezone.utc + + +class MockPage(pywikibot.Page): + def __init__( + self, site: pywikibot.Site, title: str, _has_categories: bool = False + ) -> None: + super().__init__(site, title) + self._has_categories = _has_categories + self.header = "This is a header" + self._sections = ["This is a section", "This is another section"] + + @property + def _sections_helper(self) -> list[str]: + return [ + f"== Section {i} ==\n{section}\n" + for i, section in enumerate(self._sections) + ] + + @property 
+ def text(self) -> str: + text = self.header + "\n" + for section in self._sections_helper: + text += section + return text + + @property + def pageid(self) -> str: + return "1" + + def full_url(self) -> str: + return "Test URL" + + def categories( + self, + with_sort_key: bool = False, + total: int | None = None, + content: bool = False, + ) -> Iterable[pywikibot.Page]: + if not self._has_categories: + return [] + return [ + MockPage(self.site, "Test Category1"), + MockPage(self.site, "Test Category2"), + ] + + @property + def latest_revision(self) -> pywikibot.page.Revision: + return pywikibot.page.Revision( + timestamp=pywikibot.Timestamp(2023, 12, 27, 15, 38, 49) + ) + + +def test_get_doc_from_page(site: pywikibot.Site) -> None: + test_page = MockPage(site, "Test Page", _has_categories=True) + doc = wiki.get_doc_from_page(test_page, site, wiki.DocumentSource.MEDIAWIKI) + assert doc.source == wiki.DocumentSource.MEDIAWIKI + assert doc.title == test_page.title() + assert doc.doc_updated_at == wiki.pywikibot_timestamp_to_utc_datetime( + test_page.latest_revision.timestamp + ) + assert len(doc.sections) == 3 + for section, expected_section in zip( + doc.sections, test_page._sections_helper + [test_page.header] + ): + assert ( + section.text.strip() == expected_section.strip() + ) # Extra whitespace before/after is okay + assert section.link and section.link.startswith(test_page.full_url()) + assert doc.semantic_identifier == test_page.title() + assert doc.metadata == { + "categories": [category.title() for category in test_page.categories()] + } + assert doc.id == test_page.pageid + + +def test_mediawiki_connector_recurse_depth() -> None: + """Test that the recurse_depth parameter is parsed correctly. + + -1 should be parsed as `True` (for unbounded recursion) + 0 or greater should be parsed as an integer + Negative values less than -1 should raise a ValueError + + This is the specification dictated by the `pywikibot` library. 
We do not need to test behavior beyond this. + """ + hostname = "wikipedia.org" + categories: list[str] = [] + pages = ["Test Page"] + connector_name = "Test Connector" + + # Recurse depth less than -1 raises ValueError + with pytest.raises(ValueError): + recurse_depth = -2 + wiki.MediaWikiConnector( + hostname, categories, pages, recurse_depth, connector_name + ) + + # Recurse depth of -1 gets parsed as `True` + recurse_depth = -1 + connector = wiki.MediaWikiConnector( + hostname, categories, pages, recurse_depth, connector_name + ) + assert connector.recurse_depth is True + + # Recurse depth of 0 or greater gets parsed as an integer + recurse_depth = 0 + connector = wiki.MediaWikiConnector( + hostname, categories, pages, recurse_depth, connector_name + ) + assert connector.recurse_depth == recurse_depth + + +def test_load_from_state_calls_poll_source_with_nones(mocker: MockFixture) -> None: + connector = wiki.MediaWikiConnector("wikipedia.org", [], [], 0, "test") + poll_source = mocker.patch.object(connector, "poll_source") + connector.load_from_state() + poll_source.assert_called_once_with(None, None) diff --git a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py index b30d08b16..a9046691b 100644 --- a/backend/tests/unit/danswer/direct_qa/test_qa_utils.py +++ b/backend/tests/unit/danswer/direct_qa/test_qa_utils.py @@ -2,9 +2,13 @@ import textwrap import unittest from danswer.configs.constants import DocumentSource -from danswer.indexing.models import InferenceChunk -from danswer.one_shot_answer.qa_utils import match_quotes_to_docs -from danswer.one_shot_answer.qa_utils import separate_answer_quotes +from danswer.llm.answering.stream_processing.quotes_processing import ( + match_quotes_to_docs, +) +from danswer.llm.answering.stream_processing.quotes_processing import ( + separate_answer_quotes, +) +from danswer.search.models import InferenceChunk class TestQAPostprocessing(unittest.TestCase): diff --git 
a/deployment/README.md b/deployment/README.md index e3de37268..9454cbbe5 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -25,6 +25,8 @@ Docker Compose provides the easiest way to get Danswer up and running. Requirements: Docker and docker compose +This section is for getting started quickly without setting up GPUs. For deployments to leverage GPU, please refer to [this](https://github.com/danswer-ai/danswer/blob/main/deployment/docker_compose/README.md) documentation. + 1. To run Danswer, navigate to `docker_compose` directory and run the following: - `docker compose -f docker-compose.dev.yml -p danswer-stack up -d --pull always --force-recreate` - or run: `docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build --force-recreate` diff --git a/deployment/data/nginx/app.conf.template b/deployment/data/nginx/app.conf.template index c2d13d12d..b698c744b 100644 --- a/deployment/data/nginx/app.conf.template +++ b/deployment/data/nginx/app.conf.template @@ -1,3 +1,9 @@ +# Log format to include request latency +log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for" ' + 'rt=$request_time'; + upstream api_server { # fail_timeout=0 means we always retry an upstream even if it failed # to return a good HTTP response @@ -20,7 +26,11 @@ server { client_max_body_size 5G; # Maximum upload size - location ~ ^/api(.*)$ { + access_log /var/log/nginx/access.log custom_main; + + # Match both /api/* and /openapi.json in a single rule + location ~ ^/(api|openapi.json)(/.*)?$ { + # Rewrite /api prefixed matched paths rewrite ^/api(/.*)$ $1 break; # misc headers diff --git a/deployment/data/nginx/app.conf.template.dev b/deployment/data/nginx/app.conf.template.dev index 7cffccf21..a7a0efa19 100644 --- a/deployment/data/nginx/app.conf.template.dev +++ b/deployment/data/nginx/app.conf.template.dev @@ -1,3 +1,9 @@ +# Override log format to include 
request latency +log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for" ' + 'rt=$request_time'; + upstream api_server { # fail_timeout=0 means we always retry an upstream even if it failed # to return a good HTTP response @@ -20,7 +26,11 @@ server { client_max_body_size 5G; # Maximum upload size - location ~ ^/api(.*)$ { + access_log /var/log/nginx/access.log custom_main; + + # Match both /api/* and /openapi.json in a single rule + location ~ ^/(api|openapi.json)(/.*)?$ { + # Rewrite /api prefixed matched paths rewrite ^/api(/.*)$ $1 break; # misc headers @@ -56,3 +66,4 @@ server { proxy_pass http://web_server; } } + diff --git a/deployment/data/nginx/app.conf.template.no-letsencrypt b/deployment/data/nginx/app.conf.template.no-letsencrypt index 7c6074617..4d5096374 100644 --- a/deployment/data/nginx/app.conf.template.no-letsencrypt +++ b/deployment/data/nginx/app.conf.template.no-letsencrypt @@ -1,3 +1,9 @@ +# Log format to include request latency +log_format custom_main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for" ' + 'rt=$request_time'; + upstream api_server { # fail_timeout=0 means we always retry an upstream even if it failed # to return a good HTTP response @@ -20,7 +26,11 @@ server { client_max_body_size 5G; # Maximum upload size - location ~ ^/api(.*)$ { + access_log /var/log/nginx/access.log custom_main; + + # Match both /api/* and /openapi.json in a single rule + location ~ ^/(api|openapi.json)(/.*)?$ { + # Rewrite /api prefixed matched paths rewrite ^/api(/.*)$ $1 break; # misc headers diff --git a/deployment/data/nginx/run-nginx.sh b/deployment/data/nginx/run-nginx.sh index 12f64127c..fed6eb686 100755 --- a/deployment/data/nginx/run-nginx.sh +++ b/deployment/data/nginx/run-nginx.sh @@ -2,6 +2,12 @@ envsubst '$DOMAIN $SSL_CERT_FILE_NAME 
$SSL_CERT_KEY_FILE_NAME' < "/etc/nginx/conf.d/$1" > /etc/nginx/conf.d/app.conf # wait for the api_server to be ready +echo "Waiting for API server to boot up; this may take a minute or two..." +echo "If this takes more than ~5 minutes, check the logs of the API server container for errors with the following command:" +echo +echo "docker logs danswer-stack_api_server-1" +echo + while true; do # Use curl to send a request and capture the HTTP status code status_code=$(curl -o /dev/null -s -w "%{http_code}\n" "http://api_server:8080/health") diff --git a/deployment/docker_compose/README.md b/deployment/docker_compose/README.md new file mode 100644 index 000000000..96285681e --- /dev/null +++ b/deployment/docker_compose/README.md @@ -0,0 +1,40 @@ + + +# Deploying Danswer using Docker Compose + +For general information, please read the instructions in this [README](https://github.com/danswer-ai/danswer/blob/main/deployment/README.md). + +## Deploy in a system without GPU support +This part is elaborated precisely in this [README](https://github.com/danswer-ai/danswer/blob/main/deployment/README.md) in the section *Docker Compose*. If you have any questions, please feel free to open an issue or get in touch on Slack for support. + +## Deploy in a system with GPU support +Running Model servers with GPU support while indexing and querying can result in significant improvements in performance. This is highly recommended if you have access to the resources. Currently, Danswer offloads the embedding model and tokenizers to the GPU VRAM and the size needed depends on the chosen embedding model. The default embedding model `intfloat/e5-base-v2` takes up about 1GB of VRAM and since we need this for both the inference and embedding pipelines, you would need roughly 2GB of VRAM. + +### Setup +To be able to use the NVIDIA runtime, the following is mandatory: +- proper setup of the NVIDIA driver in the host system.
+- installation of `nvidia-container-toolkit` for passing the GPU runtime to containers + +You will find the detailed steps here: + +#### Installation of NVIDIA Drivers +Visit the official [NVIDIA drivers page](https://www.nvidia.com/Download/index.aspx) to download and install the proper drivers. Reboot your system once you have done so. + +Alternatively, you can choose to install the driver versions via the package managers of your choice on UNIX-based systems. + +#### Installation of `nvidia-container-toolkit` + +For GPUs to be accessible to containers, you will need the container toolkit. Please follow [these instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) to install the necessary runtime based on your requirement. + +### Launching with GPU + +1. To run Danswer with GPU, navigate to the `docker_compose` directory and run the following: + - `docker compose -f docker-compose.gpu-dev.yml -p danswer-stack up -d --pull always --force-recreate` + - or run: `docker compose -f docker-compose.gpu-dev.yml -p danswer-stack up -d --build --force-recreate` +to build from source + - Downloading images or packages/requirements may take 15+ minutes depending on your internet connection. + + +2.
To shut down the deployment, run: + - To stop the containers: `docker compose -f docker-compose.gpu-dev.yml -p danswer-stack stop` + - To delete the containers: `docker compose -f docker-compose.gpu-dev.yml -p danswer-stack down` diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 0ec9a8b82..bd6054867 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -12,6 +12,7 @@ services: depends_on: - relational_db - index + - inference_model_server restart: always ports: - "8080:8080" @@ -19,6 +20,7 @@ services: # Auth Settings - AUTH_TYPE=${AUTH_TYPE:-disabled} - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400} + - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-} - GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} @@ -29,9 +31,9 @@ services: - SMTP_PASS=${SMTP_PASS:-} - EMAIL_FROM=${EMAIL_FROM:-} # Gen AI Settings - - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-openai} - - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-gpt-3.5-turbo-0125} - - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-gpt-3.5-turbo-0125} + - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} + - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-} - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} @@ -42,7 +44,16 @@ services: - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-} - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} + - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} + - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} + - 
LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} + # if set, allows for the use of the token budget system + - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-} + # Enables the use of bedrock models + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} + - AWS_REGION_NAME=${AWS_REGION_NAME:-} # Query Options - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) @@ -60,7 +71,7 @@ services: - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} - ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-} - ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-} - - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-} + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} # Leave this on pretty please? Nothing sensitive is collected! 
# https://docs.danswer.dev/more/telemetry @@ -70,17 +81,16 @@ services: # If set to `true` will enable additional logs about Vespa query performance # (time spent on finding the right docs + time spent fetching summaries from disk) - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + - LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-} + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + background: image: danswer/danswer-backend:latest build: @@ -90,12 +100,15 @@ services: depends_on: - relational_db - index + - inference_model_server + - indexing_model_server restart: always environment: + - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} # Gen AI Settings (Needed by DanswerBot) - - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-openai} - - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-gpt-3.5-turbo-0125} - - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-gpt-3.5-turbo-0125} + - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} + - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-} - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-} - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} @@ -106,7 +119,11 @@ services: - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-} - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} + - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} + - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-} + - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} + - 
LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} # Query Options - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years) - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) @@ -126,23 +143,29 @@ services: - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} # Needed by DanswerBot - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-} - - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-} + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} - - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-} - - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server} # Indexing Configs - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-} + - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-} + - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-} - DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-} - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-} - EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-} - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-} + - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-} + - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-} + - JIRA_API_VERSION=${JIRA_API_VERSION:-} - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-} - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-} - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-} + - DISABLE_DOCUMENT_CLEANUP=${DISABLE_DOCUMENT_CLEANUP:-} # Danswer SlackBot Configs - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-} - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-} - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-} + - 
DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-} - DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-} - DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-} - DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-} # Currently unused @@ -156,17 +179,15 @@ services: - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # Log all of the prompts to the LLM - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + web_server: image: danswer/danswer-web-server:latest build: @@ -174,12 +195,73 @@ services: dockerfile: Dockerfile args: - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false} + - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} depends_on: - api_server restart: always environment: - INTERNAL_URL=http://api_server:8080 - WEB_DOMAIN=${WEB_DOMAIN:-} + + + inference_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + 
environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + indexing_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_ONLY=True + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + relational_db: image: postgres:15.2-alpine restart: always @@ -190,6 +272,8 @@ services: - "5432:5432" volumes: - db_volume:/var/lib/postgresql/data + + # This container name cannot have an underscore in it due to Vespa expectations of the URL index: image: vespaengine/vespa:8.277.17 @@ -204,6 +288,8 @@ services: options: max-size: "50m" max-file: "6" + + nginx: image: nginx:1.23.4-alpine restart: always @@ -232,37 +318,10 @@ services: command: > /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" - # Run with --profile model-server to bring up the danswer-model-server container - # Be sure to change MODEL_SERVER_HOST (see above) as well - # ie. 
MODEL_SERVER_HOST="model_server" docker compose -f docker-compose.dev.yml -p danswer-stack --profile model-server up -d --build - model_server: - image: danswer/danswer-model-server:latest - build: - context: ../../backend - dockerfile: Dockerfile.model_server - profiles: - - "model-server" - command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000 - restart: always - environment: - - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} - - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} - # Set to debug to get more fine-grained logs - - LOG_LEVEL=${LOG_LEVEL:-info} - volumes: - - model_cache_torch:/root/.cache/torch/ - - model_cache_huggingface:/root/.cache/huggingface/ - logging: - driver: json-file - options: - max-size: "50m" - max-file: "6" + + volumes: - local_dynamic_storage: - file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: vespa_volume: - model_cache_torch: - model_cache_nltk: + # Created by the container itself model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml new file mode 100644 index 000000000..8d2db9337 --- /dev/null +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -0,0 +1,342 @@ +version: '3' +services: + api_server: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: > + /bin/sh -c "alembic upgrade head && + echo \"Starting Danswer Api Server\" && + uvicorn danswer.main:app --host 0.0.0.0 --port 8080" + depends_on: + - relational_db + - index + - inference_model_server + restart: always + ports: + - "8080:8080" + environment: + # Auth Settings + - AUTH_TYPE=${AUTH_TYPE:-disabled} + - SESSION_EXPIRE_TIME_SECONDS=${SESSION_EXPIRE_TIME_SECONDS:-86400} + - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} + - VALID_EMAIL_DOMAINS=${VALID_EMAIL_DOMAINS:-} + - 
GOOGLE_OAUTH_CLIENT_ID=${GOOGLE_OAUTH_CLIENT_ID:-} + - GOOGLE_OAUTH_CLIENT_SECRET=${GOOGLE_OAUTH_CLIENT_SECRET:-} + - REQUIRE_EMAIL_VERIFICATION=${REQUIRE_EMAIL_VERIFICATION:-} + - SMTP_SERVER=${SMTP_SERVER:-} # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com' + - SMTP_PORT=${SMTP_PORT:-587} # For sending verification emails, if unspecified then defaults to '587' + - SMTP_USER=${SMTP_USER:-} + - SMTP_PASS=${SMTP_PASS:-} + - EMAIL_FROM=${EMAIL_FROM:-} + # Gen AI Settings + - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} + - FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-} + - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} + - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-} + - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} + - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} + - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} + - QA_TIMEOUT=${QA_TIMEOUT:-} + - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} + - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-} + - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} + - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} + - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} + - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} + - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} + # if set, allows for the use of the token budget system + - TOKEN_BUDGET_GLOBALLY_ENABLED=${TOKEN_BUDGET_GLOBALLY_ENABLED:-} + # Enables the use of bedrock models + - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} + - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} + - AWS_REGION_NAME=${AWS_REGION_NAME:-} + # Query Options + - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years) + - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) + - 
EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} + - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} + - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} + # Other services + - POSTGRES_HOST=relational_db + - VESPA_HOST=index + - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose + # Don't change the NLP model configs unless you know what you're doing + - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} + - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} + - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} + - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} + - ENABLE_RERANKING_REAL_TIME_FLOW=${ENABLE_RERANKING_REAL_TIME_FLOW:-} + - ENABLE_RERANKING_ASYNC_FLOW=${ENABLE_RERANKING_ASYNC_FLOW:-} + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} + # Leave this on pretty please? Nothing sensitive is collected! + # https://docs.danswer.dev/more/telemetry + - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-} + - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs + - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # Log all of the prompts to the LLM + # If set to `true` will enable additional logs about Vespa query performance + # (time spent on finding the right docs + time spent fetching summaries from disk) + - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} + extra_hosts: + - "host.docker.internal:host-gateway" + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + background: + image: danswer/danswer-backend:latest + build: + context: ../../backend + dockerfile: Dockerfile + command: /usr/bin/supervisord + depends_on: + - relational_db + - index + - inference_model_server + - indexing_model_server + restart: always + environment: + - ENCRYPTION_KEY_SECRET=${ENCRYPTION_KEY_SECRET:-} + # Gen AI Settings (Needed by DanswerBot) + - GEN_AI_MODEL_PROVIDER=${GEN_AI_MODEL_PROVIDER:-} + - GEN_AI_MODEL_VERSION=${GEN_AI_MODEL_VERSION:-} + - 
FAST_GEN_AI_MODEL_VERSION=${FAST_GEN_AI_MODEL_VERSION:-} + - GEN_AI_API_KEY=${GEN_AI_API_KEY:-} + - GEN_AI_API_ENDPOINT=${GEN_AI_API_ENDPOINT:-} + - GEN_AI_API_VERSION=${GEN_AI_API_VERSION:-} + - GEN_AI_LLM_PROVIDER_TYPE=${GEN_AI_LLM_PROVIDER_TYPE:-} + - GEN_AI_MAX_TOKENS=${GEN_AI_MAX_TOKENS:-} + - QA_TIMEOUT=${QA_TIMEOUT:-} + - MAX_CHUNKS_FED_TO_CHAT=${MAX_CHUNKS_FED_TO_CHAT:-} + - DISABLE_LLM_FILTER_EXTRACTION=${DISABLE_LLM_FILTER_EXTRACTION:-} + - DISABLE_LLM_CHUNK_FILTER=${DISABLE_LLM_CHUNK_FILTER:-} + - DISABLE_LLM_CHOOSE_SEARCH=${DISABLE_LLM_CHOOSE_SEARCH:-} + - DISABLE_LLM_QUERY_REPHRASE=${DISABLE_LLM_QUERY_REPHRASE:-} + - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-} + - GENERATIVE_MODEL_ACCESS_CHECK_FREQ=${GENERATIVE_MODEL_ACCESS_CHECK_FREQ:-} + - DISABLE_LITELLM_STREAMING=${DISABLE_LITELLM_STREAMING:-} + - LITELLM_EXTRA_HEADERS=${LITELLM_EXTRA_HEADERS:-} + # Query Options + - DOC_TIME_DECAY=${DOC_TIME_DECAY:-} # Recency Bias for search results, decay at 1 / (1 + DOC_TIME_DECAY * x years) + - HYBRID_ALPHA=${HYBRID_ALPHA:-} # Hybrid Search Alpha (0 for entirely keyword, 1 for entirely vector) + - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-} + - MULTILINGUAL_QUERY_EXPANSION=${MULTILINGUAL_QUERY_EXPANSION:-} + - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} + # Other Services + - POSTGRES_HOST=relational_db + - POSTGRES_USER=${POSTGRES_USER:-} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-} + - POSTGRES_DB=${POSTGRES_DB:-} + - VESPA_HOST=index + - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors + # Don't change the NLP model configs unless you know what you're doing + - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} + - DOC_EMBEDDING_DIM=${DOC_EMBEDDING_DIM:-} + - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} + - ASYM_QUERY_PREFIX=${ASYM_QUERY_PREFIX:-} # Needed by DanswerBot + - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-} + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + - 
MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} + - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server} + # Indexing Configs + - NUM_INDEXING_WORKERS=${NUM_INDEXING_WORKERS:-} + - ENABLED_CONNECTOR_TYPES=${ENABLED_CONNECTOR_TYPES:-} + - DISABLE_INDEX_UPDATE_ON_SWAP=${DISABLE_INDEX_UPDATE_ON_SWAP:-} + - DASK_JOB_CLIENT_ENABLED=${DASK_JOB_CLIENT_ENABLED:-} + - CONTINUE_ON_CONNECTOR_FAILURE=${CONTINUE_ON_CONNECTOR_FAILURE:-} + - EXPERIMENTAL_CHECKPOINTING_ENABLED=${EXPERIMENTAL_CHECKPOINTING_ENABLED:-} + - CONFLUENCE_CONNECTOR_LABELS_TO_SKIP=${CONFLUENCE_CONNECTOR_LABELS_TO_SKIP:-} + - JIRA_CONNECTOR_LABELS_TO_SKIP=${JIRA_CONNECTOR_LABELS_TO_SKIP:-} + - WEB_CONNECTOR_VALIDATE_URLS=${WEB_CONNECTOR_VALIDATE_URLS:-} + - JIRA_API_VERSION=${JIRA_API_VERSION:-} + - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-} + - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-} + - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-} + - DISABLE_DOCUMENT_CLEANUP=${DISABLE_DOCUMENT_CLEANUP:-} + # Danswer SlackBot Configs + - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-} + - DANSWER_BOT_SLACK_BOT_TOKEN=${DANSWER_BOT_SLACK_BOT_TOKEN:-} + - DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER=${DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER:-} + - DANSWER_BOT_FEEDBACK_VISIBILITY=${DANSWER_BOT_FEEDBACK_VISIBILITY:-} + - DANSWER_BOT_DISPLAY_ERROR_MSGS=${DANSWER_BOT_DISPLAY_ERROR_MSGS:-} + - DANSWER_BOT_RESPOND_EVERY_CHANNEL=${DANSWER_BOT_RESPOND_EVERY_CHANNEL:-} + - DANSWER_BOT_DISABLE_COT=${DANSWER_BOT_DISABLE_COT:-} # Currently unused + - NOTIFY_SLACKBOT_NO_ANSWER=${NOTIFY_SLACKBOT_NO_ANSWER:-} + - DANSWER_BOT_MAX_QPM=${DANSWER_BOT_MAX_QPM:-} + - DANSWER_BOT_MAX_WAIT_TIME=${DANSWER_BOT_MAX_WAIT_TIME:-} + # Logging + # Leave this on pretty please? Nothing sensitive is collected! 
+ # https://docs.danswer.dev/more/telemetry + - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-} + - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs + - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # Log all of the prompts to the LLM + - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} + extra_hosts: + - "host.docker.internal:host-gateway" + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + web_server: + image: danswer/danswer-web-server:latest + build: + context: ../../web + dockerfile: Dockerfile + args: + - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false} + - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + depends_on: + - api_server + restart: always + environment: + - INTERNAL_URL=http://api_server:8080 + - WEB_DOMAIN=${WEB_DOMAIN:-} + + + inference_model_server: + image: danswer/danswer-model-server:latest + # for GPU support, please read installation guidelines in the README.md + # bare minimum to get this working is to install nvidia-container-toolkit + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download 
time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + indexing_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + # for GPU support, please read installation guidelines in the README.md + # bare minimum to get this working is to install nvidia-container-toolkit + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_ONLY=True + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + relational_db: + image: postgres:15.2-alpine + restart: always + environment: + - POSTGRES_USER=${POSTGRES_USER:-postgres} + - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-password} + ports: + - "5432:5432" + volumes: + - db_volume:/var/lib/postgresql/data + + + # This container name cannot have an underscore in it due to Vespa expectations of the URL + index: + image: vespaengine/vespa:8.277.17 + restart: always + ports: + - "19071:19071" + - "8081:8081" + volumes: + - vespa_volume:/opt/vespa/var + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + nginx: + image: nginx:1.23.4-alpine + restart: always + # nginx will immediately crash with `nginx: [emerg] host not found in upstream` + # if api_server / web_server are not up + depends_on: + - api_server + - web_server + environment: + - DOMAIN=localhost + ports: + 
- "80:80" + - "3000:80" # allow for localhost:3000 usage, since that is the norm + volumes: + - ../data/nginx:/etc/nginx/conf.d + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + # the specified script waits for the api_server to start up. + # Without this we've seen issues where nginx shows no error logs but + # does not receive any traffic + # NOTE: we have to use dos2unix to remove Carriage Return chars from the file + # in order to make this work on both Unix-like systems and windows + command: > + /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh + && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" + + +volumes: + db_volume: + vespa_volume: + # Created by the container itself + model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml index 5688c84f3..d3c05bb67 100644 --- a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml +++ b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml @@ -12,6 +12,7 @@ services: depends_on: - relational_db - index + - inference_model_server restart: always env_file: - .env @@ -19,17 +20,16 @@ services: - AUTH_TYPE=${AUTH_TYPE:-google_oauth} - POSTGRES_HOST=relational_db - VESPA_HOST=index - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + background: image: danswer/danswer-backend:latest build: @@ -39,6 +39,8 @@ services: depends_on: - relational_db - index + - inference_model_server + - indexing_model_server restart: always env_file: - .env @@ -46,17 +48,17 @@ services: - 
AUTH_TYPE=${AUTH_TYPE:-google_oauth} - POSTGRES_HOST=relational_db - VESPA_HOST=index - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + web_server: image: danswer/danswer-web-server:latest build: @@ -64,6 +66,10 @@ services: dockerfile: Dockerfile args: - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false} + - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} depends_on: - api_server restart: always @@ -76,6 +82,63 @@ services: options: max-size: "50m" max-file: "6" + + + inference_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: 
"50m" + max-file: "6" + + + indexing_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_ONLY=True + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + relational_db: image: postgres:15.2-alpine restart: always @@ -89,6 +152,8 @@ services: options: max-size: "50m" max-file: "6" + + # This container name cannot have an underscore in it due to Vespa expectations of the URL index: image: vespaengine/vespa:8.277.17 @@ -103,6 +168,8 @@ services: options: max-size: "50m" max-file: "6" + + nginx: image: nginx:1.23.4-alpine restart: always @@ -132,35 +199,10 @@ services: && /etc/nginx/conf.d/run-nginx.sh app.conf.template.no-letsencrypt" env_file: - .env.nginx - # Run with --profile model-server to bring up the danswer-model-server container - model_server: - image: danswer/danswer-model-server:latest - build: - context: ../../backend - dockerfile: Dockerfile.model_server - profiles: - - "model-server" - command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000 - restart: always - environment: - - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} - - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} - # Set to debug to get more fine-grained logs - - LOG_LEVEL=${LOG_LEVEL:-info} - volumes: - - model_cache_torch:/root/.cache/torch/ - - model_cache_huggingface:/root/.cache/huggingface/ - logging: - 
driver: json-file - options: - max-size: "50m" - max-file: "6" + + volumes: - local_dynamic_storage: - file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: vespa_volume: - model_cache_torch: - model_cache_nltk: + # Created by the container itself model_cache_huggingface: diff --git a/deployment/docker_compose/docker-compose.prod.yml b/deployment/docker_compose/docker-compose.prod.yml index 97af1e7c9..8ad0ad301 100644 --- a/deployment/docker_compose/docker-compose.prod.yml +++ b/deployment/docker_compose/docker-compose.prod.yml @@ -12,6 +12,7 @@ services: depends_on: - relational_db - index + - inference_model_server restart: always env_file: - .env @@ -19,17 +20,16 @@ services: - AUTH_TYPE=${AUTH_TYPE:-google_oauth} - POSTGRES_HOST=relational_db - VESPA_HOST=index - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + background: image: danswer/danswer-backend:latest build: @@ -39,6 +39,8 @@ services: depends_on: - relational_db - index + - inference_model_server + - indexing_model_server restart: always env_file: - .env @@ -46,17 +48,17 @@ services: - AUTH_TYPE=${AUTH_TYPE:-google_oauth} - POSTGRES_HOST=relational_db - VESPA_HOST=index - volumes: - - local_dynamic_storage:/home/storage - - file_connector_tmp_storage:/home/file_connector_storage - - model_cache_torch:/root/.cache/torch/ - - model_cache_nltk:/root/nltk_data/ - - model_cache_huggingface:/root/.cache/huggingface/ + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + - 
INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" logging: driver: json-file options: max-size: "50m" max-file: "6" + + web_server: image: danswer/danswer-web-server:latest build: @@ -64,6 +66,10 @@ services: dockerfile: Dockerfile args: - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false} + - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} depends_on: - api_server restart: always @@ -76,6 +82,8 @@ services: options: max-size: "50m" max-file: "6" + + relational_db: image: postgres:15.2-alpine restart: always @@ -89,6 +97,63 @@ services: options: max-size: "50m" max-file: "6" + + + inference_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + indexing_model_server: + image: danswer/danswer-model-server:latest + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping 
service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_ONLY=True + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + # This container name cannot have an underscore in it due to Vespa expectations of the URL index: image: vespaengine/vespa:8.277.17 @@ -103,6 +168,8 @@ services: options: max-size: "50m" max-file: "6" + + nginx: image: nginx:1.23.4-alpine restart: always @@ -136,6 +203,8 @@ services: && /etc/nginx/conf.d/run-nginx.sh app.conf.template" env_file: - .env.nginx + + # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71 certbot: image: certbot/certbot @@ -149,35 +218,10 @@ services: max-size: "50m" max-file: "6" entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'" - # Run with --profile model-server to bring up the danswer-model-server container - model_server: - image: danswer/danswer-model-server:latest - build: - context: ../../backend - dockerfile: Dockerfile.model_server - profiles: - - "model-server" - command: uvicorn model_server.main:app --host 0.0.0.0 --port 9000 - restart: always - environment: - - DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-} - - NORMALIZE_EMBEDDINGS=${NORMALIZE_EMBEDDINGS:-} - - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} - # Set to debug to get more fine-grained logs - - LOG_LEVEL=${LOG_LEVEL:-info} - volumes: - - model_cache_torch:/root/.cache/torch/ - - model_cache_huggingface:/root/.cache/huggingface/ - logging: - driver: json-file - options: - max-size: "50m" - max-file: "6" + + volumes: - local_dynamic_storage: - 
file_connector_tmp_storage: # used to store files uploaded by the user temporarily while we are indexing them db_volume: vespa_volume: - model_cache_torch: - model_cache_nltk: + # Created by the container itself model_cache_huggingface: diff --git a/deployment/docker_compose/init-letsencrypt.sh b/deployment/docker_compose/init-letsencrypt.sh index c58e1230f..5eb3c73b9 100755 --- a/deployment/docker_compose/init-letsencrypt.sh +++ b/deployment/docker_compose/init-letsencrypt.sh @@ -6,10 +6,20 @@ set -o allexport source .env.nginx set +o allexport -if ! docker compose --version >/dev/null 2>&1; then - echo 'Error: docker compose is not installed.' >&2 - exit 1 -fi +# Function to determine correct docker compose command +docker_compose_cmd() { + if command -v docker-compose >/dev/null 2>&1; then + echo "docker-compose" + elif docker compose version >/dev/null 2>&1; then + echo "docker compose" + else + echo 'Error: docker-compose or docker compose is not installed.' >&2 + exit 1 + fi +} + +# Assign appropriate Docker Compose command +COMPOSE_CMD=$(docker_compose_cmd) domains=("$DOMAIN" "www.$DOMAIN") rsa_key_size=4096 @@ -36,7 +46,7 @@ fi echo "### Creating dummy certificate for $domains ..." path="/etc/letsencrypt/live/$domains" mkdir -p "$data_path/conf/live/$domains" -docker compose -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ +$COMPOSE_CMD -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ openssl req -x509 -nodes -newkey rsa:$rsa_key_size -days 1\ -keyout '$path/privkey.pem' \ -out '$path/fullchain.pem' \ @@ -45,11 +55,25 @@ echo echo "### Starting nginx ..." -docker compose -f docker-compose.prod.yml -p danswer-stack up --force-recreate -d nginx +$COMPOSE_CMD -f docker-compose.prod.yml -p danswer-stack up --force-recreate -d nginx echo +echo "Waiting for nginx to be ready, this may take a minute..." 
+while true; do + # Use curl to send a request and capture the HTTP status code + status_code=$(curl -o /dev/null -s -w "%{http_code}\n" "http://localhost/api/health") + + # Check if the status code is 200 + if [ "$status_code" -eq 200 ]; then + break # Exit the loop + else + echo "Nginx is not ready yet, retrying in 5 seconds..." + sleep 5 # Sleep for 5 seconds before retrying + fi +done + echo "### Deleting dummy certificate for $domains ..." -docker compose -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ +$COMPOSE_CMD -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ rm -Rf /etc/letsencrypt/live/$domains && \ rm -Rf /etc/letsencrypt/archive/$domains && \ rm -Rf /etc/letsencrypt/renewal/$domains.conf" certbot @@ -72,7 +96,7 @@ esac # Enable staging mode if needed if [ $staging != "0" ]; then staging_arg="--staging"; fi -docker compose -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ +$COMPOSE_CMD -f docker-compose.prod.yml run --name danswer-stack --rm --entrypoint "\ certbot certonly --webroot -w /var/www/certbot \ $staging_arg \ $email_arg \ @@ -83,4 +107,4 @@ docker compose -f docker-compose.prod.yml run --name danswer-stack --rm --entryp echo echo "### Reloading nginx ..." -docker compose -f docker-compose.prod.yml -p danswer-stack up --force-recreate -d nginx +$COMPOSE_CMD -f docker-compose.prod.yml -p danswer-stack up --force-recreate -d nginx diff --git a/deployment/helm/.gitignore b/deployment/helm/.gitignore new file mode 100644 index 000000000..b442275d6 --- /dev/null +++ b/deployment/helm/.gitignore @@ -0,0 +1,3 @@ +### Helm ### +# Chart dependencies +**/charts/*.tgz diff --git a/deployment/helm/.helmignore b/deployment/helm/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/deployment/helm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). 
Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/helm/Chart.lock b/deployment/helm/Chart.lock new file mode 100644 index 000000000..918b44f6e --- /dev/null +++ b/deployment/helm/Chart.lock @@ -0,0 +1,12 @@ +dependencies: +- name: postgresql + repository: https://charts.bitnami.com/bitnami + version: 14.3.1 +- name: vespa + repository: https://unoplat.github.io/vespa-helm-charts + version: 0.2.3 +- name: nginx + repository: oci://registry-1.docker.io/bitnamicharts + version: 15.14.0 +digest: sha256:ab17b5d2c3883055cb4a26bf530043521be5220c24f804e954bb428273d16ba8 +generated: "2024-05-24T16:55:30.598279-07:00" diff --git a/deployment/helm/Chart.yaml b/deployment/helm/Chart.yaml new file mode 100644 index 000000000..7763f33be --- /dev/null +++ b/deployment/helm/Chart.yaml @@ -0,0 +1,35 @@ +apiVersion: v2 +name: danswer-stack +description: A Helm chart for Kubernetes +home: https://www.danswer.ai/ +sources: + - "https://github.com/danswer-ai/danswer" +type: application +version: 0.2.0 +appVersion: "latest" +annotations: + category: Productivity + licenses: MIT + images: | + - name: webserver + image: docker.io/danswer/danswer-web-server:latest + - name: background + image: docker.io/danswer/danswer-backend:latest + - name: vespa + image: vespaengine/vespa:8.277.17 +dependencies: + - name: postgresql + version: 14.3.1 + repository: https://charts.bitnami.com/bitnami + condition: postgresql.enabled + - name: vespa + version: 0.2.3 + repository: https://unoplat.github.io/vespa-helm-charts + condition: vespa.enabled + - name: nginx + version: 15.14.0 + repository: oci://registry-1.docker.io/bitnamicharts + condition: nginx.enabled + + + \ No newline at end of file diff --git a/deployment/helm/templates/_helpers.tpl b/deployment/helm/templates/_helpers.tpl new file mode 
100644 index 000000000..483a5b5e5 --- /dev/null +++ b/deployment/helm/templates/_helpers.tpl @@ -0,0 +1,83 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "danswer-stack.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "danswer-stack.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "danswer-stack.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "danswer-stack.labels" -}} +helm.sh/chart: {{ include "danswer-stack.chart" . }} +{{ include "danswer-stack.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "danswer-stack.selectorLabels" -}} +app.kubernetes.io/name: {{ include "danswer-stack.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "danswer-stack.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "danswer-stack.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Set secret name +*/}} +{{- define "danswer-stack.secretName" -}} +{{- default (default "danswer-secrets" .Values.auth.secretName) .Values.auth.existingSecret }} +{{- end }} + +{{/* +Create env vars from secrets +*/}} +{{- define "danswer-stack.envSecrets" -}} + {{- range $name, $key := .Values.auth.secretKeys }} +- name: {{ $name | upper | replace "-" "_" | quote }} + valueFrom: + secretKeyRef: + name: {{ include "danswer-stack.secretName" $ }} + key: {{ default $name $key }} + {{- end }} +{{- end }} + diff --git a/deployment/helm/templates/api-deployment.yaml b/deployment/helm/templates/api-deployment.yaml new file mode 100644 index 000000000..7f10bffaf --- /dev/null +++ b/deployment/helm/templates/api-deployment.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "danswer-stack.fullname" . }}-api-deployment + labels: + {{- include "danswer-stack.labels" . | nindent 4 }} +spec: + {{- if not .Values.api.autoscaling.enabled }} + replicas: {{ .Values.api.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "danswer-stack.selectorLabels" . | nindent 6 }} + {{- if .Values.api.deploymentLabels }} + {{- toYaml .Values.api.deploymentLabels | nindent 6 }} + {{- end }} + template: + metadata: + {{- with .Values.api.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "danswer-stack.labels" . | nindent 8 }} + {{- with .Values.api.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "danswer-stack.serviceAccountName" . 
}}
+      securityContext:
+        {{- toYaml .Values.api.podSecurityContext | nindent 8 }}
+      containers:
+        - name: api-server
+          securityContext:
+            {{- toYaml .Values.api.securityContext | nindent 12 }}
+          image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.api.image.pullPolicy }}
+          command:
+            - "/bin/sh"
+            - "-c"
+            - |
+              alembic upgrade head &&
+              echo "Starting Danswer Api Server" &&
+              uvicorn danswer.main:app --host 0.0.0.0 --port 8080
+          ports:
+            - name: api-server-port
+              containerPort: {{ .Values.api.service.port }}
+              protocol: TCP
+          resources:
+            {{- toYaml .Values.api.resources | nindent 12 }}
+          envFrom:
+            - configMapRef:
+                name: {{ .Values.config.envConfigMapName }}
+          env:
+            {{- include "danswer-stack.envSecrets" . | nindent 12}}
diff --git a/deployment/helm/templates/api-hpa.yaml b/deployment/helm/templates/api-hpa.yaml
new file mode 100644
index 000000000..378c39715
--- /dev/null
+++ b/deployment/helm/templates/api-hpa.yaml
@@ -0,0 +1,32 @@
+{{- if .Values.api.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "danswer-stack.fullname" . }}-api
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "danswer-stack.fullname" . }}-api-deployment
+  minReplicas: {{ .Values.api.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.api.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.api.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.api.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.api.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/templates/api-service.yaml b/deployment/helm/templates/api-service.yaml
new file mode 100644
index 000000000..1fd74d4dd
--- /dev/null
+++ b/deployment/helm/templates/api-service.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Service
+metadata:
+  # INTERNAL_URL env variable depends on this, don't change without changing INTERNAL_URL
+  name: {{ include "danswer-stack.fullname" . }}-api-service
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+    {{- if .Values.api.deploymentLabels }}
+    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
+    {{- end }}
+spec:
+  type: {{ .Values.api.service.type }}
+  ports:
+    - port: {{ .Values.api.service.port }}
+      targetPort: api-server-port
+      protocol: TCP
+      name: api-server-port
+  selector:
+    {{- include "danswer-stack.selectorLabels" . | nindent 4 }}
+    {{- if .Values.api.deploymentLabels }}
+    {{- toYaml .Values.api.deploymentLabels | nindent 4 }}
+    {{- end }}
diff --git a/deployment/helm/templates/background-deployment.yaml b/deployment/helm/templates/background-deployment.yaml
new file mode 100644
index 000000000..3cd65a99a
--- /dev/null
+++ b/deployment/helm/templates/background-deployment.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "danswer-stack.fullname" . }}-background
+  labels:
+    {{- include "danswer-stack.labels" .
| nindent 4 }} +spec: + {{- if not .Values.background.autoscaling.enabled }} + replicas: {{ .Values.background.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "danswer-stack.selectorLabels" . | nindent 6 }} + {{- if .Values.background.deploymentLabels }} + {{- toYaml .Values.background.deploymentLabels | nindent 6 }} + {{- end }} + template: + metadata: + {{- with .Values.background.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "danswer-stack.labels" . | nindent 8 }} + {{- with .Values.background.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.background.podSecurityContext | nindent 8 }} + containers: + - name: background + securityContext: + {{- toYaml .Values.background.securityContext | nindent 12 }} + image: "{{ .Values.background.image.repository }}:{{ .Values.background.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.background.image.pullPolicy }} + command: ["/usr/bin/supervisord"] + resources: + {{- toYaml .Values.background.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ .Values.config.envConfigMapName }} + env: + - name: ENABLE_MINI_CHUNK + value: "{{ .Values.background.enableMiniChunk }}" + {{- include "danswer-stack.envSecrets" . | nindent 12}} diff --git a/deployment/helm/templates/background-hpa.yaml b/deployment/helm/templates/background-hpa.yaml new file mode 100644 index 000000000..009daf10f --- /dev/null +++ b/deployment/helm/templates/background-hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.background.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "danswer-stack.fullname" . }}-background + labels: + {{- include "danswer-stack.labels" . 
| nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "danswer-stack.fullname" . }}-background
+  minReplicas: {{ .Values.background.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.background.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.background.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.background.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.background.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/templates/configmap.yaml b/deployment/helm/templates/configmap.yaml
new file mode 100755
index 000000000..8119ae045
--- /dev/null
+++ b/deployment/helm/templates/configmap.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.config.envConfigMapName }}
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+data:
+  INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
+  POSTGRES_HOST: {{ .Release.Name }}-postgresql
+  VESPA_HOST: "document-index-service"
+  MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
+  INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" .
}}-indexing-model-service" +{{- range $key, $value := .Values.configMap }} + {{ $key }}: "{{ $value }}" +{{- end }} \ No newline at end of file diff --git a/deployment/helm/templates/danswer-secret.yaml b/deployment/helm/templates/danswer-secret.yaml new file mode 100644 index 000000000..6b2aa3172 --- /dev/null +++ b/deployment/helm/templates/danswer-secret.yaml @@ -0,0 +1,11 @@ +{{- if not .Values.auth.existingSecret -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "danswer-stack.secretName" . }} +type: Opaque +stringData: + {{- range $name, $value := .Values.auth.secrets }} + {{ $name }}: {{ $value | quote }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/deployment/helm/templates/indexing-model-deployment.yaml b/deployment/helm/templates/indexing-model-deployment.yaml new file mode 100644 index 000000000..cc88aefb7 --- /dev/null +++ b/deployment/helm/templates/indexing-model-deployment.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "danswer-stack.fullname" . }}-indexing-model + labels: + {{- include "danswer-stack.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "danswer-stack.selectorLabels" . | nindent 6 }} + {{- if .Values.indexCapability.deploymentLabels }} + {{- toYaml .Values.indexCapability.deploymentLabels | nindent 6 }} + {{- end }} + template: + metadata: + {{- with .Values.indexCapability.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "danswer-stack.labels" . | nindent 8 }} + {{- with .Values.indexCapability.podLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + spec: + containers: + - name: indexing-model-server + image: danswer/danswer-model-server:latest + imagePullPolicy: IfNotPresent + command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000", "--limit-concurrency", "10" ] + ports: + - containerPort: 9000 + envFrom: + - configMapRef: + name: {{ .Values.config.envConfigMapName }} + env: + - name: INDEXING_ONLY + value: "{{ default "True" .Values.indexCapability.indexingOnly }}" + {{- include "danswer-stack.envSecrets" . | nindent 10}} + volumeMounts: + {{- range .Values.indexCapability.volumeMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + volumes: + {{- range .Values.indexCapability.volumes }} + - name: {{ .name }} + persistentVolumeClaim: + claimName: {{ .persistentVolumeClaim.claimName }} + {{- end }} diff --git a/deployment/helm/templates/indexing-model-pvc.yaml b/deployment/helm/templates/indexing-model-pvc.yaml new file mode 100644 index 000000000..e5825557d --- /dev/null +++ b/deployment/helm/templates/indexing-model-pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Values.indexCapability.indexingModelPVC.name }} +spec: + accessModes: + - {{ .Values.indexCapability.indexingModelPVC.accessMode | quote }} + resources: + requests: + storage: {{ .Values.indexCapability.indexingModelPVC.storage | quote }} \ No newline at end of file diff --git a/deployment/helm/templates/indexing-model-service.yaml b/deployment/helm/templates/indexing-model-service.yaml new file mode 100644 index 000000000..fbbeb6bee --- /dev/null +++ b/deployment/helm/templates/indexing-model-service.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "danswer-stack.fullname" . }}-indexing-model-service + labels: + {{- include "danswer-stack.labels" . | nindent 4 }} +spec: + selector: + {{- include "danswer-stack.selectorLabels" . 
| nindent 4 }} + {{- if .Values.indexCapability.deploymentLabels }} + {{- toYaml .Values.indexCapability.deploymentLabels | nindent 4 }} + {{- end }} + ports: + - name: {{ .Values.indexCapability.service.name }} + protocol: TCP + port: {{ .Values.indexCapability.service.port }} + targetPort: {{ .Values.indexCapability.service.port }} + type: {{ .Values.indexCapability.service.type }} \ No newline at end of file diff --git a/deployment/helm/templates/inference-model-deployment.yaml b/deployment/helm/templates/inference-model-deployment.yaml new file mode 100644 index 000000000..43caddd29 --- /dev/null +++ b/deployment/helm/templates/inference-model-deployment.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "danswer-stack.fullname" . }}-inference-model + labels: + {{- range .Values.inferenceCapability.deployment.labels }} + {{ .key }}: {{ .value }} + {{- end }} +spec: + replicas: {{ .Values.inferenceCapability.deployment.replicas }} + selector: + matchLabels: + {{- range .Values.inferenceCapability.deployment.labels }} + {{ .key }}: {{ .value }} + {{- end }} + template: + metadata: + labels: + {{- range .Values.inferenceCapability.podLabels }} + {{ .key }}: {{ .value }} + {{- end }} + spec: + containers: + - name: {{ .Values.inferenceCapability.service.name }} + image: {{ .Values.inferenceCapability.deployment.image.repository }}:{{ .Values.inferenceCapability.deployment.image.tag }} + imagePullPolicy: {{ .Values.inferenceCapability.deployment.image.pullPolicy }} + command: {{ toYaml .Values.inferenceCapability.deployment.command | nindent 14 }} + ports: + - containerPort: {{ .Values.inferenceCapability.service.port }} + envFrom: + - configMapRef: + name: {{ .Values.config.envConfigMapName }} + env: + {{- include "danswer-stack.envSecrets" . 
| nindent 12}} + volumeMounts: + {{- range .Values.inferenceCapability.deployment.volumeMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + volumes: + {{- range .Values.inferenceCapability.deployment.volumes }} + - name: {{ .name }} + persistentVolumeClaim: + claimName: {{ .persistentVolumeClaim.claimName }} + {{- end }} diff --git a/deployment/helm/templates/inference-model-pvc.yaml b/deployment/helm/templates/inference-model-pvc.yaml new file mode 100644 index 000000000..fe47fa879 --- /dev/null +++ b/deployment/helm/templates/inference-model-pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Values.inferenceCapability.pvc.name }} +spec: + accessModes: + {{- toYaml .Values.inferenceCapability.pvc.accessModes | nindent 4 }} + resources: + requests: + storage: {{ .Values.inferenceCapability.pvc.storage }} diff --git a/deployment/helm/templates/inference-model-service.yaml b/deployment/helm/templates/inference-model-service.yaml new file mode 100644 index 000000000..74433ac11 --- /dev/null +++ b/deployment/helm/templates/inference-model-service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "danswer-stack.fullname" . 
}}-inference-model-service +spec: + type: {{ .Values.inferenceCapability.service.type }} + ports: + - port: {{ .Values.inferenceCapability.service.port }} + targetPort: {{ .Values.inferenceCapability.service.port }} + protocol: TCP + name: {{ .Values.inferenceCapability.service.name }} + selector: + {{- range .Values.inferenceCapability.deployment.labels }} + {{ .key }}: {{ .value }} + {{- end }} diff --git a/deployment/helm/templates/nginx-conf.yaml b/deployment/helm/templates/nginx-conf.yaml new file mode 100644 index 000000000..81ecbaaa2 --- /dev/null +++ b/deployment/helm/templates/nginx-conf.yaml @@ -0,0 +1,44 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: danswer-nginx-conf +data: + nginx.conf: | + upstream api_server { + server {{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port }} fail_timeout=0; + } + + upstream web_server { + server {{ include "danswer-stack.fullname" . }}-webserver:{{ .Values.webserver.service.port }} fail_timeout=0; + } + + server { + listen 1024; + server_name $$DOMAIN; + + client_max_body_size 5G; # Maximum upload size + + location ~ ^/api(.*)$ { + rewrite ^/api(/.*)$ $1 break; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_buffering off; + proxy_redirect off; + proxy_pass http://api_server; + } + + location / { + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_redirect off; + proxy_pass http://web_server; + } + } diff --git a/deployment/helm/templates/serviceaccount.yaml b/deployment/helm/templates/serviceaccount.yaml new file mode 100644 index 000000000..afd351217 --- /dev/null +++ 
b/deployment/helm/templates/serviceaccount.yaml
@@ -0,0 +1,13 @@
+{{- if .Values.serviceAccount.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "danswer-stack.serviceAccountName" . }}
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+  {{- with .Values.serviceAccount.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
+{{- end }}
diff --git a/deployment/helm/templates/tests/test-connection.yaml b/deployment/helm/templates/tests/test-connection.yaml
new file mode 100644
index 000000000..60fbd1054
--- /dev/null
+++ b/deployment/helm/templates/tests/test-connection.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Pod
+metadata:
+  name: "{{ include "danswer-stack.fullname" . }}-test-connection"
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+  annotations:
+    "helm.sh/hook": test
+spec:
+  containers:
+    - name: wget
+      image: busybox
+      command: ['wget']
+      args: ['{{ include "danswer-stack.fullname" . }}-webserver:{{ .Values.webserver.service.port }}']
+  restartPolicy: Never
diff --git a/deployment/helm/templates/webserver-deployment.yaml b/deployment/helm/templates/webserver-deployment.yaml
new file mode 100644
index 000000000..c3505248f
--- /dev/null
+++ b/deployment/helm/templates/webserver-deployment.yaml
@@ -0,0 +1,60 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "danswer-stack.fullname" . }}-webserver
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+spec:
+  {{- if not .Values.webserver.autoscaling.enabled }}
+  replicas: {{ .Values.webserver.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "danswer-stack.selectorLabels" . | nindent 6 }}
+      {{- if .Values.webserver.deploymentLabels }}
+      {{- toYaml .Values.webserver.deploymentLabels | nindent 6 }}
+      {{- end }}
+  template:
+    metadata:
+      {{- with .Values.webserver.podAnnotations }}
+      annotations:
+        {{- toYaml .
| nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "danswer-stack.labels" . | nindent 8 }}
+        {{- with .Values.webserver.podLabels }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+    spec:
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
+      securityContext:
+        {{- toYaml .Values.webserver.podSecurityContext | nindent 8 }}
+      containers:
+        - name: web-server
+          securityContext:
+            {{- toYaml .Values.webserver.securityContext | nindent 12 }}
+          image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.webserver.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.webserver.service.port }}
+              protocol: TCP
+          resources:
+            {{- toYaml .Values.webserver.resources | nindent 12 }}
+          envFrom:
+            - configMapRef:
+                name: {{ .Values.config.envConfigMapName }}
+          env:
+            {{- include "danswer-stack.envSecrets" . | nindent 12}}
+          {{- with .Values.webserver.volumeMounts }}
+          volumeMounts:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+      {{- with .Values.webserver.volumes }}
+      volumes:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deployment/helm/templates/webserver-hpa.yaml b/deployment/helm/templates/webserver-hpa.yaml
new file mode 100644
index 000000000..b46820a7f
--- /dev/null
+++ b/deployment/helm/templates/webserver-hpa.yaml
@@ -0,0 +1,32 @@
+{{- if .Values.webserver.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "danswer-stack.fullname" . }}-webserver
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "danswer-stack.fullname" . }}-webserver
+  minReplicas: {{ .Values.webserver.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.webserver.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.webserver.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.webserver.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
diff --git a/deployment/helm/templates/webserver-service.yaml b/deployment/helm/templates/webserver-service.yaml
new file mode 100644
index 000000000..3e33566fc
--- /dev/null
+++ b/deployment/helm/templates/webserver-service.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "danswer-stack.fullname" . }}-webserver
+  labels:
+    {{- include "danswer-stack.labels" . | nindent 4 }}
+    {{- if .Values.webserver.deploymentLabels }}
+    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
+    {{- end }}
+spec:
+  type: {{ .Values.webserver.service.type }}
+  ports:
+    - port: {{ .Values.webserver.service.port }}
+      targetPort: http
+      protocol: TCP
+      name: http
+  selector:
+    {{- include "danswer-stack.selectorLabels" . | nindent 4 }}
+    {{- if .Values.webserver.deploymentLabels }}
+    {{- toYaml .Values.webserver.deploymentLabels | nindent 4 }}
+    {{- end }}
diff --git a/deployment/helm/values.yaml b/deployment/helm/values.yaml
new file mode 100644
index 000000000..19fa2f6e3
--- /dev/null
+++ b/deployment/helm/values.yaml
@@ -0,0 +1,453 @@
+# Default values for danswer-stack.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+ +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" + +inferenceCapability: + service: + name: inference-model-server-service + type: ClusterIP + port: 9000 + pvc: + name: inference-model-pvc + accessModes: + - ReadWriteOnce + storage: 3Gi + deployment: + name: inference-model-server-deployment + replicas: 1 + labels: + - key: app + value: inference-model-server + image: + repository: danswer/danswer-model-server + tag: latest + pullPolicy: IfNotPresent + command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"] + port: 9000 + volumeMounts: + - name: inference-model-storage + mountPath: /root/.cache + volumes: + - name: inference-model-storage + persistentVolumeClaim: + claimName: inference-model-pvc + podLabels: + - key: app + value: inference-model-server + +indexCapability: + service: + type: ClusterIP + port: 9000 + name: indexing-model-server-port + deploymentLabels: + app: indexing-model-server + podLabels: + app: indexing-model-server + indexingOnly: "True" + podAnnotations: {} + volumeMounts: + - name: indexing-model-storage + mountPath: /root/.cache + volumes: + - name: indexing-model-storage + persistentVolumeClaim: + claimName: indexing-model-storage + indexingModelPVC: + name: indexing-model-storage + accessMode: "ReadWriteOnce" + storage: "3Gi" + +config: + envConfigMapName: env-configmap + +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: "" + +postgresql: + primary: + persistence: + size: 5Gi + enabled: true + auth: + existingSecret: danswer-secrets + secretKeys: + adminPasswordKey: postgres_password #overwriting as postgres typically expects 'postgres-password' + +nginx: + containerPorts: + http: 1024 + extraEnvVars: + - name: DOMAIN + value: localhost + service: + ports: + http: 80 + danswer: 3000 + targetPort: + http: http + danswer: http + + existingServerBlockConfigmap: danswer-nginx-conf + +webserver: + replicaCount: 1 + image: + repository: danswer/danswer-web-server + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + deploymentLabels: + app: web-server + podAnnotations: {} + podLabels: + app: web-server + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + + service: + type: ClusterIP + port: 3000 + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + + # Additional volumes on the output Deployment definition. + volumes: [] + # - name: foo + # secret: + # secretName: mysecret + # optional: false + + # Additional volumeMounts on the output Deployment definition. 
+ volumeMounts: [] + # - name: foo + # mountPath: "/etc/foo" + # readOnly: true + + nodeSelector: {} + tolerations: [] + affinity: {} + +api: + replicaCount: 1 + image: + repository: danswer/danswer-backend + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + deploymentLabels: + app: api-server + podAnnotations: {} + podLabels: + scope: danswer-backend + app: api-server + + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + + service: + type: ClusterIP + port: 8080 + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # requests: + # cpu: 1000m # Requests 1 CPU core + # memory: 1Gi # Requests 1 GiB of memory + # limits: + # cpu: 2000m # Limits to 2 CPU cores + # memory: 2Gi # Limits to 2 GiB of memory + + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + + # Additional volumes on the output Deployment definition. + volumes: [] + # - name: foo + # secret: + # secretName: mysecret + # optional: false + + # Additional volumeMounts on the output Deployment definition. + volumeMounts: [] + # - name: foo + # mountPath: "/etc/foo" + # readOnly: true + + nodeSelector: {} + tolerations: [] + + +background: + replicaCount: 1 + image: + repository: danswer/danswer-backend + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. 
+ tag: latest + podAnnotations: {} + podLabels: + scope: danswer-backend + app: background + deploymentLabels: + app: background + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + enableMiniChunk: "true" + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # requests: + # cpu: 1000m # Requests 1 CPU core + # memory: 1Gi # Requests 1 GiB of memory + # limits: + # cpu: 2000m # Limits to 2 CPU cores + # memory: 2Gi # Limits to 2 GiB of memory + + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + + # Additional volumes on the output Deployment definition. + volumes: [] + # - name: foo + # secret: + # secretName: mysecret + # optional: false + + # Additional volumeMounts on the output Deployment definition. + volumeMounts: [] + # - name: foo + # mountPath: "/etc/foo" + # readOnly: true + + nodeSelector: {} + tolerations: [] + +vespa: + replicaCount: 1 + image: + repository: vespa + pullPolicy: IfNotPresent + tag: "8.277.17" + podAnnotations: {} + podLabels: + app: vespa + app.kubernetes.io/instance: danswer-stack-kn + app.kubernetes.io/name: vespa + enabled: true + + podSecurityContext: {} + # fsGroup: 2000 + + securityContext: + privileged: true + runAsUser: 0 + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + + resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # requests: + # cpu: 1500m + # memory: 4000Mi + # # limits: + # # cpu: 100m + # # memory: 128Mi + + nodeSelector: {} + tolerations: [] + affinity: {} + + +#ingress: +# enabled: false +# className: "" +# annotations: {} +# # kubernetes.io/ingress.class: nginx +# # kubernetes.io/tls-acme: "true" +# hosts: +# - host: chart-example.local +# paths: +# - path: / +# pathType: ImplementationSpecific +# tls: [] +# # - secretName: chart-example-tls +# # hosts: +# # - chart-example.local + +persistence: + vespa: + enabled: true + existingClaim: "" + storageClassName: "" + accessModes: + - ReadWriteOnce + size: 5Gi + +auth: + # for storing smtp, oauth, slack, and other secrets + # keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user) + existingSecret: "" # danswer-secrets + # optionally override the secret keys to reference in the secret + secretKeys: + postgres_password: "postgres_password" + smtp_pass: "" + oauth_client_id: "" + oauth_client_secret: "" + oauth_cookie_secret: "" + gen_ai_api_key: "" + danswer_bot_slack_app_token: "" + danswer_bot_slack_bot_token: "" + # will be overridden by the existingSecret if set + secretName: "danswer-secrets" + # set values as strings, they will be base64 encoded + secrets: + postgres_password: "postgres" + smtp_pass: "" + oauth_client_id: "" + oauth_client_secret: "" + oauth_cookie_secret: "" + gen_ai_api_key: "" + danswer_bot_slack_app_token: "" + danswer_bot_slack_bot_token: "" + +configMap: + AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN + SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default + VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check + SMTP_SERVER: "" # For sending 
verification emails, if unspecified then defaults to 'smtp.gmail.com' + SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587' + SMTP_USER: "" # 'your-email@company.com' + # SMTP_PASS: "" # 'your-gmail-password' + EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead + # Gen AI Settings + GEN_AI_MODEL_PROVIDER: "" + GEN_AI_MODEL_VERSION: "" + FAST_GEN_AI_MODEL_VERSION: "" + # GEN_AI_API_KEY: "" + GEN_AI_API_ENDPOINT: "" + GEN_AI_API_VERSION: "" + GEN_AI_LLM_PROVIDER_TYPE: "" + GEN_AI_MAX_TOKENS: "" + QA_TIMEOUT: "60" + MAX_CHUNKS_FED_TO_CHAT: "" + DISABLE_LLM_FILTER_EXTRACTION: "" + DISABLE_LLM_CHUNK_FILTER: "" + DISABLE_LLM_CHOOSE_SEARCH: "" + DISABLE_LLM_QUERY_REPHRASE: "" + # Query Options + DOC_TIME_DECAY: "" + HYBRID_ALPHA: "" + EDIT_KEYWORD_QUERY: "" + MULTILINGUAL_QUERY_EXPANSION: "" + QA_PROMPT_OVERRIDE: "" + # Don't change the NLP models unless you know what you're doing + DOCUMENT_ENCODER_MODEL: "" + NORMALIZE_EMBEDDINGS: "" + ASYM_QUERY_PREFIX: "" + ASYM_PASSAGE_PREFIX: "" + ENABLE_RERANKING_REAL_TIME_FLOW: "" + ENABLE_RERANKING_ASYNC_FLOW: "" + MODEL_SERVER_PORT: "" + MIN_THREADS_ML_MODELS: "" + # Indexing Configs + NUM_INDEXING_WORKERS: "" + DISABLE_INDEX_UPDATE_ON_SWAP: "" + DASK_JOB_CLIENT_ENABLED: "" + CONTINUE_ON_CONNECTOR_FAILURE: "" + EXPERIMENTAL_CHECKPOINTING_ENABLED: "" + CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: "" + JIRA_API_VERSION: "" + GONG_CONNECTOR_START_TIME: "" + NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: "" + # DanswerBot SlackBot Configs + # DANSWER_BOT_SLACK_APP_TOKEN: "" + # DANSWER_BOT_SLACK_BOT_TOKEN: "" + DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: "" + DANSWER_BOT_DISPLAY_ERROR_MSGS: "" + DANSWER_BOT_RESPOND_EVERY_CHANNEL: "" + DANSWER_BOT_DISABLE_COT: "" # Currently unused + NOTIFY_SLACKBOT_NO_ANSWER: "" + # Logging + # Optional Telemetry, please keep it on (nothing sensitive is collected)? 
<3 + # https://docs.danswer.dev/more/telemetry + DISABLE_TELEMETRY: "" + LOG_LEVEL: "" + LOG_ALL_MODEL_INTERACTIONS: "" + LOG_VESPA_TIMING_INFORMATION: "" + # Shared or Non-backend Related + WEB_DOMAIN: "http://localhost:3000" # for web server and api server + DOMAIN: "localhost" # for nginx diff --git a/deployment/kubernetes/api_server-service-deployment.yaml b/deployment/kubernetes/api_server-service-deployment.yaml index 63d86ded5..5853a3609 100644 --- a/deployment/kubernetes/api_server-service-deployment.yaml +++ b/deployment/kubernetes/api_server-service-deployment.yaml @@ -44,15 +44,3 @@ spec: envFrom: - configMapRef: name: env-configmap - volumeMounts: - - name: dynamic-storage - mountPath: /home/storage - - name: file-connector-storage - mountPath: /home/file_connector_storage - volumes: - - name: dynamic-storage - persistentVolumeClaim: - claimName: dynamic-pvc - - name: file-connector-storage - persistentVolumeClaim: - claimName: file-connector-pvc diff --git a/deployment/kubernetes/background-deployment.yaml b/deployment/kubernetes/background-deployment.yaml index 77bfc65fe..82369e6d3 100644 --- a/deployment/kubernetes/background-deployment.yaml +++ b/deployment/kubernetes/background-deployment.yaml @@ -22,15 +22,3 @@ spec: envFrom: - configMapRef: name: env-configmap - volumeMounts: - - name: dynamic-storage - mountPath: /home/storage - - name: file-connector-storage - mountPath: /home/file_connector_storage - volumes: - - name: dynamic-storage - persistentVolumeClaim: - claimName: dynamic-pvc - - name: file-connector-storage - persistentVolumeClaim: - claimName: file-connector-pvc diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml index 11ec91934..45cc806d8 100644 --- a/deployment/kubernetes/env-configmap.yaml +++ b/deployment/kubernetes/env-configmap.yaml @@ -5,6 +5,7 @@ metadata: data: # Auth Setting, also check the secrets file AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is 
only accessible behind VPN + ENCRYPTION_KEY_SECRET: "" # This should not be specified directly in the yaml, this is just for reference SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com' @@ -13,9 +14,9 @@ data: SMTP_PASS: "" # 'your-gmail-password' EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead # Gen AI Settings - GEN_AI_MODEL_PROVIDER: "openai" - GEN_AI_MODEL_VERSION: "gpt-3.5-turbo-0125" # Use GPT-4 if you have it - FAST_GEN_AI_MODEL_VERSION: "gpt-3.5-turbo-0125" + GEN_AI_MODEL_PROVIDER: "" + GEN_AI_MODEL_VERSION: "" + FAST_GEN_AI_MODEL_VERSION: "" GEN_AI_API_KEY: "" GEN_AI_API_ENDPOINT: "" GEN_AI_API_VERSION: "" @@ -26,6 +27,7 @@ data: DISABLE_LLM_FILTER_EXTRACTION: "" DISABLE_LLM_CHUNK_FILTER: "" DISABLE_LLM_CHOOSE_SEARCH: "" + DISABLE_LLM_QUERY_REPHRASE: "" # Query Options DOC_TIME_DECAY: "" HYBRID_ALPHA: "" @@ -42,16 +44,20 @@ data: ASYM_PASSAGE_PREFIX: "" ENABLE_RERANKING_REAL_TIME_FLOW: "" ENABLE_RERANKING_ASYNC_FLOW: "" - MODEL_SERVER_HOST: "" + MODEL_SERVER_HOST: "inference-model-server-service" MODEL_SERVER_PORT: "" - INDEXING_MODEL_SERVER_HOST: "" + INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service" MIN_THREADS_ML_MODELS: "" # Indexing Configs NUM_INDEXING_WORKERS: "" + ENABLED_CONNECTOR_TYPES: "" + DISABLE_INDEX_UPDATE_ON_SWAP: "" DASK_JOB_CLIENT_ENABLED: "" CONTINUE_ON_CONNECTOR_FAILURE: "" EXPERIMENTAL_CHECKPOINTING_ENABLED: "" CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: "" + JIRA_API_VERSION: "" + WEB_CONNECTOR_VALIDATE_URLS: "" GONG_CONNECTOR_START_TIME: "" NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: "" # DanswerBot SlackBot Configs diff --git a/deployment/kubernetes/indexing_model_server-service-deployment.yaml b/deployment/kubernetes/indexing_model_server-service-deployment.yaml new file mode 100644 index 000000000..d44b52e92 --- 
/dev/null +++ b/deployment/kubernetes/indexing_model_server-service-deployment.yaml @@ -0,0 +1,59 @@ +apiVersion: v1 +kind: Service +metadata: + name: indexing-model-server-service +spec: + selector: + app: indexing-model-server + ports: + - name: indexing-model-server-port + protocol: TCP + port: 9000 + targetPort: 9000 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: indexing-model-server-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: indexing-model-server + template: + metadata: + labels: + app: indexing-model-server + spec: + containers: + - name: indexing-model-server + image: danswer/danswer-model-server:latest + imagePullPolicy: IfNotPresent + command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] + ports: + - containerPort: 9000 + envFrom: + - configMapRef: + name: env-configmap + env: + - name: INDEXING_ONLY + value: "True" + volumeMounts: + - name: indexing-model-storage + mountPath: /root/.cache + volumes: + - name: indexing-model-storage + persistentVolumeClaim: + claimName: indexing-model-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: indexing-model-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 3Gi diff --git a/deployment/kubernetes/inference_model_server-service-deployment.yaml b/deployment/kubernetes/inference_model_server-service-deployment.yaml new file mode 100644 index 000000000..790dc633d --- /dev/null +++ b/deployment/kubernetes/inference_model_server-service-deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: v1 +kind: Service +metadata: + name: inference-model-server-service +spec: + selector: + app: inference-model-server + ports: + - name: inference-model-server-port + protocol: TCP + port: 9000 + targetPort: 9000 + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: inference-model-server-deployment +spec: + replicas: 1 + selector: + matchLabels: + app: inference-model-server + 
template: + metadata: + labels: + app: inference-model-server + spec: + containers: + - name: inference-model-server + image: danswer/danswer-model-server:latest + imagePullPolicy: IfNotPresent + command: [ "uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000" ] + ports: + - containerPort: 9000 + envFrom: + - configMapRef: + name: env-configmap + volumeMounts: + - name: inference-model-storage + mountPath: /root/.cache + volumes: + - name: inference-model-storage + persistentVolumeClaim: + claimName: inference-model-pvc +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: inference-model-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 3Gi diff --git a/deployment/kubernetes/persistent-volumes.yaml b/deployment/kubernetes/persistent-volumes.yaml deleted file mode 100644 index 8376b98e6..000000000 --- a/deployment/kubernetes/persistent-volumes.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: dynamic-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi ---- -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: file-connector-pvc -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 1Gi diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml index f33efa2ba..17330204c 100644 --- a/deployment/kubernetes/postgres-service-deployment.yaml +++ b/deployment/kubernetes/postgres-service-deployment.yaml @@ -54,4 +54,4 @@ spec: resources: requests: # Adjust the storage request size as needed. 
- storage: 1Gi + storage: 5Gi diff --git a/deployment/kubernetes/vespa-service-deployment.yaml b/deployment/kubernetes/vespa-service-deployment.yaml index 4fa5aa9fa..5016258b7 100644 --- a/deployment/kubernetes/vespa-service-deployment.yaml +++ b/deployment/kubernetes/vespa-service-deployment.yaml @@ -60,4 +60,4 @@ spec: resources: requests: # Adjust the storage request size as needed. - storage: 1Gi + storage: 5Gi diff --git a/web/Dockerfile b/web/Dockerfile index 05c7bb3eb..3d27813c7 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -1,5 +1,11 @@ FROM node:20-alpine AS base +LABEL com.danswer.maintainer="founders@danswer.ai" +LABEL com.danswer.description="This image is for the frontend/webserver of Danswer. It is MIT \ +Licensed and free for all to use. You can find it at \ +https://hub.docker.com/r/danswer/danswer-web-server. For more details, visit \ +https://github.com/danswer-ai/danswer." + # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. ARG DANSWER_VERSION=0.3-dev ENV DANSWER_VERSION=${DANSWER_VERSION} @@ -26,6 +32,9 @@ WORKDIR /app COPY --from=deps /app/node_modules ./node_modules COPY . . 
+# needed to get the `standalone` dir we expect later +ENV NEXT_PRIVATE_STANDALONE true + # Disable automatic telemetry collection ENV NEXT_TELEMETRY_DISABLED 1 @@ -35,6 +44,19 @@ ENV NEXT_TELEMETRY_DISABLED 1 ARG NEXT_PUBLIC_DISABLE_STREAMING ENV NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING} +ARG NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA +ENV NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA} + +# allow user to specify custom feedback options +ARG NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS +ENV NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS} + +ARG NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS +ENV NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS} + +ARG NEXT_PUBLIC_DISABLE_LOGOUT +ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT} + RUN npm run build # Step 3. Production image, copy all the files and run next @@ -70,8 +92,22 @@ COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static ARG NEXT_PUBLIC_DISABLE_STREAMING ENV NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING} +ARG NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA +ENV NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA} + +# allow user to specify custom feedback options +ARG NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS +ENV NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS} + +ARG NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS +ENV NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS} + +ARG NEXT_PUBLIC_DISABLE_LOGOUT +ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT} + # Note: Don't expose ports here, Compose will handle that for us if necessary. 
# If you want to run this without compose, specify the ports to # expose via cli CMD ["node", "server.js"] + diff --git a/web/next.config.js b/web/next.config.js index 6f7de34ae..1586af8d1 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -7,6 +7,7 @@ const version = env_version || package_version; /** @type {import('next').NextConfig} */ const nextConfig = { output: "standalone", + swcMinify: true, rewrites: async () => { // In production, something else (nginx in the one box setup) should take // care of this rewrite. TODO (chris): better support setups where @@ -24,13 +25,7 @@ const nextConfig = { // In production, something else (nginx in the one box setup) should take // care of this redirect. TODO (chris): better support setups where // web_server and api_server are on different machines. - const defaultRedirects = [ - { - source: "/", - destination: "/search", - permanent: true, - }, - ]; + const defaultRedirects = []; if (process.env.NODE_ENV === "production") return defaultRedirects; diff --git a/web/package-lock.json b/web/package-lock.json index 17b8a2912..0e19baab4 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -12,45 +12,49 @@ "@dnd-kit/modifiers": "^7.0.0", "@dnd-kit/sortable": "^8.0.0", "@phosphor-icons/react": "^2.0.8", + "@radix-ui/react-dialog": "^1.0.5", + "@radix-ui/react-popover": "^1.0.7", + "@radix-ui/react-tooltip": "^1.0.7", "@tremor/react": "^3.9.2", "@types/js-cookie": "^3.0.3", + "@types/lodash": "^4.17.0", "@types/node": "18.15.11", + "@types/prismjs": "^1.26.4", "@types/react": "18.0.32", "@types/react-dom": "18.0.11", + "@types/uuid": "^9.0.8", "autoprefixer": "^10.4.14", "formik": "^2.2.9", "js-cookie": "^3.0.5", - "next": "^14.0.0", + "lodash": "^4.17.21", + "mdast-util-find-and-replace": "^3.0.1", + "next": "^14.2.3", + "npm": "^10.8.0", "postcss": "^8.4.31", - "react": "^18.2.0", - "react-dom": "^18.2.0", + "prismjs": "^1.29.0", + "react": "^18.3.1", + "react-dom": "^18.3.1", "react-dropzone": 
"^14.2.3", "react-icons": "^4.8.0", "react-loader-spinner": "^5.4.5", - "react-markdown": "^8.0.7", + "react-markdown": "^9.0.1", + "rehype-prism-plus": "^2.0.0", + "remark-gfm": "^4.0.0", "semver": "^7.5.4", "sharp": "^0.32.6", "swr": "^2.1.5", "tailwindcss": "^3.3.1", "typescript": "5.0.3", + "uuid": "^9.0.1", "yup": "^1.1.1" }, "devDependencies": { "@tailwindcss/typography": "^0.5.10", "eslint": "^8.48.0", - "eslint-config-next": "^14.0.0", + "eslint-config-next": "^14.1.0", "prettier": "2.8.8" } }, - "node_modules/@aashutoshrathi/word-wrap": { - "version": "1.2.6", - "resolved": "https://registry.npmjs.org/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz", - "integrity": "sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==", - "dev": true, - "engines": { - "node": ">=0.10.0" - } - }, "node_modules/@alloc/quick-lru": { "version": "5.2.0", "resolved": "https://registry.npmjs.org/@alloc/quick-lru/-/quick-lru-5.2.0.tgz", @@ -63,119 +67,55 @@ } }, "node_modules/@ampproject/remapping": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.2.1.tgz", - "integrity": "sha512-lFMjJTrFL3j7L9yBxwYfCq2k6qqwHyzuUl/XBnif78PWTJYyL/dfowQHWE3sp6U6ZzqWiiIZnpTMO96zhkjwtg==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", + "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", "peer": true, "dependencies": { - "@jridgewell/gen-mapping": "^0.3.0", - "@jridgewell/trace-mapping": "^0.3.9" + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.24" }, "engines": { "node": ">=6.0.0" } }, "node_modules/@babel/code-frame": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.23.5.tgz", - "integrity": "sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==", + "version": 
"7.24.2", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.24.2.tgz", + "integrity": "sha512-y5+tLQyV8pg3fsiln67BVLD1P13Eg4lh5RW9mF0zUuvLrv9uIQ4MCL+CRT+FTsBlBjcIan6PGsLcBN0m3ClUyQ==", "dependencies": { - "@babel/highlight": "^7.23.4", - "chalk": "^2.4.2" + "@babel/highlight": "^7.24.2", + "picocolors": "^1.0.0" }, "engines": { "node": ">=6.9.0" } }, - "node_modules/@babel/code-frame/node_modules/ansi-styles": { - "version": "3.2.1", - "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz", - "integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==", - "dependencies": { - "color-convert": "^1.9.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/@babel/code-frame/node_modules/chalk": { - "version": "2.4.2", - "resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz", - "integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==", - "dependencies": { - "ansi-styles": "^3.2.1", - "escape-string-regexp": "^1.0.5", - "supports-color": "^5.3.0" - }, - "engines": { - "node": ">=4" - } - }, - "node_modules/@babel/code-frame/node_modules/color-convert": { - "version": "1.9.3", - "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz", - "integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==", - "dependencies": { - "color-name": "1.1.3" - } - }, - "node_modules/@babel/code-frame/node_modules/color-name": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", - "integrity": "sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==" - }, - "node_modules/@babel/code-frame/node_modules/escape-string-regexp": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz", - "integrity": 
"sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==", - "engines": { - "node": ">=0.8.0" - } - }, - "node_modules/@babel/code-frame/node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", - "engines": { - "node": ">=4" - } - }, - "node_modules/@babel/code-frame/node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", - "dependencies": { - "has-flag": "^3.0.0" - }, - "engines": { - "node": ">=4" - } - }, "node_modules/@babel/compat-data": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.5.tgz", - "integrity": "sha512-uU27kfDRlhfKl+w1U6vp16IuvSLtjAxdArVXPa9BvLkrr7CYIsxH5adpHObeAGY/41+syctUWOZ140a2Rvkgjw==", + "version": "7.24.4", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.4.tgz", + "integrity": "sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ==", "peer": true, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/core": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.5.tgz", - "integrity": "sha512-Cwc2XjUrG4ilcfOw4wBAK+enbdgwAcAJCfGUItPBKR7Mjw4aEfAFYrLxeRp4jWgtNIKn3n2AlBOfwwafl+42/g==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.24.5.tgz", + "integrity": "sha512-tVQRucExLQ02Boi4vdPp49svNGcfL2GhdTCT9aldhXgCJVAI21EtRfBettiuLUwce/7r6bFdgs6JFkcdTiFttA==", "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", - "@babel/code-frame": "^7.23.5", - "@babel/generator": "^7.23.5", - "@babel/helper-compilation-targets": 
"^7.22.15", - "@babel/helper-module-transforms": "^7.23.3", - "@babel/helpers": "^7.23.5", - "@babel/parser": "^7.23.5", - "@babel/template": "^7.22.15", - "@babel/traverse": "^7.23.5", - "@babel/types": "^7.23.5", + "@babel/code-frame": "^7.24.2", + "@babel/generator": "^7.24.5", + "@babel/helper-compilation-targets": "^7.23.6", + "@babel/helper-module-transforms": "^7.24.5", + "@babel/helpers": "^7.24.5", + "@babel/parser": "^7.24.5", + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.5", + "@babel/types": "^7.24.5", "convert-source-map": "^2.0.0", "debug": "^4.1.0", "gensync": "^1.0.0-beta.2", @@ -200,13 +140,13 @@ } }, "node_modules/@babel/generator": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.5.tgz", - "integrity": "sha512-BPssCHrBD+0YrxviOa3QzpqwhNIXKEtOa2jQrm4FlmkC2apYgRnQcmPWiGZDlGxiNtltnUFolMe8497Esry+jA==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.24.5.tgz", + "integrity": "sha512-x32i4hEXvr+iI0NEoEfDKzlemF8AmtOP8CcrRaEcpzysWuoEb1KknpcvMsHKPONoKZiDuItklgWhB18xEhr9PA==", "dependencies": { - "@babel/types": "^7.23.5", - "@jridgewell/gen-mapping": "^0.3.2", - "@jridgewell/trace-mapping": "^0.3.17", + "@babel/types": "^7.24.5", + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25", "jsesc": "^2.5.1" }, "engines": { @@ -225,14 +165,14 @@ } }, "node_modules/@babel/helper-compilation-targets": { - "version": "7.22.15", - "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.22.15.tgz", - "integrity": "sha512-y6EEzULok0Qvz8yyLkCvVX+02ic+By2UdOhylwUOvOn9dvYc9mKICJuuU1n1XBI02YWsNsnrY1kc6DVbjcXbtw==", + "version": "7.23.6", + "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.23.6.tgz", + "integrity": "sha512-9JB548GZoQVmzrFgp8o7KxdgkTGm6xs9DW0o/Pim72UDjzr5ObUQ6ZzYPqA+g9OTS2bBQoctLJrky0RDCAWRgQ==", "peer": true, 
"dependencies": { - "@babel/compat-data": "^7.22.9", - "@babel/helper-validator-option": "^7.22.15", - "browserslist": "^4.21.9", + "@babel/compat-data": "^7.23.5", + "@babel/helper-validator-option": "^7.23.5", + "browserslist": "^4.22.2", "lru-cache": "^5.1.1", "semver": "^6.3.1" }, @@ -258,12 +198,6 @@ "semver": "bin/semver.js" } }, - "node_modules/@babel/helper-compilation-targets/node_modules/yallist": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", - "peer": true - }, "node_modules/@babel/helper-environment-visitor": { "version": "7.22.20", "resolved": "https://registry.npmjs.org/@babel/helper-environment-visitor/-/helper-environment-visitor-7.22.20.tgz", @@ -296,27 +230,27 @@ } }, "node_modules/@babel/helper-module-imports": { - "version": "7.22.15", - "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.22.15.tgz", - "integrity": "sha512-0pYVBnDKZO2fnSPCrgM/6WMc7eS20Fbok+0r88fp+YtWVLZrp4CkafFGIp+W0VKw4a22sgebPT99y+FDNMdP4w==", + "version": "7.24.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-imports/-/helper-module-imports-7.24.3.tgz", + "integrity": "sha512-viKb0F9f2s0BCS22QSF308z/+1YWKV/76mwt61NBzS5izMzDPwdq1pTrzf+Li3npBWX9KdQbkeCt1jSAM7lZqg==", "dependencies": { - "@babel/types": "^7.22.15" + "@babel/types": "^7.24.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-module-transforms": { - "version": "7.23.3", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.3.tgz", - "integrity": "sha512-7bBs4ED9OmswdfDzpz4MpWgSrV7FXlc3zIagvLFjS5H+Mk7Snr21vQ6QwrsoCGMfNC4e4LQPdoULEt4ykz0SRQ==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.24.5.tgz", + "integrity": 
"sha512-9GxeY8c2d2mdQUP1Dye0ks3VDyIMS98kt/llQ2nUId8IsWqTF0l1LkSX0/uP7l7MCDrzXS009Hyhe2gzTiGW8A==", "peer": true, "dependencies": { "@babel/helper-environment-visitor": "^7.22.20", - "@babel/helper-module-imports": "^7.22.15", - "@babel/helper-simple-access": "^7.22.5", - "@babel/helper-split-export-declaration": "^7.22.6", - "@babel/helper-validator-identifier": "^7.22.20" + "@babel/helper-module-imports": "^7.24.3", + "@babel/helper-simple-access": "^7.24.5", + "@babel/helper-split-export-declaration": "^7.24.5", + "@babel/helper-validator-identifier": "^7.24.5" }, "engines": { "node": ">=6.9.0" @@ -326,48 +260,48 @@ } }, "node_modules/@babel/helper-plugin-utils": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.22.5.tgz", - "integrity": "sha512-uLls06UVKgFG9QD4OeFYLEGteMIAa5kpTPcFL28yuCIIzsf6ZyKZMllKVOCZFhiZ5ptnwX4mtKdWCBE/uT4amg==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.5.tgz", + "integrity": "sha512-xjNLDopRzW2o6ba0gKbkZq5YWEBaK3PCyTOY1K2P/O07LGMhMqlMXPxwN4S5/RhWuCobT8z0jrlKGlYmeR1OhQ==", "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-simple-access": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.22.5.tgz", - "integrity": "sha512-n0H99E/K+Bika3++WNL17POvo4rKWZ7lZEp1Q+fStVbUi8nxPQEBOlTmCOxW/0JsS56SKKQ+ojAe2pHKJHN35w==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.24.5.tgz", + "integrity": "sha512-uH3Hmf5q5n7n8mz7arjUlDOCbttY/DW4DYhE6FUsjKJ/oYC1kQQUvwEQWxRwUpX9qQKRXeqLwWxrqilMrf32sQ==", "peer": true, "dependencies": { - "@babel/types": "^7.22.5" + "@babel/types": "^7.24.5" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-split-export-declaration": { - "version": "7.22.6", - "resolved": 
"https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.22.6.tgz", - "integrity": "sha512-AsUnxuLhRYsisFiaJwvp1QF+I3KjD5FOxut14q/GzovUe6orHLesW2C7d754kRm53h5gqrz6sFl6sxc4BVtE/g==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helper-split-export-declaration/-/helper-split-export-declaration-7.24.5.tgz", + "integrity": "sha512-5CHncttXohrHk8GWOFCcCl4oRD9fKosWlIRgWm4ql9VYioKm52Mk2xsmoohvm7f3JoiLSM5ZgJuRaf5QZZYd3Q==", "dependencies": { - "@babel/types": "^7.22.5" + "@babel/types": "^7.24.5" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-string-parser": { - "version": "7.23.4", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.23.4.tgz", - "integrity": "sha512-803gmbQdqwdf4olxrX4AJyFBV/RTr3rSmOj0rKwesmzlfhYNDEs+/iOcznzpNWlJlIlTJC2QfPFcHB6DlzdVLQ==", + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz", + "integrity": "sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==", "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-validator-identifier": { - "version": "7.22.20", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz", - "integrity": "sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.5.tgz", + "integrity": "sha512-3q93SSKX2TWCG30M2G2kwaKeTYgEUp5Snjuj8qm729SObL6nbtUldAi37qbxkD5gg3xnBio+f9nqpSepGZMvxA==", "engines": { "node": ">=6.9.0" } @@ -382,27 +316,28 @@ } }, "node_modules/@babel/helpers": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.23.5.tgz", - "integrity": 
"sha512-oO7us8FzTEsG3U6ag9MfdF1iA/7Z6dz+MtFhifZk8C8o453rGJFFWUP1t+ULM9TUIAzC9uxXEiXjOiVMyd7QPg==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.24.5.tgz", + "integrity": "sha512-CiQmBMMpMQHwM5m01YnrM6imUG1ebgYJ+fAIW4FZe6m4qHTPaRHti+R8cggAwkdz4oXhtO4/K9JWlh+8hIfR2Q==", "peer": true, "dependencies": { - "@babel/template": "^7.22.15", - "@babel/traverse": "^7.23.5", - "@babel/types": "^7.23.5" + "@babel/template": "^7.24.0", + "@babel/traverse": "^7.24.5", + "@babel/types": "^7.24.5" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/highlight": { - "version": "7.23.4", - "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.23.4.tgz", - "integrity": "sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.24.5.tgz", + "integrity": "sha512-8lLmua6AVh/8SLJRRVD6V8p73Hir9w5mJrhE+IPpILG31KKlI9iz5zmBYKcWPS59qSfgP9RaSBQSHHE81WKuEw==", "dependencies": { - "@babel/helper-validator-identifier": "^7.22.20", + "@babel/helper-validator-identifier": "^7.24.5", "chalk": "^2.4.2", - "js-tokens": "^4.0.0" + "js-tokens": "^4.0.0", + "picocolors": "^1.0.0" }, "engines": { "node": ">=6.9.0" @@ -473,9 +408,9 @@ } }, "node_modules/@babel/parser": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.5.tgz", - "integrity": "sha512-hOOqoiNXrmGdFbhgCzu6GiURxUgM27Xwd/aPuu8RfHEZPBzL1Z54okAHAQjXfcQNwvrlkAmAp4SlRTZ45vlthQ==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.24.5.tgz", + "integrity": "sha512-EOv5IK8arwh3LI47dz1b0tKUb/1uhHAnHJOrjgtQMIpu1uXd9mlFrJg9IUgGUgZ41Ch0K8REPTYpO7B76b4vJg==", "bin": { "parser": "bin/babel-parser.js" }, @@ -484,11 +419,11 @@ } }, "node_modules/@babel/plugin-syntax-jsx": { - "version": "7.23.3", - "resolved": 
"https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.23.3.tgz", - "integrity": "sha512-EB2MELswq55OHUoRZLGg/zC7QWUKfNLpE57m/S2yr1uEneIgsTgrSzXP3NXEsMkVn76OlaVVnzN+ugObuYGwhg==", + "version": "7.24.1", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.1.tgz", + "integrity": "sha512-2eCtxZXf+kbkMIsXS4poTvT4Yu5rXiRa+9xGVT56raghjmBTKMpFNc9R4IDiB4emao9eO22Ox7CxuJG7BgExqA==", "dependencies": { - "@babel/helper-plugin-utils": "^7.22.5" + "@babel/helper-plugin-utils": "^7.24.0" }, "engines": { "node": ">=6.9.0" @@ -498,9 +433,9 @@ } }, "node_modules/@babel/runtime": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.23.5.tgz", - "integrity": "sha512-NdUTHcPe4C99WxPub+K9l9tK5/lV4UXIoaHSYgzco9BCyjKAAwzdBI+wWtYqHt7LJdbo74ZjRPJgzVweq1sz0w==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.24.5.tgz", + "integrity": "sha512-Nms86NXrsaeU9vbBJKni6gXiEXZ4CVpYVzEjDH9Sb8vmZ3UljyA1GSOJl/6LGPO8EHLuSF9H+IxNXHPX8QHJ4g==", "dependencies": { "regenerator-runtime": "^0.14.0" }, @@ -509,32 +444,32 @@ } }, "node_modules/@babel/template": { - "version": "7.22.15", - "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.22.15.tgz", - "integrity": "sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==", + "version": "7.24.0", + "resolved": "https://registry.npmjs.org/@babel/template/-/template-7.24.0.tgz", + "integrity": "sha512-Bkf2q8lMB0AFpX0NFEqSbx1OkTHf0f+0j82mkw+ZpzBnkk7e9Ql0891vlfgi+kHwOk8tQjiQHpqh4LaSa0fKEA==", "dependencies": { - "@babel/code-frame": "^7.22.13", - "@babel/parser": "^7.22.15", - "@babel/types": "^7.22.15" + "@babel/code-frame": "^7.23.5", + "@babel/parser": "^7.24.0", + "@babel/types": "^7.24.0" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/traverse": { - "version": "7.23.5", - "resolved": 
"https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.5.tgz", - "integrity": "sha512-czx7Xy5a6sapWWRx61m1Ke1Ra4vczu1mCTtJam5zRTBOonfdJ+S/B6HYmGYu3fJtr8GGET3si6IhgWVBhJ/m8w==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.24.5.tgz", + "integrity": "sha512-7aaBLeDQ4zYcUFDUD41lJc1fG8+5IU9DaNSJAgal866FGvmD5EbWQgnEC6kO1gGLsX0esNkfnJSndbTXA3r7UA==", "dependencies": { - "@babel/code-frame": "^7.23.5", - "@babel/generator": "^7.23.5", + "@babel/code-frame": "^7.24.2", + "@babel/generator": "^7.24.5", "@babel/helper-environment-visitor": "^7.22.20", "@babel/helper-function-name": "^7.23.0", "@babel/helper-hoist-variables": "^7.22.5", - "@babel/helper-split-export-declaration": "^7.22.6", - "@babel/parser": "^7.23.5", - "@babel/types": "^7.23.5", - "debug": "^4.1.0", + "@babel/helper-split-export-declaration": "^7.24.5", + "@babel/parser": "^7.24.5", + "@babel/types": "^7.24.5", + "debug": "^4.3.1", "globals": "^11.1.0" }, "engines": { @@ -550,12 +485,12 @@ } }, "node_modules/@babel/types": { - "version": "7.23.5", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.5.tgz", - "integrity": "sha512-ON5kSOJwVO6xXVRTvOI0eOnWe7VdUcIpsovGo9U/Br4Ie4UVFQTboO2cYnDhAGU6Fp+UxSiT+pMft0SMHfuq6w==", + "version": "7.24.5", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.5.tgz", + "integrity": "sha512-6mQNsaLeXTw0nxYUYu+NSa4Hx4BlF1x1x8/PMFbiR+GBSr+2DkECc69b8hgy2frEodNcvPffeH8YfWd3LI6jhQ==", "dependencies": { - "@babel/helper-string-parser": "^7.23.4", - "@babel/helper-validator-identifier": "^7.22.20", + "@babel/helper-string-parser": "^7.24.1", + "@babel/helper-validator-identifier": "^7.24.5", "to-fast-properties": "^2.0.0" }, "engines": { @@ -625,9 +560,9 @@ } }, "node_modules/@emotion/is-prop-valid": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@emotion/is-prop-valid/-/is-prop-valid-1.2.1.tgz", - "integrity": 
"sha512-61Mf7Ufx4aDxx1xlDeOm8aFFigGHE4z+0sKCa+IHCeZKiyP9RLD0Mmx7m8b9/Cf37f7NAvQOOJAbQQGVr5uERw==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/@emotion/is-prop-valid/-/is-prop-valid-1.2.2.tgz", + "integrity": "sha512-uNsoYd37AFmaCdXlg6EYD1KaPOaRWRByMCYzbKUX4+hhMfrxdVSelShywL4JVaAeM/eHUOSprYBQls+/neX3pw==", "dependencies": { "@emotion/memoize": "^0.8.1" } @@ -695,29 +630,29 @@ } }, "node_modules/@eslint/js": { - "version": "8.55.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.55.0.tgz", - "integrity": "sha512-qQfo2mxH5yVom1kacMtZZJFVdW+E70mqHMJvVg6WTLo+VBuQJ4TojZlfWBjK0ve5BdEeNAVxOsl/nvNMpJOaJA==", + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz", + "integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==", "dev": true, "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, "node_modules/@floating-ui/core": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.5.1.tgz", - "integrity": "sha512-QgcKYwzcc8vvZ4n/5uklchy8KVdjJwcOeI+HnnTNclJjs2nYsy23DOCf+sSV1kBwD9yDAoVKCkv/gEPzgQU3Pw==", + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/@floating-ui/core/-/core-1.6.2.tgz", + "integrity": "sha512-+2XpQV9LLZeanU4ZevzRnGFg2neDeKHgFLjP6YLW+tly0IvrhqT4u8enLGjLH3qeh85g19xY5rsAusfwTdn5lg==", "dependencies": { - "@floating-ui/utils": "^0.1.3" + "@floating-ui/utils": "^0.2.0" } }, "node_modules/@floating-ui/dom": { - "version": "1.5.3", - "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.5.3.tgz", - "integrity": "sha512-ClAbQnEqJAKCJOEbbLo5IUlZHkNszqhuxS4fHAVxRPXPya6Ysf2G8KypnYcOTpx6I8xcgF9bbHb6g/2KpbV8qA==", + "version": "1.6.5", + "resolved": "https://registry.npmjs.org/@floating-ui/dom/-/dom-1.6.5.tgz", + "integrity": "sha512-Nsdud2X65Dz+1RHjAIP0t8z5e2ff/IRbei6BqFrl1urT8sDVzM1HMQ+R0XcU5ceRfyO3I6ayeqIfh+6Wb8LGTw==", "dependencies": { - "@floating-ui/core": "^1.4.2", - 
"@floating-ui/utils": "^0.1.3" + "@floating-ui/core": "^1.0.0", + "@floating-ui/utils": "^0.2.0" } }, "node_modules/@floating-ui/react": { @@ -735,6 +670,18 @@ } }, "node_modules/@floating-ui/react-dom": { + "version": "2.0.9", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.9.tgz", + "integrity": "sha512-q0umO0+LQK4+p6aGyvzASqKbKOJcAHJ7ycE9CuUvfx3s9zTHWmGJTPOIlM/hmSBfUfg/XfY5YhLBLR/LHwShQQ==", + "dependencies": { + "@floating-ui/dom": "^1.0.0" + }, + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, + "node_modules/@floating-ui/react/node_modules/@floating-ui/react-dom": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-1.3.0.tgz", "integrity": "sha512-htwHm67Ji5E/pROEAr7f8IKFShuiCKHwUC/UY4vC3I5jiSvGFAYnSYiZO5MlGmads+QqvUkR9ANHEguGrDv72g==", @@ -747,15 +694,16 @@ } }, "node_modules/@floating-ui/utils": { - "version": "0.1.6", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.1.6.tgz", - "integrity": "sha512-OfX7E2oUDYxtBvsuS4e/jSn4Q9Qb6DzgeYtsAdkPZ47znpoNsMgZw0+tVijiv3uGNR6dgNlty6r9rzIzHjtd/A==" + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz", + "integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw==" }, "node_modules/@headlessui/react": { - "version": "1.7.17", - "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-1.7.17.tgz", - "integrity": "sha512-4am+tzvkqDSSgiwrsEpGWqgGo9dz8qU5M3znCkC4PgkpY4HcCZzEDEvozltGGGHIKl9jbXbZPSH5TWn4sWJdow==", + "version": "1.7.19", + "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-1.7.19.tgz", + "integrity": "sha512-Ll+8q3OlMJfJbAKM/+/Y2q6PPYbryqNTXDbryx7SXLIDamkF6iQFbriYHga0dY44PvDhvvBWCx1Xj4U5+G4hOw==", "dependencies": { + "@tanstack/react-virtual": "^3.0.0-beta.60", "client-only": "^0.0.1" }, "engines": { @@ -767,9 +715,9 @@ } }, 
"node_modules/@headlessui/tailwindcss": { - "version": "0.1.3", - "resolved": "https://registry.npmjs.org/@headlessui/tailwindcss/-/tailwindcss-0.1.3.tgz", - "integrity": "sha512-3aMdDyYZx9A15euRehpppSyQnb2gIw2s/Uccn2ELIoLQ9oDy0+9oRygNWNjXCD5Dt+w1pxo7C+XoiYvGcqA4Kg==", + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/@headlessui/tailwindcss/-/tailwindcss-0.2.0.tgz", + "integrity": "sha512-fpL830Fln1SykOCboExsWr3JIVeQKieLJ3XytLe/tt1A0XzqUthOftDmjcCYLW62w7mQI7wXcoPXr3tZ9QfGxw==", "engines": { "node": ">=10" }, @@ -778,13 +726,13 @@ } }, "node_modules/@humanwhocodes/config-array": { - "version": "0.11.13", - "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.13.tgz", - "integrity": "sha512-JSBDMiDKSzQVngfRjOdFXgFfklaXI4K9nLF49Auh21lmBWRLIK3+xTErTWD4KU54pb6coM6ESE7Awz/FNU3zgQ==", + "version": "0.11.14", + "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz", + "integrity": "sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg==", "dev": true, "dependencies": { - "@humanwhocodes/object-schema": "^2.0.1", - "debug": "^4.1.1", + "@humanwhocodes/object-schema": "^2.0.2", + "debug": "^4.3.1", "minimatch": "^3.0.5" }, "engines": { @@ -805,36 +753,77 @@ } }, "node_modules/@humanwhocodes/object-schema": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.1.tgz", - "integrity": "sha512-dvuCeX5fC9dXgJn9t+X5atfmgQAzUOWqS1254Gh0m6i8wKd10ebXkfNKiRK+1GWi/yTvvLDHpoxLr0xxxeslWw==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz", + "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==", "dev": true }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.3", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz", - 
"integrity": "sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ==", + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", "dependencies": { - "@jridgewell/set-array": "^1.0.1", + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dependencies": { + "@jridgewell/set-array": "^1.2.1", "@jridgewell/sourcemap-codec": "^1.4.10", - "@jridgewell/trace-mapping": "^0.3.9" + "@jridgewell/trace-mapping": "^0.3.24" }, "engines": { "node": ">=6.0.0" } }, 
"node_modules/@jridgewell/resolve-uri": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.1.tgz", - "integrity": "sha512-dSYZh7HhCDtCKm4QakX0xFpsRDqjjtZf/kjI/v3T3Nwt5r8/qz/M19F9ySyOqU94SXBmeG9ttTul+YnR4LOxFA==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", "engines": { "node": ">=6.0.0" } }, "node_modules/@jridgewell/set-array": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz", - "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", "engines": { "node": ">=6.0.0" } @@ -845,32 +834,32 @@ "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" }, "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.20", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.20.tgz", - "integrity": "sha512-R8LcPeWZol2zR8mmH3JeKQ6QRCFb7XgUhV9ZlGhHLGyg4wpPiPZNQOOWhFZhxKw8u//yTbNGI42Bx/3paXEQ+Q==", + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "node_modules/@next/env": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/env/-/env-14.0.3.tgz", - "integrity": 
"sha512-7xRqh9nMvP5xrW4/+L0jgRRX+HoNRGnfJpD+5Wq6/13j3dsdzxO3BCXn7D3hMqsDb+vjZnJq+vI7+EtgrYZTeA==" + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz", + "integrity": "sha512-W7fd7IbkfmeeY2gXrzJYDx8D2lWKbVoTIj1o1ScPHNzvp30s1AuoEFSdr39bC5sjxJaxTtq3OTCZboNp0lNWHA==" }, "node_modules/@next/eslint-plugin-next": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-14.0.3.tgz", - "integrity": "sha512-j4K0n+DcmQYCVnSAM+UByTVfIHnYQy2ODozfQP+4RdwtRDfobrIvKq1K4Exb2koJ79HSSa7s6B2SA8T/1YR3RA==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-14.2.3.tgz", + "integrity": "sha512-L3oDricIIjgj1AVnRdRor21gI7mShlSwU/1ZGHmqM3LzHhXXhdkrfeNY5zif25Bi5Dd7fiJHsbhoZCHfXYvlAw==", "dev": true, "dependencies": { - "glob": "7.1.7" + "glob": "10.3.10" } }, "node_modules/@next/swc-darwin-arm64": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.0.3.tgz", - "integrity": "sha512-64JbSvi3nbbcEtyitNn2LEDS/hcleAFpHdykpcnrstITFlzFgB/bW0ER5/SJJwUPj+ZPY+z3e+1jAfcczRLVGw==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.3.tgz", + "integrity": "sha512-3pEYo/RaGqPP0YzwnlmPN2puaF2WMLM3apt5jLW2fFdXD9+pqcoTzRk+iZsf8ta7+quAe4Q6Ms0nR0SFGFdS1A==", "cpu": [ "arm64" ], @@ -883,9 +872,9 @@ } }, "node_modules/@next/swc-darwin-x64": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.0.3.tgz", - "integrity": "sha512-RkTf+KbAD0SgYdVn1XzqE/+sIxYGB7NLMZRn9I4Z24afrhUpVJx6L8hsRnIwxz3ERE2NFURNliPjJ2QNfnWicQ==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.3.tgz", + "integrity": "sha512-6adp7waE6P1TYFSXpY366xwsOnEXM+y1kgRpjSRVI2CBDOcbRjsJ67Z6EgKIqWIue52d2q/Mx8g9MszARj8IEA==", "cpu": [ "x64" ], @@ -898,9 +887,9 @@ } }, 
"node_modules/@next/swc-linux-arm64-gnu": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.0.3.tgz", - "integrity": "sha512-3tBWGgz7M9RKLO6sPWC6c4pAw4geujSwQ7q7Si4d6bo0l6cLs4tmO+lnSwFp1Tm3lxwfMk0SgkJT7EdwYSJvcg==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.3.tgz", + "integrity": "sha512-cuzCE/1G0ZSnTAHJPUT1rPgQx1w5tzSX7POXSLaS7w2nIUJUD+e25QoXD/hMfxbsT9rslEXugWypJMILBj/QsA==", "cpu": [ "arm64" ], @@ -913,9 +902,9 @@ } }, "node_modules/@next/swc-linux-arm64-musl": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.0.3.tgz", - "integrity": "sha512-v0v8Kb8j8T23jvVUWZeA2D8+izWspeyeDGNaT2/mTHWp7+37fiNfL8bmBWiOmeumXkacM/AB0XOUQvEbncSnHA==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.3.tgz", + "integrity": "sha512-0D4/oMM2Y9Ta3nGuCcQN8jjJjmDPYpHX9OJzqk42NZGJocU2MqhBq5tWkJrUQOQY9N+In9xOdymzapM09GeiZw==", "cpu": [ "arm64" ], @@ -928,9 +917,9 @@ } }, "node_modules/@next/swc-linux-x64-gnu": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.0.3.tgz", - "integrity": "sha512-VM1aE1tJKLBwMGtyBR21yy+STfl0MapMQnNrXkxeyLs0GFv/kZqXS5Jw/TQ3TSUnbv0QPDf/X8sDXuMtSgG6eg==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.3.tgz", + "integrity": "sha512-ENPiNnBNDInBLyUU5ii8PMQh+4XLr4pG51tOp6aJ9xqFQ2iRI6IH0Ds2yJkAzNV1CfyagcyzPfROMViS2wOZ9w==", "cpu": [ "x64" ], @@ -943,9 +932,9 @@ } }, "node_modules/@next/swc-linux-x64-musl": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.0.3.tgz", - "integrity": "sha512-64EnmKy18MYFL5CzLaSuUn561hbO1Gk16jM/KHznYP3iCIfF9e3yULtHaMy0D8zbHfxset9LTOv6cuYKJgcOxg==", + "version": 
"14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.3.tgz", + "integrity": "sha512-BTAbq0LnCbF5MtoM7I/9UeUu/8ZBY0i8SFjUMCbPDOLv+un67e2JgyN4pmgfXBwy/I+RHu8q+k+MCkDN6P9ViQ==", "cpu": [ "x64" ], @@ -958,9 +947,9 @@ } }, "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.0.3.tgz", - "integrity": "sha512-WRDp8QrmsL1bbGtsh5GqQ/KWulmrnMBgbnb+59qNTW1kVi1nG/2ndZLkcbs2GX7NpFLlToLRMWSQXmPzQm4tog==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.3.tgz", + "integrity": "sha512-AEHIw/dhAMLNFJFJIJIyOFDzrzI5bAjI9J26gbO5xhAKHYTZ9Or04BesFPXiAYXDNdrwTP2dQceYA4dL1geu8A==", "cpu": [ "arm64" ], @@ -973,9 +962,9 @@ } }, "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.0.3.tgz", - "integrity": "sha512-EKffQeqCrj+t6qFFhIFTRoqb2QwX1mU7iTOvMyLbYw3QtqTw9sMwjykyiMlZlrfm2a4fA84+/aeW+PMg1MjuTg==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.3.tgz", + "integrity": "sha512-vga40n1q6aYb0CLrM+eEmisfKCR45ixQYXuBXxOOmmoV8sYST9k7E3US32FsY+CkkF7NtzdcebiFT4CHuMSyZw==", "cpu": [ "ia32" ], @@ -988,9 +977,9 @@ } }, "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.0.3.tgz", - "integrity": "sha512-ERhKPSJ1vQrPiwrs15Pjz/rvDHZmkmvbf/BjPN/UCOI++ODftT0GtasDPi0j+y6PPJi5HsXw+dpRaXUaw4vjuQ==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.3.tgz", + "integrity": "sha512-Q1/zm43RWynxrO7lW4ehciQVj+5ePBhOK+/K2P7pLFX3JaJ/IZVC69SHidrmZSOkqz7ECIOhhy7XhAFG4JYyHA==", "cpu": [ "x64" ], @@ -1035,9 +1024,9 @@ } }, 
"node_modules/@phosphor-icons/react": { - "version": "2.0.15", - "resolved": "https://registry.npmjs.org/@phosphor-icons/react/-/react-2.0.15.tgz", - "integrity": "sha512-PQKNcRrfERlC8gJGNz0su0i9xVmeubXSNxucPcbCLDd9u0cwJVTEyYK87muul/svf0UXFdL2Vl6bbeOhT1Mwow==", + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@phosphor-icons/react/-/react-2.1.5.tgz", + "integrity": "sha512-B7vRm/w+P/+eavWZP5CB5Ul0ffK4Y7fpd/auWKuGvm+8pVgAJzbOK8O0s+DqzR+TwWkh5pHtJTuoAtaSvgCPzg==", "engines": { "node": ">=10" }, @@ -1046,24 +1035,555 @@ "react-dom": ">= 16.8" } }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@radix-ui/primitive": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.0.1.tgz", + "integrity": "sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw==", + "dependencies": { + "@babel/runtime": "^7.13.10" + } + }, + "node_modules/@radix-ui/react-arrow": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.0.3.tgz", + "integrity": "sha512-wSP+pHsB/jQRaL6voubsQ/ZlrGBHHrOjmBnr19hxYgtS0WvAFwZhK2WP/YY5yF9uKECCEEDGxuLxq1NBK51wFA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-primitive": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-compose-refs": { + "version": "1.0.1", + "resolved": 
"https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.1.tgz", + "integrity": "sha512-fDSBgd44FKHa1FRMU59qBMPFcl2PZE+2nmqunj+BWFyYYjnhIDWL2ItDs3rrbJDQOtzt5nIebLCQc4QRfz6LJw==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.0.1.tgz", + "integrity": "sha512-ebbrdFoYTcuZ0v4wG5tedGnp9tzcV8awzsxYph7gXUyvnNLuTIcCk1q17JEbnVhXAKG9oX3KtchwiMIAYp9NLg==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.0.5.tgz", + "integrity": "sha512-GjWJX/AUpB703eEBanuBnIWdIXg6NvJFCXcNlSZk4xdszCdhrJgBoUd1cGk67vFO+WdA2pfI/plOpqz/5GUP6Q==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-dismissable-layer": "1.0.5", + "@radix-ui/react-focus-guards": "1.0.1", + "@radix-ui/react-focus-scope": "1.0.4", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-portal": "1.0.4", + "@radix-ui/react-presence": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-slot": "1.0.2", + "@radix-ui/react-use-controllable-state": "1.0.1", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.5.5" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + 
"@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.5.tgz", + "integrity": "sha512-aJeDjQhywg9LBu2t/At58hCvr7pEm0o2Ke1x33B+MhjNmmZ17sy4KImo0KPLgsnc/zN7GPdce8Cnn0SWvwZO7g==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-callback-ref": "1.0.1", + "@radix-ui/react-use-escape-keydown": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-focus-guards": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.1.tgz", + "integrity": "sha512-Rect2dWbQ8waGzhMavsIbmSVCgYxkXLxxR3ZvCX79JOglzdEy4JXMb98lq4hPxUbLr77nP0UOGf4rcMU+s1pUA==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-focus-scope": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.4.tgz", + "integrity": "sha512-sL04Mgvf+FmyvZeYfNu1EPAaaxD+aw7cYeIB9L9Fvq8+urhltTRaEo5ysKOpHuKPclsZcSUMKlN05x4u+CINpA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + 
"@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-id": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.0.1.tgz", + "integrity": "sha512-tI7sT/kqYp8p96yGWY1OAnLHrqDgzHefRBKQ2YAkBS5ja7QLcZ9Z/uY7bEjPUatf8RomoXM8/1sMj1IJaE5UzQ==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-layout-effect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.0.7.tgz", + "integrity": "sha512-shtvVnlsxT6faMnK/a7n0wptwBD23xc1Z5mdrtKLwVEfsEMXodS0r5s0/g5P0hX//EKYZS2sxUjqfzlg52ZSnQ==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-dismissable-layer": "1.0.5", + "@radix-ui/react-focus-guards": "1.0.1", + "@radix-ui/react-focus-scope": "1.0.4", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-popper": "1.1.3", + "@radix-ui/react-portal": "1.0.4", + "@radix-ui/react-presence": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-slot": "1.0.2", + "@radix-ui/react-use-controllable-state": "1.0.1", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.5.5" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + 
"node_modules/@radix-ui/react-popper": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.1.3.tgz", + "integrity": "sha512-cKpopj/5RHZWjrbF2846jBNacjQVwkP068DfmgrNJXpvVWrOvlAmE9xSiy5OqeE+Gi8D9fP+oDhUnPqNMY8/5w==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.0.3", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-callback-ref": "1.0.1", + "@radix-ui/react-use-layout-effect": "1.0.1", + "@radix-ui/react-use-rect": "1.0.1", + "@radix-ui/react-use-size": "1.0.1", + "@radix-ui/rect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-portal": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.0.4.tgz", + "integrity": "sha512-Qki+C/EuGUVCQTOTD5vzJzJuMUlewbzuKyUy+/iHM2uwGiru9gZeBJtHAPKAEkB5KWGi9mP/CHKcY0wt1aW45Q==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-primitive": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-presence": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.0.1.tgz", + "integrity": "sha512-UXLW4UAbIY5ZjcvzjfRFo5gxva8QirC9hF7wRE4U5gz+TP0DbRk+//qyuAQ1McDxBt1xNMBTaciFGvEmJvAZCg==", + "dependencies": { + "@babel/runtime": "^7.13.10", + 
"@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-use-layout-effect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-primitive": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-1.0.3.tgz", + "integrity": "sha512-yi58uVyoAcK/Nq1inRY56ZSjKypBNKTa/1mcL8qdl6oJeEaDbOldlzrGn7P6Q3Id5d+SYNGc5AJgc4vGhjs5+g==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-slot": "1.0.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-slot": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.0.2.tgz", + "integrity": "sha512-YeTpuq4deV+6DusvVUW4ivBgnkHwECUu0BiN43L5UCDFgdhsRUWAghhTF5MbvNTPzmiFOx90asDSUjWuCNapwg==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-tooltip": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.0.7.tgz", + "integrity": "sha512-lPh5iKNFVQ/jav/j6ZrWq3blfDJ0OH9R6FlNUHPMqdLuQ9vwDgFsRxvl8b7Asuy5c8xmoojHUxKHQSOAvMHxyw==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + 
"@radix-ui/react-context": "1.0.1", + "@radix-ui/react-dismissable-layer": "1.0.5", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-popper": "1.1.3", + "@radix-ui/react-portal": "1.0.4", + "@radix-ui/react-presence": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-slot": "1.0.2", + "@radix-ui/react-use-controllable-state": "1.0.1", + "@radix-ui/react-visually-hidden": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz", + "integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-controllable-state": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.1.tgz", + "integrity": "sha512-Svl5GY5FQeN758fWKrjM6Qb7asvXeiZltlT4U2gVfl8Gx5UAv2sMR0LWo8yhsIZh2oQ0eFdZ59aoOOMV7b47VA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-escape-keydown": { + "version": "1.0.3", + "resolved": 
"https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.3.tgz", + "integrity": "sha512-vyL82j40hcFicA+M4Ex7hVkB9vHgSse1ZWomAqV2Je3RleKGO5iM8KMOEtfoSB0PnIelMd2lATjTGMYqN5ylTg==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-layout-effect": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz", + "integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-rect": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.0.1.tgz", + "integrity": "sha512-Cq5DLuSiuYVKNU8orzJMbl15TXilTnJKUCltMVQg53BQOF1/C5toAaGrowkgksdBQ9H+SRL23g0HDmg9tvmxXw==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/rect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-size": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.0.1.tgz", + "integrity": "sha512-ibay+VqrgcaI6veAojjofPATwledXiSmX+C0KrBk/xgpX9rBzPV3OsfwlhQdUOFbh+LKQorLYT+xTXW9V8yd0g==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-layout-effect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || 
^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-visually-hidden": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.0.3.tgz", + "integrity": "sha512-D4w41yN5YRKtu464TLnByKzMDG/JlMPHtfZgQAu9v6mNakUqGUI9vUrfQKz8NK41VMm/xbZbh76NUTVtIYqOMA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-primitive": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/rect": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.0.1.tgz", + "integrity": "sha512-fyrgCaedtvMg9NK3en0pnOYJdtfwxUcNolezkNPUsoX57X8oQk+NkqcvzHXD2uKNij6GXmWU9NDru2IWjrO4BQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + } + }, "node_modules/@rushstack/eslint-patch": { - "version": "1.6.0", - "resolved": "https://registry.npmjs.org/@rushstack/eslint-patch/-/eslint-patch-1.6.0.tgz", - "integrity": "sha512-2/U3GXA6YiPYQDLGwtGlnNgKYBSwCFIHf8Y9LUY5VATHdtbLlU0Y1R3QoBnT0aB4qv/BEiVVsj7LJXoQCgJ2vA==", + "version": "1.10.3", + "resolved": "https://registry.npmjs.org/@rushstack/eslint-patch/-/eslint-patch-1.10.3.tgz", + "integrity": "sha512-qC/xYId4NMebE6w/V33Fh9gWxLgURiNYgVNObbJl2LZv0GUUItCcCqC5axQSwRaAgaxl2mELq1rMzlswaQ0Zxg==", "dev": true }, + "node_modules/@swc/counter": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", + "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==" + }, "node_modules/@swc/helpers": { - "version": "0.5.2", - "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.2.tgz", - "integrity": 
"sha512-E4KcWTpoLHqwPHLxidpOqQbcrZVgi0rsmmZXUle1jXmJfuIf/UWpczUJ7MZZ5tlxytgJXyp0w4PGkkeLiuIdZw==", + "version": "0.5.5", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.5.tgz", + "integrity": "sha512-KGYxvIOXcceOAbEk4bi/dVLEK9z8sZ0uBB3Il5b1rhfClSpcX0yfRO0KmTkqR2cnQDymwLB+25ZyMzICg/cm/A==", "dependencies": { + "@swc/counter": "^0.1.3", "tslib": "^2.4.0" } }, "node_modules/@tailwindcss/typography": { - "version": "0.5.10", - "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.10.tgz", - "integrity": "sha512-Pe8BuPJQJd3FfRnm6H0ulKIGoMEQS+Vq01R6M5aCrFB/ccR/shT+0kXLjouGC1gFLm9hopTFN+DMP0pfwRWzPw==", + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/@tailwindcss/typography/-/typography-0.5.13.tgz", + "integrity": "sha512-ADGcJ8dX21dVVHIwTRgzrcunY6YY9uSlAHHGVKvkA+vLc5qLwEszvKts40lx7z0qc4clpjclwLeK5rVCV2P/uw==", "dev": true, "dependencies": { "lodash.castarray": "^4.4.0", @@ -1075,19 +1595,44 @@ "tailwindcss": ">=3.0.0 || insiders" } }, - "node_modules/@tremor/react": { - "version": "3.11.1", - "resolved": "https://registry.npmjs.org/@tremor/react/-/react-3.11.1.tgz", - "integrity": "sha512-oiBm8vFe0+05RFIHlriSmfZX7BMwgAIFGdvz5kAEbN6G/cGOh2oPkTGG+NPbbk8eyo68f13IT6KfTiMVSEhRSA==", + "node_modules/@tanstack/react-virtual": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.5.0.tgz", + "integrity": "sha512-rtvo7KwuIvqK9zb0VZ5IL7fiJAEnG+0EiFZz8FUOs+2mhGqdGmjKIaT1XU7Zq0eFqL0jonLlhbayJI/J2SA/Bw==", "dependencies": { - "@floating-ui/react": "^0.19.1", - "@headlessui/react": "^1.7.14", - "@headlessui/tailwindcss": "^0.1.3", - "date-fns": "^2.28.0", - "react-day-picker": "^8.7.1", - "react-transition-group": "^4.4.5", - "recharts": "^2.9.0", - "tailwind-merge": "^1.9.1" + "@tanstack/virtual-core": "3.5.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || 
^18.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/@tanstack/virtual-core": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.5.0.tgz", + "integrity": "sha512-KnPRCkQTyqhanNC0K63GBG3wA8I+D1fQuVnAvcBF8f13akOKeQp1gSbu6f77zCxhEk727iV5oQnbHLYzHrECLg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, + "node_modules/@tremor/react": { + "version": "3.16.3", + "resolved": "https://registry.npmjs.org/@tremor/react/-/react-3.16.3.tgz", + "integrity": "sha512-XiufPz4RRdrHrhwL7Cfcd9XoUEPyN/Q4jwj3kw1OQmFD1sYMCS2pAzzSP62k7zq02Z0QIPBuVK5p7/KQ+h4esQ==", + "dependencies": { + "@floating-ui/react": "^0.19.2", + "@headlessui/react": "^1.7.19", + "@headlessui/tailwindcss": "^0.2.0", + "date-fns": "^3.6.0", + "react-day-picker": "^8.10.1", + "react-transition-state": "^2.1.1", + "recharts": "^2.12.7", + "tailwind-merge": "^1.14.0" }, "peerDependencies": { "react": "^18.0.0", @@ -1118,9 +1663,9 @@ } }, "node_modules/@types/d3-path": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.0.2.tgz", - "integrity": "sha512-WAIEVlOCdd/NKRYTsqCpOMHQHemKBEINf8YXMYOtXH0GA7SY0dqMB78P3Uhgfy+4X+/Mlw2wDtlETkN6kQUCMA==" + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/@types/d3-path/-/d3-path-3.1.0.tgz", + "integrity": "sha512-P2dlU/q51fkOc/Gfl3Ul9kicV7l+ra934qBFXCFhrZMOL6du1TM0pm1ThYvENukyOn5h9v+yMJ9Fn5JK4QozrQ==" }, "node_modules/@types/d3-scale": { "version": "4.0.8", @@ -1156,12 +1701,25 @@ "@types/ms": "*" } }, - "node_modules/@types/hast": { - "version": "2.3.8", - "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.8.tgz", - "integrity": "sha512-aMIqAlFd2wTIDZuvLbhUT+TGvMxrNC8ECUIVtH6xxy0sQLs3iu6NO8Kp/VT5je7i5ufnebXzdV1dNDMnvaH6IQ==", + "node_modules/@types/estree": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", + "integrity": 
"sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==" + }, + "node_modules/@types/estree-jsx": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree-jsx/-/estree-jsx-1.0.5.tgz", + "integrity": "sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==", "dependencies": { - "@types/unist": "^2" + "@types/estree": "*" + } + }, + "node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "dependencies": { + "@types/unist": "*" } }, "node_modules/@types/hoist-non-react-statics": { @@ -1184,12 +1742,17 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/lodash": { + "version": "4.17.4", + "resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.4.tgz", + "integrity": "sha512-wYCP26ZLxaT3R39kiN2+HcJ4kTd3U1waI/cY7ivWYqFP6pW3ZNpvi6Wd6PHZx7T/t8z0vlkXMg3QYLa7DZ/IJQ==" + }, "node_modules/@types/mdast": { - "version": "3.0.15", - "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-3.0.15.tgz", - "integrity": "sha512-LnwD+mUEfxWMa1QpDraczIn6k0Ee3SMicuYSSzS6ZYl2gKS09EClnJYGd8Du6rfc5r/GZEk5o1mRb8TaTj03sQ==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", + "integrity": "sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==", "dependencies": { - "@types/unist": "^2" + "@types/unist": "*" } }, "node_modules/@types/ms": { @@ -1202,10 +1765,15 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.11.tgz", "integrity": "sha512-E5Kwq2n4SbMzQOn6wnmBjuK9ouqlURrcZDVfbo9ftDDTFt3nk7ZKK4GMOzoYgnpQJKcxwQw+lGaBvvlMo0qN/Q==" }, + "node_modules/@types/prismjs": { + "version": "1.26.4", + 
"resolved": "https://registry.npmjs.org/@types/prismjs/-/prismjs-1.26.4.tgz", + "integrity": "sha512-rlAnzkW2sZOjbqZ743IHUhFcvzaGbqijwOu8QZnZCjfQzBqFE3s4lOTJEsxikImav9uzz/42I+O7YUs1mWgMlg==" + }, "node_modules/@types/prop-types": { - "version": "15.7.11", - "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.11.tgz", - "integrity": "sha512-ga8y9v9uyeiLdpKddhxYQkxNDrfvuPrlFb0N1qnZZByvcElJaXthF1UhvCh9TLWJBEHeNtdnbysW7Y6Uq8CVng==" + "version": "15.7.12", + "resolved": "https://registry.npmjs.org/@types/prop-types/-/prop-types-15.7.12.tgz", + "integrity": "sha512-5zvhXYtRNRluoE/jAp4GVsSduVUzNWKkOZrCDBWYtE7biZywwdC2AcEzg+cSMLFRfVgeAFqpfNabiPjxFddV1Q==" }, "node_modules/@types/react": { "version": "18.0.32", @@ -1226,25 +1794,30 @@ } }, "node_modules/@types/scheduler": { - "version": "0.16.8", - "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.16.8.tgz", - "integrity": "sha512-WZLiwShhwLRmeV6zH+GkbOFT6Z6VklCItrDioxUnv+u4Ll+8vKeFySoFyK/0ctcRpOmwAicELfmys1sDc/Rw+A==" + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.23.0.tgz", + "integrity": "sha512-YIoDCTH3Af6XM5VuwGG/QL/CJqga1Zm3NkU3HZ4ZHK2fRMPYP1VczsTUqtsf43PH/iJNVlPHAo2oWX7BSdB2Hw==" }, "node_modules/@types/unist": { - "version": "2.0.10", - "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", - "integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-3.0.2.tgz", + "integrity": "sha512-dqId9J8K/vGi5Zr7oo212BGii5m3q5Hxlkwy3WpYuKPklmBEvsbMYYyLxAQpSffdLl/gdW0XUpKWFvYmyoWCoQ==" + }, + "node_modules/@types/uuid": { + "version": "9.0.8", + "resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.8.tgz", + "integrity": "sha512-jg+97EGIcY9AGHJJRaaPVgetKDsrTgbRjQ5Msgjh/DQKEFl0DtyRr/VCOyD1T2R1MNeWPK/u7JoGhlDZnKBAfA==" }, "node_modules/@typescript-eslint/parser": { - 
"version": "6.13.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-6.13.1.tgz", - "integrity": "sha512-fs2XOhWCzRhqMmQf0eicLa/CWSaYss2feXsy7xBD/pLyWke/jCIVc2s1ikEAtSW7ina1HNhv7kONoEfVNEcdDQ==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.2.0.tgz", + "integrity": "sha512-5FKsVcHTk6TafQKQbuIVkXq58Fnbkd2wDL4LB7AURN7RUOu1utVP+G8+6u3ZhEroW3DF6hyo3ZEXxgKgp4KeCg==", "dev": true, "dependencies": { - "@typescript-eslint/scope-manager": "6.13.1", - "@typescript-eslint/types": "6.13.1", - "@typescript-eslint/typescript-estree": "6.13.1", - "@typescript-eslint/visitor-keys": "6.13.1", + "@typescript-eslint/scope-manager": "7.2.0", + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/typescript-estree": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0", "debug": "^4.3.4" }, "engines": { @@ -1255,7 +1828,7 @@ "url": "https://opencollective.com/typescript-eslint" }, "peerDependencies": { - "eslint": "^7.0.0 || ^8.0.0" + "eslint": "^8.56.0" }, "peerDependenciesMeta": { "typescript": { @@ -1264,13 +1837,13 @@ } }, "node_modules/@typescript-eslint/scope-manager": { - "version": "6.13.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-6.13.1.tgz", - "integrity": "sha512-BW0kJ7ceiKi56GbT2KKzZzN+nDxzQK2DS6x0PiSMPjciPgd/JRQGMibyaN2cPt2cAvuoH0oNvn2fwonHI+4QUQ==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.2.0.tgz", + "integrity": "sha512-Qh976RbQM/fYtjx9hs4XkayYujB/aPwglw2choHmf3zBjB4qOywWSdt9+KLRdHubGcoSwBnXUH2sR3hkyaERRg==", "dev": true, "dependencies": { - "@typescript-eslint/types": "6.13.1", - "@typescript-eslint/visitor-keys": "6.13.1" + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0" }, "engines": { "node": "^16.0.0 || >=18.0.0" @@ -1281,9 +1854,9 @@ } }, "node_modules/@typescript-eslint/types": { - "version": "6.13.1", - "resolved": 
"https://registry.npmjs.org/@typescript-eslint/types/-/types-6.13.1.tgz", - "integrity": "sha512-gjeEskSmiEKKFIbnhDXUyiqVma1gRCQNbVZ1C8q7Zjcxh3WZMbzWVfGE9rHfWd1msQtPS0BVD9Jz9jded44eKg==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.2.0.tgz", + "integrity": "sha512-XFtUHPI/abFhm4cbCDc5Ykc8npOKBSJePY3a3s+lwumt7XWJuzP5cZcfZ610MIPHjQjNsOLlYK8ASPaNG8UiyA==", "dev": true, "engines": { "node": "^16.0.0 || >=18.0.0" @@ -1294,16 +1867,17 @@ } }, "node_modules/@typescript-eslint/typescript-estree": { - "version": "6.13.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-6.13.1.tgz", - "integrity": "sha512-sBLQsvOC0Q7LGcUHO5qpG1HxRgePbT6wwqOiGLpR8uOJvPJbfs0mW3jPA3ujsDvfiVwVlWUDESNXv44KtINkUQ==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.2.0.tgz", + "integrity": "sha512-cyxS5WQQCoBwSakpMrvMXuMDEbhOo9bNHHrNcEWis6XHx6KF518tkF1wBvKIn/tpq5ZpUYK7Bdklu8qY0MsFIA==", "dev": true, "dependencies": { - "@typescript-eslint/types": "6.13.1", - "@typescript-eslint/visitor-keys": "6.13.1", + "@typescript-eslint/types": "7.2.0", + "@typescript-eslint/visitor-keys": "7.2.0", "debug": "^4.3.4", "globby": "^11.1.0", "is-glob": "^4.0.3", + "minimatch": "9.0.3", "semver": "^7.5.4", "ts-api-utils": "^1.0.1" }, @@ -1320,13 +1894,37 @@ } } }, - "node_modules/@typescript-eslint/visitor-keys": { - "version": "6.13.1", - "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-6.13.1.tgz", - "integrity": "sha512-NDhQUy2tg6XGNBGDRm1XybOHSia8mcXmlbKWoQP+nm1BIIMxa55shyJfZkHpEBN62KNPLrocSM2PdPcaLgDKMQ==", + "node_modules/@typescript-eslint/typescript-estree/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": 
"sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", "dev": true, "dependencies": { - "@typescript-eslint/types": "6.13.1", + "balanced-match": "^1.0.0" + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { + "version": "9.0.3", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.3.tgz", + "integrity": "sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.2.0.tgz", + "integrity": "sha512-c6EIQRHhcpl6+tO8EMR+kjkkV+ugUNXOmeASA1rlzkd8EPIriavpWoiEz1HR/VLhbVIdhqnV6E7JZm00cBDx2A==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.2.0", "eslint-visitor-keys": "^3.4.1" }, "engines": { @@ -1340,13 +1938,12 @@ "node_modules/@ungap/structured-clone": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", - "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==", - "dev": true + "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==" }, "node_modules/acorn": { - "version": "8.11.2", - "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.2.tgz", - "integrity": "sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==", + "version": "8.11.3", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", + "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", "dev": true, "bin": { "acorn": 
"bin/acorn" @@ -1384,7 +1981,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "engines": { "node": ">=8" } @@ -1393,7 +1989,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, "dependencies": { "color-convert": "^2.0.1" }, @@ -1433,9 +2028,9 @@ "dev": true }, "node_modules/aria-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.3.tgz", - "integrity": "sha512-xcLxITLe2HYa1cnYnwCjkOO1PqUHQpozB8x9AR0OgWN2woOBi5kSDVxKfd0b7sb1hw5qFeJhXm9H1nu3xSfLeQ==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/aria-hidden/-/aria-hidden-1.2.4.tgz", + "integrity": "sha512-y+CcFFwelSXpLZk/7fMB2mUbGtX9lKycf1MWJ7CaTIERyitVlyQx6C+sxcROU2BAJ24OiZyK+8wj2i8AlBoS3A==", "dependencies": { "tslib": "^2.0.0" }, @@ -1453,28 +2048,32 @@ } }, "node_modules/array-buffer-byte-length": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.0.tgz", - "integrity": "sha512-LPuwb2P+NrQw3XhxGc36+XSvuBPopovXYTR9Ew++Du9Yb/bx5AzBfrIsBoj0EZUifjQU+sHL21sseZ3jerWO/A==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz", + "integrity": "sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "is-array-buffer": "^3.0.1" + "call-bind": "^1.0.5", + "is-array-buffer": "^3.0.4" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/array-includes": { - "version": "3.1.7", - "resolved": 
"https://registry.npmjs.org/array-includes/-/array-includes-3.1.7.tgz", - "integrity": "sha512-dlcsNBIiWhPkHdOEEKnehA+RNUWDc4UqFtnIXU4uuYDPtA4LDkr7qip2p0VvFAEXNDr0yWZ9PJyIRiGjRLQzwQ==", + "version": "3.1.8", + "resolved": "https://registry.npmjs.org/array-includes/-/array-includes-3.1.8.tgz", + "integrity": "sha512-itaWrbYbqpGXkGhZPGUulwnhVf5Hpy1xiCFsGqyIGglbBxmG5vSjxQen3/WGOjPpNEv1RtBLKxbmVXm8HpJStQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1", - "get-intrinsic": "^1.2.1", + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-object-atoms": "^1.0.0", + "get-intrinsic": "^1.2.4", "is-string": "^1.0.7" }, "engines": { @@ -1493,17 +2092,38 @@ "node": ">=8" } }, - "node_modules/array.prototype.findlastindex": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.3.tgz", - "integrity": "sha512-LzLoiOMAxvy+Gd3BAq3B7VeIgPdo+Q8hthvKtXybMvRV0jrXfJM/t8mw7nNlpEcVlVUnCnM2KSX4XU5HmpodOA==", + "node_modules/array.prototype.findlast": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/array.prototype.findlast/-/array.prototype.findlast-1.2.5.tgz", + "integrity": "sha512-CVvd6FHg1Z3POpBLxO6E6zr+rSKEQ9L6rZHAaY7lLfhKsWYUBBOuMs0e9o24oopj6H+geRCX0YJ+TJLBK2eHyQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1", - "es-shim-unscopables": "^1.0.0", - "get-intrinsic": "^1.2.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-shim-unscopables": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/array.prototype.findlastindex": { + "version": "1.2.5", + "resolved": 
"https://registry.npmjs.org/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.5.tgz", + "integrity": "sha512-zfETvRFA8o7EiNn++N5f/kaCw221hrpGsDmcpndVupkPzEc1Wuf3VgC0qby1BbHs7f5DVYjgtEU2LLh5bqeGfQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-shim-unscopables": "^1.0.2" }, "engines": { "node": ">= 0.4" @@ -1548,31 +2168,44 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/array.prototype.tosorted": { + "node_modules/array.prototype.toreversed": { "version": "1.1.2", - "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.2.tgz", - "integrity": "sha512-HuQCHOlk1Weat5jzStICBCd83NxiIMwqDg/dHEsoefabn/hJRj5pVdWcPUSpRrwhwxZOsQassMpgN/xRYFBMIg==", + "resolved": "https://registry.npmjs.org/array.prototype.toreversed/-/array.prototype.toreversed-1.1.2.tgz", + "integrity": "sha512-wwDCoT4Ck4Cz7sLtgUmzR5UV3YF5mFHUlbChCzZBQZ+0m2cl/DH3tKgvphv1nKgFsJ48oCSg6p91q2Vm0I/ZMA==", "dev": true, "dependencies": { "call-bind": "^1.0.2", "define-properties": "^1.2.0", "es-abstract": "^1.22.1", - "es-shim-unscopables": "^1.0.0", - "get-intrinsic": "^1.2.1" + "es-shim-unscopables": "^1.0.0" + } + }, + "node_modules/array.prototype.tosorted": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/array.prototype.tosorted/-/array.prototype.tosorted-1.1.3.tgz", + "integrity": "sha512-/DdH4TiTmOKzyQbp/eadcCVexiCb36xJg7HshYOYJnNZFDj33GEv0P7GxsynpShhq4OLYJzbGcBDkLsDt7MnNg==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.5", + "define-properties": "^1.2.1", + "es-abstract": "^1.22.3", + "es-errors": "^1.1.0", + "es-shim-unscopables": "^1.0.2" } }, "node_modules/arraybuffer.prototype.slice": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.2.tgz", - "integrity": 
"sha512-yMBKppFur/fbHu9/6USUe03bZ4knMYiwFBcyiaXB8Go0qNehwX6inYPzK9U0NeQvGxKthcmHcaR8P5MStSRBAw==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.3.tgz", + "integrity": "sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A==", "dev": true, "dependencies": { - "array-buffer-byte-length": "^1.0.0", - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1", - "get-intrinsic": "^1.2.1", - "is-array-buffer": "^3.0.2", + "array-buffer-byte-length": "^1.0.1", + "call-bind": "^1.0.5", + "define-properties": "^1.2.1", + "es-abstract": "^1.22.3", + "es-errors": "^1.2.1", + "get-intrinsic": "^1.2.3", + "is-array-buffer": "^3.0.4", "is-shared-array-buffer": "^1.0.2" }, "engines": { @@ -1588,15 +2221,6 @@ "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", "dev": true }, - "node_modules/asynciterator.prototype": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/asynciterator.prototype/-/asynciterator.prototype-1.0.0.tgz", - "integrity": "sha512-wwHYEIS0Q80f5mosx3L/dfG5t5rjEa9Ft51GTaNt862EnpyGHpgz2RkZvLPp1oF5TnAiTohkEKVEu8pQPJI7Vg==", - "dev": true, - "dependencies": { - "has-symbols": "^1.0.3" - } - }, "node_modules/attr-accept": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/attr-accept/-/attr-accept-2.2.2.tgz", @@ -1606,9 +2230,9 @@ } }, "node_modules/autoprefixer": { - "version": "10.4.16", - "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.16.tgz", - "integrity": "sha512-7vd3UC6xKp0HLfua5IjZlcXvGAGy7cBAXTg2lyQ/8WpNhd6SiZ8Be+xm3FyBSYJx5GKcpRCzBh7RH4/0dnY+uQ==", + "version": "10.4.19", + "resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.19.tgz", + "integrity": "sha512-BaENR2+zBZ8xXhM4pUaKUxlVdxZ0EZhjvbopwnXmxRUfqDmwSpC2lAi/QXvx7NRdPCo1WKEcEF6mV64si1z4Ew==", "funding": [ { "type": 
"opencollective", @@ -1624,9 +2248,9 @@ } ], "dependencies": { - "browserslist": "^4.21.10", - "caniuse-lite": "^1.0.30001538", - "fraction.js": "^4.3.6", + "browserslist": "^4.23.0", + "caniuse-lite": "^1.0.30001599", + "fraction.js": "^4.3.7", "normalize-range": "^0.1.2", "picocolors": "^1.0.0", "postcss-value-parser": "^4.2.0" @@ -1642,10 +2266,13 @@ } }, "node_modules/available-typed-arrays": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz", - "integrity": "sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", + "integrity": "sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==", "dev": true, + "dependencies": { + "possible-typed-array-names": "^1.0.0" + }, "engines": { "node": ">= 0.4" }, @@ -1672,9 +2299,9 @@ } }, "node_modules/b4a": { - "version": "1.6.4", - "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz", - "integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw==" + "version": "1.6.6", + "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", + "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" }, "node_modules/babel-plugin-styled-components": { "version": "2.1.4", @@ -1705,6 +2332,47 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "node_modules/bare-events": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz", + "integrity": "sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==", + "optional": true + 
}, + "node_modules/bare-fs": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.0.tgz", + "integrity": "sha512-TNFqa1B4N99pds2a5NYHR15o0ZpdNKbAeKTE/+G6ED/UeOavv8RY3dr/Fu99HW3zU3pXpo2kDNO8Sjsm2esfOw==", + "optional": true, + "dependencies": { + "bare-events": "^2.0.0", + "bare-path": "^2.0.0", + "bare-stream": "^1.0.0" + } + }, + "node_modules/bare-os": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.3.0.tgz", + "integrity": "sha512-oPb8oMM1xZbhRQBngTgpcQ5gXw6kjOaRsSWsIeNyRxGed2w/ARyP7ScBYpWR1qfX2E5rS3gBw6OWcSQo+s+kUg==", + "optional": true + }, + "node_modules/bare-path": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-2.1.2.tgz", + "integrity": "sha512-o7KSt4prEphWUHa3QUwCxUI00R86VdjiuxmJK0iNVDHYPGo+HsDaVCnqCmPbf/MiW1ok8F4p3m8RTHlWk8K2ig==", + "optional": true, + "dependencies": { + "bare-os": "^2.1.0" + } + }, + "node_modules/bare-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-1.0.0.tgz", + "integrity": "sha512-KhNUoDL40iP4gFaLSsoGE479t0jHijfYdIcxRn/XtezA2BaUD0NRf/JGRpsMq6dMNM+SrCrB0YSSo/5wBY4rOQ==", + "optional": true, + "dependencies": { + "streamx": "^2.16.1" + } + }, "node_modules/base64-js": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", @@ -1725,11 +2393,14 @@ ] }, "node_modules/binary-extensions": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", - "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", + "integrity": "sha512-Ceh+7ox5qe7LJuLHoY0feh3pHuUDHAcRUeyL2VYghZwfpkNIy/+8Ocg0a3UuSoYzavmylwuLWQOf3hl0jjMMIw==", "engines": { "node": ">=8" + }, + "funding": { + "url": 
"https://github.com/sponsors/sindresorhus" } }, "node_modules/bl": { @@ -1746,6 +2417,7 @@ "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, "dependencies": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -1763,9 +2435,9 @@ } }, "node_modules/browserslist": { - "version": "4.22.2", - "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.22.2.tgz", - "integrity": "sha512-0UgcrvQmBDvZHFGdYUehrCNIazki7/lUP3kkoi/r3YB2amZbFM9J43ZRkJTXBUZK4gmx56+Sqk9+Vs9mwZx9+A==", + "version": "4.23.0", + "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.0.tgz", + "integrity": "sha512-QW8HiM1shhT2GuzkvklfjcKDiWFXHOeFCIA/huJPwHsslwcydgk7X+z2zXpEijP98UCY7HbubZt5J2Zgvf0CaQ==", "funding": [ { "type": "opencollective", @@ -1781,8 +2453,8 @@ } ], "dependencies": { - "caniuse-lite": "^1.0.30001565", - "electron-to-chromium": "^1.4.601", + "caniuse-lite": "^1.0.30001587", + "electron-to-chromium": "^1.4.668", "node-releases": "^2.0.14", "update-browserslist-db": "^1.0.13" }, @@ -1828,14 +2500,19 @@ } }, "node_modules/call-bind": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.5.tgz", - "integrity": "sha512-C3nQxfFZxFRVoJoGKKI8y3MOEo129NQ+FgQ08iye+Mk4zNZZGdjfs06bVTr+DBSlA66Q2VEcMki/cUCP4SercQ==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", + "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", "dev": true, "dependencies": { + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", "function-bind": "^1.1.2", - "get-intrinsic": "^1.2.1", - "set-function-length": "^1.1.1" + "get-intrinsic": "^1.2.4", + "set-function-length": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": 
"https://github.com/sponsors/ljharb" @@ -1867,9 +2544,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001566", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001566.tgz", - "integrity": "sha512-ggIhCsTxmITBAMmK8yZjEhCO5/47jKXPu6Dha/wuCS4JePVL+3uiDEBuhu2aIoT+bqTOR8L76Ip1ARL9xYsEJA==", + "version": "1.0.30001620", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001620.tgz", + "integrity": "sha512-WJvYsOjd1/BYUY6SNGUosK9DUidBPDTnOARHp3fSmFO1ekdxaY6nKRttEVrfMmYi80ctS0kz1wiWmm14fVc3ew==", "funding": [ { "type": "opencollective", @@ -1885,6 +2562,15 @@ } ] }, + "node_modules/ccount": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/ccount/-/ccount-2.0.1.tgz", + "integrity": "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chalk": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", @@ -1910,16 +2596,37 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/character-entities-html4": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/character-entities-html4/-/character-entities-html4-2.1.0.tgz", + "integrity": "sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-entities-legacy": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/character-entities-legacy/-/character-entities-legacy-3.0.0.tgz", + "integrity": "sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/character-reference-invalid": { + "version": "2.0.1", + "resolved": 
"https://registry.npmjs.org/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", + "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/chokidar": { - "version": "3.5.3", - "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.5.3.tgz", - "integrity": "sha512-Dr3sfKRP6oTcjf2JmUmFJfeVMvXBdegxB0iVQ5eb2V10uFJUCAS8OByZdVAyVb8xXNz3GjjTgj9kLWsZTqE6kw==", - "funding": [ - { - "type": "individual", - "url": "https://paulmillr.com/funding/" - } - ], + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-3.6.0.tgz", + "integrity": "sha512-7VT13fmjotKpGipCW9JEQAusEPE+Ei8nl6/g4FBAmIm0GOOLMua9NDDo/DWp0ZAxCr3cPq5ZpBqmPAQgDda2Pw==", "dependencies": { "anymatch": "~3.1.2", "braces": "~3.0.2", @@ -1932,6 +2639,9 @@ "engines": { "node": ">= 8.10.0" }, + "funding": { + "url": "https://paulmillr.com/funding/" + }, "optionalDependencies": { "fsevents": "~2.3.2" } @@ -1958,9 +2668,9 @@ "integrity": "sha512-IV3Ou0jSMzZrd3pZ48nLkT9DA7Ag1pnPzaiQhpW7c3RbcqqzvzzVu+L8gfqMp/8IM2MQtSiqaCxrrcfu8I8rMA==" }, "node_modules/clsx": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.0.0.tgz", - "integrity": "sha512-rQ1+kcj+ttHG0MKVGBUXwayCCF1oh39BF5COIpRzuCEv8Mwjv0XucrI2ExNTOn9IlLifGClWQcU9BrZORvtw6Q==", + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", + "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", "engines": { "node": ">=6" } @@ -2022,7 +2732,8 @@ "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" + "integrity": 
"sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true }, "node_modules/convert-source-map": { "version": "2.0.0", @@ -2034,7 +2745,6 @@ "version": "7.0.3", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", - "dev": true, "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", @@ -2074,9 +2784,9 @@ } }, "node_modules/csstype": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.2.tgz", - "integrity": "sha512-I7K1Uu0MBPzaFKg4nI5Q7Vs2t+3gWWW648spaF+Rg7pI9ds18Ugn+lvg4SHczUdKlHI5LWBXyqfS8+DufyBsgQ==" + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz", + "integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==" }, "node_modules/d3-array": { "version": "3.2.4", @@ -2194,19 +2904,64 @@ "integrity": "sha512-sdQSFB7+llfUcQHUQO3+B8ERRj0Oa4w9POWMI/puGtuf7gFywGmkaLCElnudfTiKZV+NvHqL0ifzdrI8Ro7ESA==", "dev": true }, - "node_modules/date-fns": { - "version": "2.30.0", - "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-2.30.0.tgz", - "integrity": "sha512-fnULvOpxnC5/Vg3NCiWelDsLiUc9bRwAPs/+LfTLNvetFCtCTN+yQz15C/fs4AwX1R9K5GLtLfn8QW+dWisaAw==", + "node_modules/data-view-buffer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/data-view-buffer/-/data-view-buffer-1.0.1.tgz", + "integrity": "sha512-0lht7OugA5x3iJLOWFhWK/5ehONdprk0ISXqVFn/NFrDu+cuc8iADFrGQz5BnRK7LLU3JmkbXSxaqX+/mXYtUA==", + "dev": true, "dependencies": { - "@babel/runtime": "^7.21.0" + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" }, "engines": { - "node": ">=0.11" + "node": ">= 0.4" }, "funding": { - "type": "opencollective", - "url": "https://opencollective.com/date-fns" + "url": "https://github.com/sponsors/ljharb" + } + 
}, + "node_modules/data-view-byte-length": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/data-view-byte-length/-/data-view-byte-length-1.0.1.tgz", + "integrity": "sha512-4J7wRJD3ABAzr8wP+OcIcqq2dlUKp4DVflx++hs5h5ZKydWMI6/D/fAot+yh6g2tHh8fLFTvNOaVN357NvSrOQ==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/data-view-byte-offset": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/data-view-byte-offset/-/data-view-byte-offset-1.0.0.tgz", + "integrity": "sha512-t/Ygsytq+R995EJ5PZlD4Cu56sWa8InXySaViRzw9apusqsOO2bQP+SbYzAhR0pFKoB+43lYy8rWban9JSuXnA==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", + "is-data-view": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/date-fns": { + "version": "3.6.0", + "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-3.6.0.tgz", + "integrity": "sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/kossnocorp" } }, "node_modules/debug": { @@ -2279,17 +3034,20 @@ } }, "node_modules/define-data-property": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.1.tgz", - "integrity": "sha512-E7uGkTzkk1d0ByLeSc6ZsFS79Axg+m1P/VsgYsxHgiuc3tFSj+MjMIwe90FC4lOAZzNBdY7kkO2P2wKdsQ1vgQ==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", + "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.1", - "gopd": "^1.0.1", - 
"has-property-descriptors": "^1.0.0" + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "gopd": "^1.0.1" }, "engines": { "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" } }, "node_modules/define-properties": { @@ -2318,26 +3076,35 @@ } }, "node_modules/detect-libc": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.2.tgz", - "integrity": "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz", + "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==", "engines": { "node": ">=8" } }, + "node_modules/detect-node-es": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/detect-node-es/-/detect-node-es-1.1.0.tgz", + "integrity": "sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==" + }, + "node_modules/devlop": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/devlop/-/devlop-1.1.0.tgz", + "integrity": "sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==", + "dependencies": { + "dequal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/didyoumean": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/didyoumean/-/didyoumean-1.2.2.tgz", "integrity": "sha512-gxtyfqMg7GKyhQmb056K7M3xszy/myH8w+B4RT+QXBQsvAOdc3XymqDDPHx1BgPgsdAA5SIifona89YtRATDzw==" }, - "node_modules/diff": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/diff/-/diff-5.1.0.tgz", - "integrity": "sha512-D+mk+qE8VC/PAUrlAU34N+VfXev0ghe5ywmpqrawphmVZc1bEfn56uo9qpyGp1p4xpzOHkSW4ztBd6L7Xx4ACw==", - "engines": { - "node": ">=0.3.1" - } - }, "node_modules/dir-glob": { "version": "3.0.1", "resolved": 
"https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", @@ -2376,16 +3143,20 @@ "csstype": "^3.0.2" } }, + "node_modules/eastasianwidth": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", + "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==" + }, "node_modules/electron-to-chromium": { - "version": "1.4.601", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.601.tgz", - "integrity": "sha512-SpwUMDWe9tQu8JX5QCO1+p/hChAi9AE9UpoC3rcHVc+gdCGlbT3SGb5I1klgb952HRIyvt9wZhSz9bNBYz9swA==" + "version": "1.4.773", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.773.tgz", + "integrity": "sha512-87eHF+h3PlCRwbxVEAw9KtK3v7lWfc/sUDr0W76955AdYTG4bV/k0zrl585Qnj/skRMH2qOSiE+kqMeOQ+LOpw==" }, "node_modules/emoji-regex": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", - "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true + "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==" }, "node_modules/end-of-stream": { "version": "1.4.4", @@ -2396,9 +3167,9 @@ } }, "node_modules/enhanced-resolve": { - "version": "5.15.0", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.15.0.tgz", - "integrity": "sha512-LXYT42KJ7lpIKECr2mAXIaMldcNCh/7E0KBKOu4KSfkHmP+mZmSs+8V5gBAqisWBy0OO4W5Oyys0GO1Y8KtdKg==", + "version": "5.16.1", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.16.1.tgz", + "integrity": "sha512-4U5pNsuDl0EhuZpq46M5xPslstkviJuhrdobaRDBk2Jy2KO37FDAJl4lb2KlNabxT0m4MTK2UHNrsAcphE8nyw==", "dev": true, "dependencies": { "graceful-fs": "^4.2.4", @@ -2408,51 +3179,69 @@ "node": ">=10.13.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": 
"https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-abstract": { - "version": "1.22.3", - "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.22.3.tgz", - "integrity": "sha512-eiiY8HQeYfYH2Con2berK+To6GrK2RxbPawDkGq4UiCQQfZHb6wX9qQqkbpPqaxQFcl8d9QzZqo0tGE0VcrdwA==", + "version": "1.23.3", + "resolved": "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.3.tgz", + "integrity": "sha512-e+HfNH61Bj1X9/jLc5v1owaLYuHdeHHSQlkhCBiTK8rBvKaULl/beGMxwrMXjpYrv4pz22BlY570vVePA2ho4A==", "dev": true, "dependencies": { - "array-buffer-byte-length": "^1.0.0", - "arraybuffer.prototype.slice": "^1.0.2", - "available-typed-arrays": "^1.0.5", - "call-bind": "^1.0.5", - "es-set-tostringtag": "^2.0.1", + "array-buffer-byte-length": "^1.0.1", + "arraybuffer.prototype.slice": "^1.0.3", + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", + "data-view-buffer": "^1.0.1", + "data-view-byte-length": "^1.0.1", + "data-view-byte-offset": "^1.0.0", + "es-define-property": "^1.0.0", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "es-set-tostringtag": "^2.0.3", "es-to-primitive": "^1.2.1", "function.prototype.name": "^1.1.6", - "get-intrinsic": "^1.2.2", - "get-symbol-description": "^1.0.0", + "get-intrinsic": "^1.2.4", + "get-symbol-description": "^1.0.2", "globalthis": "^1.0.3", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0", - "has-proto": "^1.0.1", + "has-property-descriptors": "^1.0.2", + "has-proto": "^1.0.3", "has-symbols": "^1.0.3", - "hasown": "^2.0.0", - "internal-slot": "^1.0.5", - "is-array-buffer": "^3.0.2", + "hasown": "^2.0.2", + "internal-slot": "^1.0.7", + "is-array-buffer": "^3.0.4", "is-callable": "^1.2.7", - "is-negative-zero": "^2.0.2", + "is-data-view": "^1.0.1", + 
"is-negative-zero": "^2.0.3", "is-regex": "^1.1.4", - "is-shared-array-buffer": "^1.0.2", + "is-shared-array-buffer": "^1.0.3", "is-string": "^1.0.7", - "is-typed-array": "^1.1.12", + "is-typed-array": "^1.1.13", "is-weakref": "^1.0.2", "object-inspect": "^1.13.1", "object-keys": "^1.1.1", - "object.assign": "^4.1.4", - "regexp.prototype.flags": "^1.5.1", - "safe-array-concat": "^1.0.1", - "safe-regex-test": "^1.0.0", - "string.prototype.trim": "^1.2.8", - "string.prototype.trimend": "^1.0.7", - "string.prototype.trimstart": "^1.0.7", - "typed-array-buffer": "^1.0.0", - "typed-array-byte-length": "^1.0.0", - "typed-array-byte-offset": "^1.0.0", - "typed-array-length": "^1.0.4", + "object.assign": "^4.1.5", + "regexp.prototype.flags": "^1.5.2", + "safe-array-concat": "^1.1.2", + "safe-regex-test": "^1.0.3", + "string.prototype.trim": "^1.2.9", + "string.prototype.trimend": "^1.0.8", + "string.prototype.trimstart": "^1.0.8", + "typed-array-buffer": "^1.0.2", + "typed-array-byte-length": "^1.0.1", + "typed-array-byte-offset": "^1.0.2", + "typed-array-length": "^1.0.6", "unbox-primitive": "^1.0.2", - "which-typed-array": "^1.1.13" + "which-typed-array": "^1.1.15" }, "engines": { "node": ">= 0.4" @@ -2461,37 +3250,73 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/es-iterator-helpers": { - "version": "1.0.15", - "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.15.tgz", - "integrity": "sha512-GhoY8uYqd6iwUl2kgjTm4CZAf6oo5mHK7BPqx3rKgx893YSsy0LGHV6gfqqQvZt/8xM8xeOnfXBCfqclMKkJ5g==", + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", "dev": true, "dependencies": { - "asynciterator.prototype": "^1.0.0", - "call-bind": "^1.0.2", + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" + } + }, + 
"node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-iterator-helpers": { + "version": "1.0.19", + "resolved": "https://registry.npmjs.org/es-iterator-helpers/-/es-iterator-helpers-1.0.19.tgz", + "integrity": "sha512-zoMwbCcH5hwUkKJkT8kDIBZSz9I6mVG//+lDCinLCGov4+r7NIy0ld8o03M0cJxl2spVf6ESYVS6/gpIfq1FFw==", + "dev": true, + "dependencies": { + "call-bind": "^1.0.7", "define-properties": "^1.2.1", - "es-abstract": "^1.22.1", - "es-set-tostringtag": "^2.0.1", - "function-bind": "^1.1.1", - "get-intrinsic": "^1.2.1", + "es-abstract": "^1.23.3", + "es-errors": "^1.3.0", + "es-set-tostringtag": "^2.0.3", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", "globalthis": "^1.0.3", - "has-property-descriptors": "^1.0.0", - "has-proto": "^1.0.1", + "has-property-descriptors": "^1.0.2", + "has-proto": "^1.0.3", "has-symbols": "^1.0.3", - "internal-slot": "^1.0.5", + "internal-slot": "^1.0.7", "iterator.prototype": "^1.1.2", - "safe-array-concat": "^1.0.1" + "safe-array-concat": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.0.0.tgz", + "integrity": "sha512-MZ4iQ6JwHOBQjahnjwaC1ZtIBH+2ohjamzAO3oaHcXYup7qxjF2fixyH+Q71voWHeOkI2q/TnJao/KfXYIZWbw==", + "dev": true, + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" } }, "node_modules/es-set-tostringtag": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.2.tgz", - "integrity": "sha512-BuDyupZt65P9D2D2vA/zqcI3G5xRsklm5N3xCwuiy+/vKy8i0ifdsQP1sLgO4tZDSCaQUSnmC48khknGMV3D2Q==", + "version": "2.0.3", + "resolved": 
"https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz", + "integrity": "sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.2", - "has-tostringtag": "^1.0.0", - "hasown": "^2.0.0" + "get-intrinsic": "^1.2.4", + "has-tostringtag": "^1.0.2", + "hasown": "^2.0.1" }, "engines": { "node": ">= 0.4" @@ -2524,9 +3349,9 @@ } }, "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", "engines": { "node": ">=6" } @@ -2544,16 +3369,16 @@ } }, "node_modules/eslint": { - "version": "8.55.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.55.0.tgz", - "integrity": "sha512-iyUUAM0PCKj5QpwGfmCAG9XXbZCWsqP/eWAWrG/W0umvjuLRBECwSFdt+rCntju0xEH7teIABPwXpahftIaTdA==", + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz", + "integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", "@eslint/eslintrc": "^2.1.4", - "@eslint/js": "8.55.0", - "@humanwhocodes/config-array": "^0.11.13", + "@eslint/js": "8.57.0", + "@humanwhocodes/config-array": "^0.11.14", "@humanwhocodes/module-importer": "^1.0.1", "@nodelib/fs.walk": "^1.2.8", "@ungap/structured-clone": "^1.2.0", @@ -2599,14 +3424,14 @@ } }, "node_modules/eslint-config-next": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-14.0.3.tgz", - "integrity": 
"sha512-IKPhpLdpSUyKofmsXUfrvBC49JMUTdeaD8ZIH4v9Vk0sC1X6URTuTJCLtA0Vwuj7V/CQh0oISuSTvNn5//Buew==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/eslint-config-next/-/eslint-config-next-14.2.3.tgz", + "integrity": "sha512-ZkNztm3Q7hjqvB1rRlOX8P9E/cXRL9ajRcs8jufEtwMfTVYRqnmtnaSu57QqHyBlovMuiB8LEzfLBkh5RYV6Fg==", "dev": true, "dependencies": { - "@next/eslint-plugin-next": "14.0.3", + "@next/eslint-plugin-next": "14.2.3", "@rushstack/eslint-patch": "^1.3.3", - "@typescript-eslint/parser": "^5.4.2 || ^6.0.0", + "@typescript-eslint/parser": "^5.4.2 || ^6.0.0 || 7.0.0 - 7.2.0", "eslint-import-resolver-node": "^0.3.6", "eslint-import-resolver-typescript": "^3.5.2", "eslint-plugin-import": "^2.28.1", @@ -2670,9 +3495,9 @@ } }, "node_modules/eslint-module-utils": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.8.0.tgz", - "integrity": "sha512-aWajIYfsqCKRDgUfjEXNN/JlrzauMuSEy5sbd7WXbtW3EH6A6MpwEh42c7qD+MqQo9QMJ6fWLAeIJynx0g6OAw==", + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/eslint-module-utils/-/eslint-module-utils-2.8.1.tgz", + "integrity": "sha512-rXDXR3h7cs7dy9RNpUlQf80nX31XWJEyGq1tRMo+6GsO5VmTe4UTwtmonAD4ZkAsrfMVDA2wlGJ3790Ys+D49Q==", "dev": true, "dependencies": { "debug": "^3.2.7" @@ -2696,9 +3521,9 @@ } }, "node_modules/eslint-plugin-import": { - "version": "2.29.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.29.0.tgz", - "integrity": "sha512-QPOO5NO6Odv5lpoTkddtutccQjysJuFxoPS7fAHO+9m9udNHvTCPSAMW9zGAYj8lAIdr40I8yPCdUYrncXtrwg==", + "version": "2.29.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-import/-/eslint-plugin-import-2.29.1.tgz", + "integrity": "sha512-BbPC0cuExzhiMo4Ff1BTVwHpjjv28C5R+btTOGaCRC7UEz801up0JadwkeSk5Ued6TG34uaczuVuH6qyy5YUxw==", "dev": true, "dependencies": { "array-includes": "^3.1.7", @@ -2717,7 +3542,7 @@ "object.groupby": "^1.0.1", "object.values": "^1.1.7", "semver": "^6.3.1", - 
"tsconfig-paths": "^3.14.2" + "tsconfig-paths": "^3.15.0" }, "engines": { "node": ">=4" @@ -2787,27 +3612,29 @@ } }, "node_modules/eslint-plugin-react": { - "version": "7.33.2", - "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.33.2.tgz", - "integrity": "sha512-73QQMKALArI8/7xGLNI/3LylrEYrlKZSb5C9+q3OtOewTnMQi5cT+aE9E41sLCmli3I9PGGmD1yiZydyo4FEPw==", + "version": "7.34.1", + "resolved": "https://registry.npmjs.org/eslint-plugin-react/-/eslint-plugin-react-7.34.1.tgz", + "integrity": "sha512-N97CxlouPT1AHt8Jn0mhhN2RrADlUAsk1/atcT2KyA/l9Q/E6ll7OIGwNumFmWfZ9skV3XXccYS19h80rHtgkw==", "dev": true, "dependencies": { - "array-includes": "^3.1.6", - "array.prototype.flatmap": "^1.3.1", - "array.prototype.tosorted": "^1.1.1", + "array-includes": "^3.1.7", + "array.prototype.findlast": "^1.2.4", + "array.prototype.flatmap": "^1.3.2", + "array.prototype.toreversed": "^1.1.2", + "array.prototype.tosorted": "^1.1.3", "doctrine": "^2.1.0", - "es-iterator-helpers": "^1.0.12", + "es-iterator-helpers": "^1.0.17", "estraverse": "^5.3.0", "jsx-ast-utils": "^2.4.1 || ^3.0.0", "minimatch": "^3.1.2", - "object.entries": "^1.1.6", - "object.fromentries": "^2.0.6", - "object.hasown": "^1.1.2", - "object.values": "^1.1.6", + "object.entries": "^1.1.7", + "object.fromentries": "^2.0.7", + "object.hasown": "^1.1.3", + "object.values": "^1.1.7", "prop-types": "^15.8.1", - "resolve": "^2.0.0-next.4", + "resolve": "^2.0.0-next.5", "semver": "^6.3.1", - "string.prototype.matchall": "^4.0.8" + "string.prototype.matchall": "^4.0.10" }, "engines": { "node": ">=4" @@ -2817,9 +3644,9 @@ } }, "node_modules/eslint-plugin-react-hooks": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.6.0.tgz", - "integrity": "sha512-oFc7Itz9Qxh2x4gNHStv3BqJq54ExXmfC+a1NjAta66IAN87Wu0R/QArgIS9qKzX3dXKPI9H5crl9QchNMY9+g==", + "version": "4.6.2", + "resolved": 
"https://registry.npmjs.org/eslint-plugin-react-hooks/-/eslint-plugin-react-hooks-4.6.2.tgz", + "integrity": "sha512-QzliNJq4GinDBcD8gPB5v0wh6g8q3SUi6EFF0x8N/BL9PoVs0atuGc47ozMRyOWAKdwaZ5OnbOEa3WR+dSGKuQ==", "dev": true, "engines": { "node": ">=10" @@ -2944,6 +3771,15 @@ "node": ">=4.0" } }, + "node_modules/estree-util-is-identifier-name": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/estree-util-is-identifier-name/-/estree-util-is-identifier-name-3.0.0.tgz", + "integrity": "sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/esutils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", @@ -3029,9 +3865,9 @@ "dev": true }, "node_modules/fastq": { - "version": "1.15.0", - "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz", - "integrity": "sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==", + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", "dependencies": { "reusify": "^1.0.4" } @@ -3101,9 +3937,9 @@ } }, "node_modules/flatted": { - "version": "3.2.9", - "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.9.tgz", - "integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ==", + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz", + "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", "dev": true }, "node_modules/for-each": { @@ -3115,10 +3951,25 @@ "is-callable": "^1.1.3" } }, + "node_modules/foreground-child": { + "version": "3.1.1", + "resolved": 
"https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz", + "integrity": "sha512-TMKDUnIte6bfb5nWv7V/caI169OHgvwjb7V4WkeUvbQQdjr5rWKqHFiKWb/fcOwB+CzBT+qbWjvj+DVwRskpIg==", + "dependencies": { + "cross-spawn": "^7.0.0", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/formik": { - "version": "2.4.5", - "resolved": "https://registry.npmjs.org/formik/-/formik-2.4.5.tgz", - "integrity": "sha512-Gxlht0TD3vVdzMDHwkiNZqJ7Mvg77xQNfmBRrNtvzcHZs72TJppSTDKHpImCMJZwcWPBJ8jSQQ95GJzXFf1nAQ==", + "version": "2.4.6", + "resolved": "https://registry.npmjs.org/formik/-/formik-2.4.6.tgz", + "integrity": "sha512-A+2EI7U7aG296q2TLGvNapDNTZp1khVt5Vk0Q/fyfSROss0V/V6+txt2aJnwEos44IxTCW/LYAi/zgWzlevj+g==", "funding": [ { "type": "individual", @@ -3159,7 +4010,8 @@ "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", + "dev": true }, "node_modules/fsevents": { "version": "2.3.3", @@ -3219,28 +4071,41 @@ } }, "node_modules/get-intrinsic": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.2.tgz", - "integrity": "sha512-0gSo4ml/0j98Y3lngkFEot/zhiCeWsbYIlZ+uZOVgzLyLaUw7wxUL+nCTP0XJvJg1AXulJRI3UJi8GsbDuxdGA==", + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", + "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", "dev": true, "dependencies": { + "es-errors": "^1.3.0", "function-bind": "^1.1.2", "has-proto": "^1.0.1", "has-symbols": "^1.0.3", "hasown": "^2.0.0" }, + "engines": { + "node": ">= 0.4" + }, "funding": { 
"url": "https://github.com/sponsors/ljharb" } }, + "node_modules/get-nonce": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-nonce/-/get-nonce-1.0.1.tgz", + "integrity": "sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==", + "engines": { + "node": ">=6" + } + }, "node_modules/get-symbol-description": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.0.0.tgz", - "integrity": "sha512-2EmdH1YvIQiZpltCNgkuiUnyukzxM/R6NDJX31Ke3BG1Nq5b0S2PhX59UKi9vZpPDQVdqn+1IcaAwnzTT5vCjw==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/get-symbol-description/-/get-symbol-description-1.0.2.tgz", + "integrity": "sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "get-intrinsic": "^1.1.1" + "call-bind": "^1.0.5", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4" }, "engines": { "node": ">= 0.4" @@ -3250,9 +4115,9 @@ } }, "node_modules/get-tsconfig": { - "version": "4.7.2", - "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.2.tgz", - "integrity": "sha512-wuMsz4leaj5hbGgg4IvDU0bqJagpftG5l5cXIAvo8uZrqn0NJqwtfupTN00VnkQJPcIRrxYrm1Ue24btpCha2A==", + "version": "4.7.5", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.5.tgz", + "integrity": "sha512-ZCuZCnlqNzjb4QprAzXKdpp/gh6KTxSJuw3IBsPnV/7fV4NxC9ckB+vPTt8w7fJA0TaSD7c55BR47JD6MEDyDw==", "dev": true, "dependencies": { "resolve-pkg-maps": "^1.0.0" @@ -3267,20 +4132,21 @@ "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==" }, "node_modules/glob": { - "version": "7.1.7", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz", - "integrity": "sha512-OvD9ENzPLbegENnYP5UUfJIirTg4+XwMWGaQfQTY0JenxNvvIKP3U3/tAQSPIu/lHxXYSZmpXlUHeqAIdKzBLQ==", - "dev": true, + "version": "10.3.10", 
+ "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz", + "integrity": "sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==", "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" + "foreground-child": "^3.1.0", + "jackspeak": "^2.3.5", + "minimatch": "^9.0.1", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0", + "path-scurry": "^1.10.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" }, "engines": { - "node": "*" + "node": ">=16 || 14 >=14.17" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -3297,15 +4163,32 @@ "node": ">=10.13.0" } }, - "node_modules/glob-to-regexp": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz", - "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==" + "node_modules/glob/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/glob/node_modules/minimatch": { + "version": "9.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz", + "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } }, "node_modules/globals": { - "version": "13.23.0", - "resolved": "https://registry.npmjs.org/globals/-/globals-13.23.0.tgz", - "integrity": "sha512-XAmF0RjlrjY23MA51q3HltdlGxUpXPvg0GioKiD9X6HD28iMjo2dKC8Vqwm7lne4GNr78+RHTfliktR6ZH09wA==", + "version": 
"13.24.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz", + "integrity": "sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==", "dev": true, "dependencies": { "type-fest": "^0.20.2" @@ -3318,12 +4201,13 @@ } }, "node_modules/globalthis": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.3.tgz", - "integrity": "sha512-sFdI5LyBiNTHjRd7cGPWapiHWMOXKyuBNX/cWJ3NfzrZQVa8GI/8cofCl74AOVqq9W5kNmguTIzJ/1s2gyI9wA==", + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz", + "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==", "dev": true, "dependencies": { - "define-properties": "^1.1.3" + "define-properties": "^1.2.1", + "gopd": "^1.0.1" }, "engines": { "node": ">= 0.4" @@ -3394,21 +4278,21 @@ } }, "node_modules/has-property-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.1.tgz", - "integrity": "sha512-VsX8eaIewvas0xnvinAe9bw4WfIeODpGYikiWYLH+dma0Jw6KHYqWiWfhQlgOVK8D6PvjubK5Uc4P0iIhIcNVg==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", + "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.2" + "es-define-property": "^1.0.0" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/has-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.1.tgz", - "integrity": "sha512-7qE+iP+O+bgF9clE5+UoBFzE65mlBiVj3tKCrlNQ0Ogwm0BjpT/gK4SlLYDMybDh5I3TCTKnPPa0oMG7JDYrhg==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", + "integrity": 
"sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", "dev": true, "engines": { "node": ">= 0.4" @@ -3430,12 +4314,12 @@ } }, "node_modules/has-tostringtag": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.0.tgz", - "integrity": "sha512-kFjcSNhnlGV1kyoGk7OXKSawH5JOb/LzUc5w9B02hOTO0dfFRjbHQKvg1d6cf3HbeUmtU9VbbV3qzZ2Teh97WQ==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", + "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", "dev": true, "dependencies": { - "has-symbols": "^1.0.2" + "has-symbols": "^1.0.3" }, "engines": { "node": ">= 0.4" @@ -3445,9 +4329,9 @@ } }, "node_modules/hasown": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.0.tgz", - "integrity": "sha512-vUptKVTpIJhcczKBbgnS+RtcuYMB8+oNzPK2/Hp3hanz8JmpATdmmgLgSaadVREkDm+e2giHwY3ZRkyjSIDDFA==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", "dependencies": { "function-bind": "^1.1.2" }, @@ -3455,15 +4339,174 @@ "node": ">= 0.4" } }, - "node_modules/hast-util-whitespace": { + "node_modules/hast-util-from-html": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-2.0.1.tgz", - "integrity": "sha512-nAxA0v8+vXSBDt3AnRUNjyRIQ0rD+ntpbAp4LnPkumc5M9yUbSMa4XDU9Q6etY4f1Wp4bNgvc1yjiZtsTTrSng==", + "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.1.tgz", + "integrity": "sha512-RXQBLMl9kjKVNkJTIO6bZyb2n+cUH8LFaSSzo82jiLT6Tfc+Pt7VQCS+/h3YwG4jaNE2TA2sdJisGWR+aJrp0g==", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.1.0", + "hast-util-from-parse5": "^8.0.0", + "parse5": "^7.0.0", + "vfile": "^6.0.0", 
+ "vfile-message": "^4.0.0" + }, "funding": { "type": "opencollective", "url": "https://opencollective.com/unified" } }, + "node_modules/hast-util-from-parse5": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.1.tgz", + "integrity": "sha512-Er/Iixbc7IEa7r/XLtuG52zoqn/b3Xng/w6aZQ0xGVxzhw5xUFxcRqdPzP6yFi/4HBYRaifaI5fQ1RH8n0ZeOQ==", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "devlop": "^1.0.0", + "hastscript": "^8.0.0", + "property-information": "^6.0.0", + "vfile": "^6.0.0", + "vfile-location": "^5.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hastscript": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz", + "integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-parse-selector": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-3.1.1.tgz", + "integrity": 
"sha512-jdlwBjEexy1oGz0aJ2f4GKMaVKkA9jwjr4MjAAI22E5fM/TXVZHuS5OpONtdeIkRKqAaryQ2E9xNQxijoThSZA==", + "dependencies": { + "@types/hast": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-parse-selector/node_modules/@types/hast": { + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", + "integrity": "sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/hast-util-parse-selector/node_modules/@types/unist": { + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", + "integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + }, + "node_modules/hast-util-to-jsx-runtime": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/hast-util-to-jsx-runtime/-/hast-util-to-jsx-runtime-2.3.0.tgz", + "integrity": "sha512-H/y0+IWPdsLLS738P8tDnrQ8Z+dj12zQQ6WC11TIM21C8WFVoIxcqWXf2H3hiTVZjF1AWqoimGwrTWecWrnmRQ==", + "dependencies": { + "@types/estree": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "devlop": "^1.0.0", + "estree-util-is-identifier-name": "^3.0.0", + "hast-util-whitespace": "^3.0.0", + "mdast-util-mdx-expression": "^2.0.0", + "mdast-util-mdx-jsx": "^3.0.0", + "mdast-util-mdxjs-esm": "^2.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0", + "style-to-object": "^1.0.0", + "unist-util-position": "^5.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-to-string": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-to-string/-/hast-util-to-string-3.0.0.tgz", + "integrity": 
"sha512-OGkAxX1Ua3cbcW6EJ5pT/tslVb90uViVkcJ4ZZIMW/R33DX/AkcJcRrPebPwJkHYwlDHXz4aIwvAAaAdtrACFA==", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-whitespace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", + "integrity": "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hastscript": { + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-7.2.0.tgz", + "integrity": "sha512-TtYPq24IldU8iKoJQqvZOuhi5CyCQRAbvDOX0x1eW6rsHSxa/1i2CCiptNTotGHJ3VoHRGmqiv6/D3q113ikkw==", + "dependencies": { + "@types/hast": "^2.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^3.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hastscript/node_modules/@types/hast": { + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", + "integrity": "sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/hastscript/node_modules/@types/unist": { + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", + "integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + }, "node_modules/hoist-non-react-statics": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/hoist-non-react-statics/-/hoist-non-react-statics-3.3.2.tgz", @@ -3472,6 +4515,15 @@ 
"react-is": "^16.7.0" } }, + "node_modules/html-url-attributes": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.0.tgz", + "integrity": "sha512-/sXbVCWayk6GDVg3ctOX6nxaVj7So40FcFAnWlWGNAB1LpYKcV5Cd10APjPjW80O7zYW2MsjBV4zZ7IZO5fVow==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/ieee754": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", @@ -3492,9 +4544,9 @@ ] }, "node_modules/ignore": { - "version": "5.3.0", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.0.tgz", - "integrity": "sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==", + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.1.tgz", + "integrity": "sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==", "dev": true, "engines": { "node": ">= 4" @@ -3529,6 +4581,7 @@ "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "dev": true, "dependencies": { "once": "^1.3.0", "wrappy": "1" @@ -3545,17 +4598,17 @@ "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==" }, "node_modules/inline-style-parser": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.1.1.tgz", - "integrity": "sha512-7NXolsK4CAS5+xvdj5OMMbI962hU/wvwoxk+LWR9Ek9bVtyuuYScDN6eS0rUm6TxApFpw7CX1o4uJzcd4AyD3Q==" + "version": "0.2.3", + "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.3.tgz", + "integrity": "sha512-qlD8YNDqyTKTyuITrDOffsl6Tdhv+UC4hcdAVuQsK4IMQ99nSgd1MIA/Q+jQYoh9r3hVUXhYh7urSRmXPkW04g==" }, "node_modules/internal-slot": { - "version": 
"1.0.6", - "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.6.tgz", - "integrity": "sha512-Xj6dv+PsbtwyPpEflsejS+oIZxmMlV44zAhG479uYu89MsjcYOhCFnNyKrkJrihbsiasQyY0afoCl/9BLR65bg==", + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.7.tgz", + "integrity": "sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.2", + "es-errors": "^1.3.0", "hasown": "^2.0.0", "side-channel": "^1.0.4" }, @@ -3571,15 +4624,47 @@ "node": ">=12" } }, + "node_modules/invariant": { + "version": "2.2.4", + "resolved": "https://registry.npmjs.org/invariant/-/invariant-2.2.4.tgz", + "integrity": "sha512-phJfQVBuaJM5raOpJjSfkiD6BpbCE4Ns//LaXl6wGYtUBY83nWS6Rf9tXm2e8VaK60JEjYldbPif/A2B1C2gNA==", + "dependencies": { + "loose-envify": "^1.0.0" + } + }, + "node_modules/is-alphabetical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphabetical/-/is-alphabetical-2.0.1.tgz", + "integrity": "sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/is-alphanumerical": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-alphanumerical/-/is-alphanumerical-2.0.1.tgz", + "integrity": "sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==", + "dependencies": { + "is-alphabetical": "^2.0.0", + "is-decimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-array-buffer": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.2.tgz", - "integrity": "sha512-y+FyyR/w8vfIRq4eQcM1EYgSTnmHXPqaF+IgzgraytCFq5Xh8lllDVmAZolPJiZttZLeFSINPYMaEJ7/vWUa1w==", + "version": "3.0.4", + "resolved": 
"https://registry.npmjs.org/is-array-buffer/-/is-array-buffer-3.0.4.tgz", + "integrity": "sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw==", "dev": true, "dependencies": { "call-bind": "^1.0.2", - "get-intrinsic": "^1.2.0", - "is-typed-array": "^1.1.10" + "get-intrinsic": "^1.2.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -3644,28 +4729,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/is-buffer": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-2.0.5.tgz", - "integrity": "sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "engines": { - "node": ">=4" - } - }, "node_modules/is-callable": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/is-callable/-/is-callable-1.2.7.tgz", @@ -3689,6 +4752,21 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-data-view": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/is-data-view/-/is-data-view-1.0.1.tgz", + "integrity": "sha512-AHkaJrsUVW6wq6JS8y3JnM/GJF/9cf+k20+iDzlSaJrinEo5+7vRiteOSwBhHRiAyQATN1AmY4hwzxJKPmYf+w==", + "dev": true, + "dependencies": { + "is-typed-array": "^1.1.13" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/is-date-object": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/is-date-object/-/is-date-object-1.0.5.tgz", @@ -3704,6 +4782,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-decimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-decimal/-/is-decimal-2.0.1.tgz", + 
"integrity": "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -3724,6 +4811,14 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", + "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", + "engines": { + "node": ">=8" + } + }, "node_modules/is-generator-function": { "version": "1.0.10", "resolved": "https://registry.npmjs.org/is-generator-function/-/is-generator-function-1.0.10.tgz", @@ -3750,19 +4845,31 @@ "node": ">=0.10.0" } }, + "node_modules/is-hexadecimal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", + "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/is-map": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.2.tgz", - "integrity": "sha512-cOZFQQozTha1f4MxLFzlgKYPTyj26picdZTx82hbc/Xf4K/tZOOXSCkMvU4pKioRXGDLJRn0GM7Upe7kR721yg==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-map/-/is-map-2.0.3.tgz", + "integrity": "sha512-1Qed0/Hr2m+YqxnM09CjA2d/i6YZNfF6R2oRAOj36eUdS6qIV/huPJNSEpKbupewFs+ZsJlxsjjPbc0/afW6Lw==", "dev": true, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/is-negative-zero": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.2.tgz", - "integrity": 
"sha512-dqJvarLawXsFbNDeJW7zAz8ItJ9cd28YufuuFzh0G8pNHjJMnY08Dv7sYX2uF5UpQOwieAeOExEYAWWfu7ZZUA==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.3.tgz", + "integrity": "sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw==", "dev": true, "engines": { "node": ">= 0.4" @@ -3831,21 +4938,27 @@ } }, "node_modules/is-set": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.2.tgz", - "integrity": "sha512-+2cnTEZeY5z/iXGbLhPrOAaK/Mau5k5eXq9j14CpRTftq0pAJu2MwVRSZhyZWBzx3o6X795Lz6Bpb6R0GKf37g==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-set/-/is-set-2.0.3.tgz", + "integrity": "sha512-iPAjerrse27/ygGLxw+EBR9agv9Y6uLeYVJMu+QNCoouJ1/1ri0mGrcWpfCqFZuzzx3WjtwxG098X+n4OuRkPg==", "dev": true, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/is-shared-array-buffer": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.2.tgz", - "integrity": "sha512-sqN2UDu1/0y6uvXyStCOzyhAjCSlHceFoMKJW8W9EU9cvic/QdsZ0kEU93HEy3IUEFZIiH/3w+AH/UQbPHNdhA==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/is-shared-array-buffer/-/is-shared-array-buffer-1.0.3.tgz", + "integrity": "sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2" + "call-bind": "^1.0.7" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -3882,12 +4995,12 @@ } }, "node_modules/is-typed-array": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.12.tgz", - "integrity": "sha512-Z14TF2JNG8Lss5/HMqt0//T9JeHXttXy5pH/DBU4vi98ozO2btxzq9MwYDZYnKwU8nRsz/+GVFVRDq3DkVuSPg==", + "version": "1.1.13", + "resolved": 
"https://registry.npmjs.org/is-typed-array/-/is-typed-array-1.1.13.tgz", + "integrity": "sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==", "dev": true, "dependencies": { - "which-typed-array": "^1.1.11" + "which-typed-array": "^1.1.14" }, "engines": { "node": ">= 0.4" @@ -3897,10 +5010,13 @@ } }, "node_modules/is-weakmap": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.1.tgz", - "integrity": "sha512-NSBR4kH5oVj1Uwvv970ruUkCV7O1mzgVFO4/rev2cLRda9Tm9HrL70ZPut4rOHgY0FNrUu9BCbXA2sdQ+x0chA==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", + "integrity": "sha512-K5pXYOm9wqY1RgjpL3YTkF39tni1XajUIkawTLUo9EZEVUFga5gSQJF8nNS7ZwJQ02y+1YCNYcMh+HIf1ZqE+w==", "dev": true, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -3918,13 +5034,16 @@ } }, "node_modules/is-weakset": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.2.tgz", - "integrity": "sha512-t2yVvttHkQktwnNNmBQ98AhENLdPUTDTE21uPqAQ0ARwQfGeQKRVS0NNurH7bTf7RrvcVn1OOge45CnBeHCSmg==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/is-weakset/-/is-weakset-2.0.3.tgz", + "integrity": "sha512-LvIm3/KWzS9oRFHugab7d+M/GcBXuXX5xZkzPmN+NxihdQlZUQ4dWuSV1xR/sq6upL1TJEDrfBgRepHFdBtSNQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "get-intrinsic": "^1.1.1" + "call-bind": "^1.0.7", + "get-intrinsic": "^1.2.4" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -3939,8 +5058,7 @@ "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", - "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true + "integrity": 
"sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" }, "node_modules/iterator.prototype": { "version": "1.1.2", @@ -3955,6 +5073,23 @@ "set-function-name": "^2.0.1" } }, + "node_modules/jackspeak": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-2.3.6.tgz", + "integrity": "sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ==", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, "node_modules/jiti": { "version": "1.21.0", "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.0.tgz", @@ -4053,14 +5188,6 @@ "json-buffer": "3.0.1" } }, - "node_modules/kleur": { - "version": "4.1.5", - "resolved": "https://registry.npmjs.org/kleur/-/kleur-4.1.5.tgz", - "integrity": "sha512-o+NO+8WrRiQEE4/7nwRJhN1HWpVmJm511pBHUxPLtp0BUISzlBplORYSmTclCnJvQq2tKu/sgl3xVpkc7ZWuQQ==", - "engines": { - "node": ">=6" - } - }, "node_modules/language-subtag-registry": { "version": "0.3.22", "resolved": "https://registry.npmjs.org/language-subtag-registry/-/language-subtag-registry-0.3.22.tgz", @@ -4148,6 +5275,15 @@ "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", "dev": true }, + "node_modules/longest-streak": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/longest-streak/-/longest-streak-3.1.0.tgz", + "integrity": "sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/loose-envify": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz", @@ -4160,47 +5296,231 @@ } }, "node_modules/lru-cache": { - "version": "6.0.0", - 
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", - "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", - "dependencies": { - "yallist": "^4.0.0" - }, + "version": "10.2.2", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz", + "integrity": "sha512-9hp3Vp2/hFQUiIwKo8XCeFVnrg8Pk3TYNPIR7tJADKi5YfcF7vEaK7avFHTlSy3kOKYaJQaalfEo6YuXdceBOQ==", "engines": { - "node": ">=10" + "node": "14 || >=16.14" } }, - "node_modules/mdast-util-definitions": { - "version": "5.1.2", - "resolved": "https://registry.npmjs.org/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz", - "integrity": "sha512-8SVPMuHqlPME/z3gqVwWY4zVXn8lqKv/pAhC57FuJ40ImXyBpmO5ukh98zB2v7Blql2FiHjHv9LVztSIqjY+MA==", + "node_modules/markdown-table": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.3.tgz", + "integrity": "sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/mdast-util-find-and-replace": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-find-and-replace/-/mdast-util-find-and-replace-3.0.1.tgz", + "integrity": "sha512-SG21kZHGC3XRTSUhtofZkBzZTJNM5ecCi0SK2IMKmSXR8vO3peL+kb1O0z7Zl83jKtutG4k5Wv/W7V3/YHvzPA==", "dependencies": { - "@types/mdast": "^3.0.0", - "@types/unist": "^2.0.0", - "unist-util-visit": "^4.0.0" + "@types/mdast": "^4.0.0", + "escape-string-regexp": "^5.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" }, "funding": { "type": "opencollective", "url": "https://opencollective.com/unified" } }, + "node_modules/mdast-util-find-and-replace/node_modules/escape-string-regexp": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", + "integrity": 
"sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/mdast-util-from-markdown": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-1.3.1.tgz", - "integrity": "sha512-4xTO/M8c82qBcnQc1tgpNtubGUW/Y1tBQ1B0i5CtSoelOLKFYlElIr3bvgREYYO5iRqbMY1YuqZng0GVOI8Qww==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-from-markdown/-/mdast-util-from-markdown-2.0.0.tgz", + "integrity": "sha512-n7MTOr/z+8NAX/wmhhDji8O3bRvPTV/U0oTCaZJkjhPSKTPhS3xufVhKGF8s1pJ7Ox4QgoIU7KHseh09S+9rTA==", "dependencies": { - "@types/mdast": "^3.0.0", - "@types/unist": "^2.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", "decode-named-character-reference": "^1.0.0", - "mdast-util-to-string": "^3.1.0", - "micromark": "^3.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - "micromark-util-decode-string": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "unist-util-stringify-position": "^3.0.0", - "uvu": "^0.5.0" + "devlop": "^1.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark": "^4.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-decode-string": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unist-util-stringify-position": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.0.0.tgz", + "integrity": "sha512-dgQEX5Amaq+DuUqf26jJqSK9qgixgd6rYDHAv4aTBuA92cTknZlKpPfa86Z/s8Dj8xsAQpFfBmPUHWJBWqS4Bw==", + 
"dependencies": { + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-gfm-autolink-literal": "^2.0.0", + "mdast-util-gfm-footnote": "^2.0.0", + "mdast-util-gfm-strikethrough": "^2.0.0", + "mdast-util-gfm-table": "^2.0.0", + "mdast-util-gfm-task-list-item": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.0.tgz", + "integrity": "sha512-FyzMsduZZHSc3i0Px3PQcBT4WJY/X/RCtEJKuybiC6sjPqLv7h1yqAkmILZtuxMSsUyaLUWNp71+vQH2zqp5cg==", + "dependencies": { + "@types/mdast": "^4.0.0", + "ccount": "^2.0.0", + "devlop": "^1.0.0", + "mdast-util-find-and-replace": "^3.0.0", + "micromark-util-character": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-footnote": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.0.0.tgz", + "integrity": "sha512-5jOT2boTSVkMnQ7LTrd6n/18kqwjmuYqo7JUPe+tRCY6O7dAuTFMtTPauYYrMPpox9hlN0uOx/FL8XvEfG9/mQ==", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": 
"^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", + "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-task-list-item": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", + "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-expression": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.0.tgz", + "integrity": "sha512-fGCu8eWdKUKNu5mohVGkhBXCXGnOTLuFqOvGMvdikr+J1w7lDJgxThOKpwRWzzbyXAU2hhSwsmssOY4yTokluw==", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdx-jsx": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/mdast-util-mdx-jsx/-/mdast-util-mdx-jsx-3.1.2.tgz", + "integrity": 
"sha512-eKMQDeywY2wlHc97k5eD8VC+9ASMjN8ItEZQNGwJ6E0XWKiW/Z0V5/H8pvoXUf+y+Mj0VIgeRRbujBmFn4FTyA==", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "ccount": "^2.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "parse-entities": "^4.0.0", + "stringify-entities": "^4.0.0", + "unist-util-remove-position": "^5.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-mdxjs-esm": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-mdxjs-esm/-/mdast-util-mdxjs-esm-2.0.1.tgz", + "integrity": "sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==", + "dependencies": { + "@types/estree-jsx": "^1.0.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-phrasing": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", + "integrity": "sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==", + "dependencies": { + "@types/mdast": "^4.0.0", + "unist-util-is": "^6.0.0" }, "funding": { "type": "opencollective", @@ -4208,18 +5528,38 @@ } }, "node_modules/mdast-util-to-hast": { - "version": "12.3.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-12.3.0.tgz", - "integrity": "sha512-pits93r8PhnIoU4Vy9bjW39M2jJ6/tdHyja9rrot9uujkN7UTU9SDnE6WNJz/IGyQk3XHX6yNNtrBH6cQzm8Hw==", + "version": "13.1.0", + "resolved": 
"https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.1.0.tgz", + "integrity": "sha512-/e2l/6+OdGp/FB+ctrJ9Avz71AN/GRH3oi/3KAx/kMnoUsD6q0woXlDT8lLEeViVKE7oZxE7RXzvO3T8kF2/sA==", "dependencies": { - "@types/hast": "^2.0.0", - "@types/mdast": "^3.0.0", - "mdast-util-definitions": "^5.0.0", - "micromark-util-sanitize-uri": "^1.1.0", + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "@ungap/structured-clone": "^1.0.0", + "devlop": "^1.0.0", + "micromark-util-sanitize-uri": "^2.0.0", "trim-lines": "^3.0.0", - "unist-util-generated": "^2.0.0", - "unist-util-position": "^4.0.0", - "unist-util-visit": "^4.0.0" + "unist-util-position": "^5.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-to-markdown": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-markdown/-/mdast-util-to-markdown-2.1.0.tgz", + "integrity": "sha512-SR2VnIEdVNCJbP6y7kVTJgPLifdr8WEU440fQec7qHoHOUz/oJ2jmNRqdDQ3rbiStOXb2mCDGTuwsK5OPUgYlQ==", + "dependencies": { + "@types/mdast": "^4.0.0", + "@types/unist": "^3.0.0", + "longest-streak": "^3.0.0", + "mdast-util-phrasing": "^4.0.0", + "mdast-util-to-string": "^4.0.0", + "micromark-util-decode-string": "^2.0.0", + "unist-util-visit": "^5.0.0", + "zwitch": "^2.0.0" }, "funding": { "type": "opencollective", @@ -4227,11 +5567,11 @@ } }, "node_modules/mdast-util-to-string": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-3.2.0.tgz", - "integrity": "sha512-V4Zn/ncyN1QNSqSBxTrMOLpjr+IKdHl2v3KVLoWmDPscP4r9GcCi71gjgvUV1SFSKh92AjAG4peFuBl2/YgCJg==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-to-string/-/mdast-util-to-string-4.0.0.tgz", + "integrity": "sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==", "dependencies": { - "@types/mdast": 
"^3.0.0" + "@types/mdast": "^4.0.0" }, "funding": { "type": "opencollective", @@ -4247,9 +5587,9 @@ } }, "node_modules/micromark": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/micromark/-/micromark-3.2.0.tgz", - "integrity": "sha512-uD66tJj54JLYq0De10AhWycZWGQNUvDI55xPgk2sQM5kn1JYlhbCMTtEeT27+vAhW2FBQxLlOmS3pmA7/2z4aA==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.0.tgz", + "integrity": "sha512-o/sd0nMof8kYff+TqcDx3VSrgBTcZpSvYcAHIfHhv5VAuNmisCxjhx6YmxS8PFEpb9z5WKWKPdzf0jM23ro3RQ==", "funding": [ { "type": "GitHub Sponsors", @@ -4264,26 +5604,26 @@ "@types/debug": "^4.0.0", "debug": "^4.0.0", "decode-named-character-reference": "^1.0.0", - "micromark-core-commonmark": "^1.0.1", - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-chunked": "^1.0.0", - "micromark-util-combine-extensions": "^1.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - "micromark-util-encode": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-resolve-all": "^1.0.0", - "micromark-util-sanitize-uri": "^1.0.0", - "micromark-util-subtokenize": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.1", - "uvu": "^0.5.0" + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-core-commonmark": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-1.1.0.tgz", - "integrity": "sha512-BgHO1aRbolh2hcrzL2d1La37V0Aoz73ymF8rAcKnohLy93titmv62E0gP8Hrx9PKcKrqCZ1BbLGbP3bEhoXYlw==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-core-commonmark/-/micromark-core-commonmark-2.0.1.tgz", + "integrity": "sha512-CUQyKr1e///ZODyD1U3xit6zXwy1a8q2a1S1HKtIlmgvurrEpaw/Y9y6KSIbF8P59cn/NjzHyO+Q2fAyYLQrAA==", "funding": [ { "type": "GitHub Sponsors", @@ -4296,27 +5636,141 @@ ], "dependencies": { "decode-named-character-reference": "^1.0.0", - "micromark-factory-destination": "^1.0.0", - "micromark-factory-label": "^1.0.0", - "micromark-factory-space": "^1.0.0", - "micromark-factory-title": "^1.0.0", - "micromark-factory-whitespace": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-chunked": "^1.0.0", - "micromark-util-classify-character": "^1.0.0", - "micromark-util-html-tag-name": "^1.0.0", - "micromark-util-normalize-identifier": "^1.0.0", - "micromark-util-resolve-all": "^1.0.0", - "micromark-util-subtokenize": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.1", - "uvu": "^0.5.0" + "devlop": "^1.0.0", + "micromark-factory-destination": "^2.0.0", + "micromark-factory-label": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-factory-title": "^2.0.0", + "micromark-factory-whitespace": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-html-tag-name": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-subtokenize": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + } + }, + "node_modules/micromark-extension-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", + "integrity": 
"sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^2.0.0", + "micromark-extension-gfm-footnote": "^2.0.0", + "micromark-extension-gfm-strikethrough": "^2.0.0", + "micromark-extension-gfm-table": "^2.0.0", + "micromark-extension-gfm-tagfilter": "^2.0.0", + "micromark-extension-gfm-task-list-item": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.0.0.tgz", + "integrity": "sha512-rTHfnpt/Q7dEAK1Y5ii0W8bhfJlVJFnJMHIPisfPK3gpVNuOP0VnRl96+YJ3RYWV/P4gFeQoGKNlT3RhuvpqAg==", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.0.0.tgz", + "integrity": "sha512-6Rzu0CYRKDv3BfLAUnZsSlzx3ak6HAoI85KTiijuKIz5UxZxbUI+pD6oHgw+6UtQuiRwnGRhzMmPRv4smcz0fg==", + "dependencies": { + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + 
"node_modules/micromark-extension-gfm-strikethrough": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-c3BR1ClMp5fxxmwP6AoOY2fXO9U8uFMKs4ADD66ahLTNcwzSCyRVU4k7LPV5Nxo/VJiR4TdzxRQY2v3qIUceCw==", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.0.0.tgz", + "integrity": "sha512-PoHlhypg1ItIucOaHmKE8fbin3vTLpDOUg8KAr8gRCF1MOZI9Nquq2i/44wFvviM4WuxJzc3demT8Y3dkfvYrw==", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", + "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", + "dependencies": { + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.0.1.tgz", + "integrity": 
"sha512-cY5PzGcnULaN5O7T+cOzfMoHjBW7j+T9D2sucA5d/KbsBTPcYdebm9zUd9zzdgJGCwahV+/W78Z3nbulBYVbTw==", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" } }, "node_modules/micromark-factory-destination": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-1.1.0.tgz", - "integrity": "sha512-XaNDROBgx9SgSChd69pjiGKbV+nfHGDPVYFs5dOoDd7ZnMAE+Cuu91BCpsY8RT2NP9vo/B8pds2VQNCLiu0zhg==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.0.tgz", + "integrity": "sha512-j9DGrQLm/Uhl2tCzcbLhy5kXsgkHUrjJHg4fFAeoMRwJmJerT9aw4FEhIbZStWN8A3qMwOp1uzHr4UL8AInxtA==", "funding": [ { "type": "GitHub Sponsors", @@ -4328,15 +5782,15 @@ } ], "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-factory-label": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-1.1.0.tgz", - "integrity": "sha512-OLtyez4vZo/1NjxGhcpDSbHQ+m0IIGnT8BoPamh+7jVlzLJBH98zzuCoUeMxvM6WsNeh8wx8cKvqLiPHEACn0w==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-factory-label/-/micromark-factory-label-2.0.0.tgz", + "integrity": "sha512-RR3i96ohZGde//4WSe/dJsxOX6vxIg9TimLAS3i4EhBAFx8Sm5SmqVfR8E87DPSR31nEAjZfbt91OMZWcNgdZw==", "funding": [ { "type": "GitHub Sponsors", @@ -4348,16 +5802,16 @@ } ], "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "uvu": 
"^0.5.0" + "devlop": "^1.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-factory-space": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-1.1.0.tgz", - "integrity": "sha512-cRzEj7c0OL4Mw2v6nwzttyOZe8XY/Z8G0rzmWQZTBi/jjwyw/U4uqKtUORXQrR5bAZZnbTI/feRV/R7hc4jQYQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-factory-space/-/micromark-factory-space-2.0.0.tgz", + "integrity": "sha512-TKr+LIDX2pkBJXFLzpyPyljzYK3MtmllMUMODTQJIUfDGncESaqB90db9IAUcz4AZAJFdd8U9zOp9ty1458rxg==", "funding": [ { "type": "GitHub Sponsors", @@ -4369,14 +5823,14 @@ } ], "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-util-character": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-factory-title": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-1.1.0.tgz", - "integrity": "sha512-J7n9R3vMmgjDOCY8NPw55jiyaQnH5kBdV2/UXCtZIpnHH3P6nHUKaH7XXEYuWwx/xUJcawa8plLBEjMPU24HzQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-factory-title/-/micromark-factory-title-2.0.0.tgz", + "integrity": "sha512-jY8CSxmpWLOxS+t8W+FG3Xigc0RDQA9bKMY/EwILvsesiRniiVMejYTE4wumNc2f4UbAa4WsHqe3J1QS1sli+A==", "funding": [ { "type": "GitHub Sponsors", @@ -4388,16 +5842,16 @@ } ], "dependencies": { - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-factory-whitespace": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-1.1.0.tgz", - "integrity": "sha512-v2WlmiymVSp5oMg+1Q0N1Lxmt6pMhIHD457whWM7/GUlEks1hI9xj5w3zbc4uuMKXGisksZk8DzP2UyGbGqNsQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-factory-whitespace/-/micromark-factory-whitespace-2.0.0.tgz", + "integrity": "sha512-28kbwaBjc5yAI1XadbdPYHX/eDnqaUFVikLwrO7FDnKG7lpgxnvk/XGRhX/PN0mOZ+dBSZ+LgunHS+6tYQAzhA==", "funding": [ { "type": "GitHub Sponsors", @@ -4409,16 +5863,16 @@ } ], "dependencies": { - "micromark-factory-space": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-character": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-1.2.0.tgz", - "integrity": "sha512-lXraTwcX3yH/vMDaFWCQJP1uIszLVebzUa3ZHdrgxr7KEU/9mL4mVgCpGbyhvNLNlauROiNUq7WN5u7ndbY6xg==", + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-util-character/-/micromark-util-character-2.1.0.tgz", + "integrity": "sha512-KvOVV+X1yLBfs9dCBSopq/+G1PcgT3lAK07mC4BzXi5E7ahzMAF8oIupDDJ6mievI6F+lAATkbQQlQixJfT3aQ==", "funding": [ { "type": "GitHub Sponsors", @@ -4430,14 +5884,14 @@ } ], "dependencies": { - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-chunked": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-1.1.0.tgz", - "integrity": "sha512-Ye01HXpkZPNcV6FiyoW2fGZDUw4Yc7vT0E9Sad83+bEDiCJ1uXu0S3mr8WLpsz3HaG3x2q0HM6CTuPdcZcluFQ==", + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/micromark-util-chunked/-/micromark-util-chunked-2.0.0.tgz", + "integrity": "sha512-anK8SWmNphkXdaKgz5hJvGa7l00qmcaUQoMYsBwDlSKFKjc6gjGXPDw3FNL3Nbwq5L8gE+RCbGqTw49FK5Qyvg==", "funding": [ { "type": "GitHub Sponsors", @@ -4449,13 +5903,13 @@ } ], "dependencies": { - "micromark-util-symbol": "^1.0.0" + "micromark-util-symbol": "^2.0.0" } }, "node_modules/micromark-util-classify-character": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-1.1.0.tgz", - "integrity": "sha512-SL0wLxtKSnklKSUplok1WQFoGhUdWYKggKUiqhX+Swala+BtptGCu5iPRc+xvzJ4PXE/hwM3FNXsfEVgoZsWbw==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-classify-character/-/micromark-util-classify-character-2.0.0.tgz", + "integrity": "sha512-S0ze2R9GH+fu41FA7pbSqNWObo/kzwf8rN/+IGlW/4tC6oACOs8B++bh+i9bVyNnwCcuksbFwsBme5OCKXCwIw==", "funding": [ { "type": "GitHub Sponsors", @@ -4467,15 +5921,15 @@ } ], "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0" + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-combine-extensions": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-1.1.0.tgz", - "integrity": "sha512-Q20sp4mfNf9yEqDL50WwuWZHUrCO4fEyeDCnMGmG5Pr0Cz15Uo7KBs6jq+dq0EgX4DPwwrh9m0X+zPV1ypFvUA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-combine-extensions/-/micromark-util-combine-extensions-2.0.0.tgz", + "integrity": "sha512-vZZio48k7ON0fVS3CUgFatWHoKbbLTK/rT7pzpJ4Bjp5JjkZeasRfrS9wsBdDJK2cJLHMckXZdzPSSr1B8a4oQ==", "funding": [ { "type": "GitHub Sponsors", @@ -4487,14 +5941,14 @@ } ], "dependencies": { - "micromark-util-chunked": "^1.0.0", - "micromark-util-types": "^1.0.0" + 
"micromark-util-chunked": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-decode-numeric-character-reference": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-1.1.0.tgz", - "integrity": "sha512-m9V0ExGv0jB1OT21mrWcuf4QhP46pH1KkfWy9ZEezqHKAxkj4mPCy3nIH1rkbdMlChLHX531eOrymlwyZIf2iw==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-decode-numeric-character-reference/-/micromark-util-decode-numeric-character-reference-2.0.1.tgz", + "integrity": "sha512-bmkNc7z8Wn6kgjZmVHOX3SowGmVdhYS7yBpMnuMnPzDq/6xwVA604DuOXMZTO1lvq01g+Adfa0pE2UKGlxL1XQ==", "funding": [ { "type": "GitHub Sponsors", @@ -4506,13 +5960,13 @@ } ], "dependencies": { - "micromark-util-symbol": "^1.0.0" + "micromark-util-symbol": "^2.0.0" } }, "node_modules/micromark-util-decode-string": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-1.1.0.tgz", - "integrity": "sha512-YphLGCK8gM1tG1bd54azwyrQRjCFcmgj2S2GoJDNnh4vYtnL38JS8M4gpxzOPNyHdNEpheyWXCTnnTDY3N+NVQ==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-decode-string/-/micromark-util-decode-string-2.0.0.tgz", + "integrity": "sha512-r4Sc6leeUTn3P6gk20aFMj2ntPwn6qpDZqWvYmAG6NgvFTIlj4WtrAudLi65qYoaGdXYViXYw2pkmn7QnIFasA==", "funding": [ { "type": "GitHub Sponsors", @@ -4525,15 +5979,15 @@ ], "dependencies": { "decode-named-character-reference": "^1.0.0", - "micromark-util-character": "^1.0.0", - "micromark-util-decode-numeric-character-reference": "^1.0.0", - "micromark-util-symbol": "^1.0.0" + "micromark-util-character": "^2.0.0", + "micromark-util-decode-numeric-character-reference": "^2.0.0", + "micromark-util-symbol": "^2.0.0" } }, "node_modules/micromark-util-encode": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-1.1.0.tgz", - "integrity": "sha512-EuEzTWSTAj9PA5GOAs992GzNh2dGQO52UvAbtSOMvXTxv3Criqb6IOzJUBCmEqrrXSblJIJBbFFv6zPxpreiJw==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-encode/-/micromark-util-encode-2.0.0.tgz", + "integrity": "sha512-pS+ROfCXAGLWCOc8egcBvT0kf27GoWMqtdarNfDcjb6YLuV5cM3ioG45Ys2qOVqeqSbjaKg72vU+Wby3eddPsA==", "funding": [ { "type": "GitHub Sponsors", @@ -4546,9 +6000,9 @@ ] }, "node_modules/micromark-util-html-tag-name": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-1.2.0.tgz", - "integrity": "sha512-VTQzcuQgFUD7yYztuQFKXT49KghjtETQ+Wv/zUjGSGBioZnkA4P1XXZPT1FHeJA6RwRXSF47yvJ1tsJdoxwO+Q==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-html-tag-name/-/micromark-util-html-tag-name-2.0.0.tgz", + "integrity": "sha512-xNn4Pqkj2puRhKdKTm8t1YHC/BAjx6CEwRFXntTaRf/x16aqka6ouVoutm+QdkISTlT7e2zU7U4ZdlDLJd2Mcw==", "funding": [ { "type": "GitHub Sponsors", @@ -4561,9 +6015,9 @@ ] }, "node_modules/micromark-util-normalize-identifier": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-1.1.0.tgz", - "integrity": "sha512-N+w5vhqrBihhjdpM8+5Xsxy71QWqGn7HYNUvch71iV2PM7+E3uWGox1Qp90loa1ephtCxG2ftRV/Conitc6P2Q==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-normalize-identifier/-/micromark-util-normalize-identifier-2.0.0.tgz", + "integrity": "sha512-2xhYT0sfo85FMrUPtHcPo2rrp1lwbDEEzpx7jiH2xXJLqBuy4H0GgXk5ToU8IEwoROtXuL8ND0ttVa4rNqYK3w==", "funding": [ { "type": "GitHub Sponsors", @@ -4575,13 +6029,13 @@ } ], "dependencies": { - "micromark-util-symbol": "^1.0.0" + "micromark-util-symbol": "^2.0.0" } }, "node_modules/micromark-util-resolve-all": { - "version": "1.1.0", - "resolved": 
"https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-1.1.0.tgz", - "integrity": "sha512-b/G6BTMSg+bX+xVCshPTPyAu2tmA0E4X98NSR7eIbeC6ycCqCeE7wjfDIgzEbkzdEVJXRtOG4FbEm/uGbCRouA==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-resolve-all/-/micromark-util-resolve-all-2.0.0.tgz", + "integrity": "sha512-6KU6qO7DZ7GJkaCgwBNtplXCvGkJToU86ybBAUdavvgsCiG8lSSvYxr9MhwmQ+udpzywHsl4RpGJsYWG1pDOcA==", "funding": [ { "type": "GitHub Sponsors", @@ -4593,13 +6047,13 @@ } ], "dependencies": { - "micromark-util-types": "^1.0.0" + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-sanitize-uri": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-1.2.0.tgz", - "integrity": "sha512-QO4GXv0XZfWey4pYFndLUKEAktKkG5kZTdUNaTAkzbuJxn2tNBOr+QtxR2XpWaMhbImT2dPzyLrPXLlPhph34A==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-sanitize-uri/-/micromark-util-sanitize-uri-2.0.0.tgz", + "integrity": "sha512-WhYv5UEcZrbAtlsnPuChHUAsu/iBPOVaEVsntLBIdpibO0ddy8OzavZz3iL2xVvBZOpolujSliP65Kq0/7KIYw==", "funding": [ { "type": "GitHub Sponsors", @@ -4611,15 +6065,15 @@ } ], "dependencies": { - "micromark-util-character": "^1.0.0", - "micromark-util-encode": "^1.0.0", - "micromark-util-symbol": "^1.0.0" + "micromark-util-character": "^2.0.0", + "micromark-util-encode": "^2.0.0", + "micromark-util-symbol": "^2.0.0" } }, "node_modules/micromark-util-subtokenize": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-1.1.0.tgz", - "integrity": "sha512-kUQHyzRoxvZO2PuLzMt2P/dwVsTiivCK8icYTeR+3WgbuPqfHgPPy7nFKbeqRivBvn/3N3GBiNC+JRTMSxEC7A==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/micromark-util-subtokenize/-/micromark-util-subtokenize-2.0.1.tgz", + "integrity": 
"sha512-jZNtiFl/1aY73yS3UGQkutD0UbhTt68qnRpw2Pifmz5wV9h8gOVsN70v+Lq/f1rKaU/W8pxRe8y8Q9FX1AOe1Q==", "funding": [ { "type": "GitHub Sponsors", @@ -4631,16 +6085,16 @@ } ], "dependencies": { - "micromark-util-chunked": "^1.0.0", - "micromark-util-symbol": "^1.0.0", - "micromark-util-types": "^1.0.0", - "uvu": "^0.5.0" + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" } }, "node_modules/micromark-util-symbol": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-1.1.0.tgz", - "integrity": "sha512-uEjpEYY6KMs1g7QfJ2eX1SQEV+ZT4rUD3UcF6l57acZvLNK7PBZL+ty82Z1qhK1/yXIY4bdx04FKMgR0g4IAag==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-symbol/-/micromark-util-symbol-2.0.0.tgz", + "integrity": "sha512-8JZt9ElZ5kyTnO94muPxIGS8oyElRJaiJO8EzV6ZSyGQ1Is8xwl4Q45qU5UOg+bGH4AikWziz0iN4sFLWs8PGw==", "funding": [ { "type": "GitHub Sponsors", @@ -4653,9 +6107,9 @@ ] }, "node_modules/micromark-util-types": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-1.1.0.tgz", - "integrity": "sha512-ukRBgie8TIAcacscVHSiddHjO4k/q3pnedmzMQ4iwDcK0FtFCohKOlFbaOL/mPgfnPsL3C1ZyxJa4sbWrBl3jg==", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-util-types/-/micromark-util-types-2.0.0.tgz", + "integrity": "sha512-oNh6S2WMHWRZrmutsRmDDfkzKtxF+bc2VxLC9dvtrDIRFln627VsFP6fLMgTryGDljgLPjkrzQSDcPrjPyDJ5w==", "funding": [ { "type": "GitHub Sponsors", @@ -4694,6 +6148,7 @@ "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, "dependencies": { "brace-expansion": "^1.1.7" }, @@ -4709,19 +6164,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/minipass": { + "version": "7.1.1", 
+ "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.1.tgz", + "integrity": "sha512-UZ7eQ+h8ywIRAW1hIEl2AqdwzJucU/Kp59+8kkZeSvafXhZjul247BvIJjEVFVeON6d7lM46XX1HXCduKAS8VA==", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, "node_modules/mkdirp-classic": { "version": "0.5.3", "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==" }, - "node_modules/mri": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/mri/-/mri-1.2.0.tgz", - "integrity": "sha512-tzzskb3bG8LvYGFF/mDTpq3jpI6Q9wc3LEmBaghu+DdCssd1FakN7Bc0hVNmEyGq1bq3RgfkCb3cmQLpNPOroA==", - "engines": { - "node": ">=4" - } - }, "node_modules/ms": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", @@ -4766,17 +6221,17 @@ "dev": true }, "node_modules/next": { - "version": "14.0.3", - "resolved": "https://registry.npmjs.org/next/-/next-14.0.3.tgz", - "integrity": "sha512-AbYdRNfImBr3XGtvnwOxq8ekVCwbFTv/UJoLwmaX89nk9i051AEY4/HAWzU0YpaTDw8IofUpmuIlvzWF13jxIw==", + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/next/-/next-14.2.3.tgz", + "integrity": "sha512-dowFkFTR8v79NPJO4QsBUtxv0g9BrS/phluVpMAt2ku7H+cbcBJlopXjkWlwxrk/xGqMemr7JkGPGemPrLLX7A==", "dependencies": { - "@next/env": "14.0.3", - "@swc/helpers": "0.5.2", + "@next/env": "14.2.3", + "@swc/helpers": "0.5.5", "busboy": "1.6.0", - "caniuse-lite": "^1.0.30001406", + "caniuse-lite": "^1.0.30001579", + "graceful-fs": "^4.2.11", "postcss": "8.4.31", - "styled-jsx": "5.1.1", - "watchpack": "2.4.0" + "styled-jsx": "5.1.1" }, "bin": { "next": "dist/bin/next" @@ -4785,18 +6240,19 @@ "node": ">=18.17.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.0.3", - "@next/swc-darwin-x64": "14.0.3", - "@next/swc-linux-arm64-gnu": "14.0.3", - "@next/swc-linux-arm64-musl": "14.0.3", - "@next/swc-linux-x64-gnu": "14.0.3", - "@next/swc-linux-x64-musl": 
"14.0.3", - "@next/swc-win32-arm64-msvc": "14.0.3", - "@next/swc-win32-ia32-msvc": "14.0.3", - "@next/swc-win32-x64-msvc": "14.0.3" + "@next/swc-darwin-arm64": "14.2.3", + "@next/swc-darwin-x64": "14.2.3", + "@next/swc-linux-arm64-gnu": "14.2.3", + "@next/swc-linux-arm64-musl": "14.2.3", + "@next/swc-linux-x64-gnu": "14.2.3", + "@next/swc-linux-x64-musl": "14.2.3", + "@next/swc-win32-arm64-msvc": "14.2.3", + "@next/swc-win32-ia32-msvc": "14.2.3", + "@next/swc-win32-x64-msvc": "14.2.3" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", + "@playwright/test": "^1.41.2", "react": "^18.2.0", "react-dom": "^18.2.0", "sass": "^1.3.0" @@ -4805,6 +6261,9 @@ "@opentelemetry/api": { "optional": true }, + "@playwright/test": { + "optional": true + }, "sass": { "optional": true } @@ -4838,9 +6297,9 @@ } }, "node_modules/node-abi": { - "version": "3.51.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.51.0.tgz", - "integrity": "sha512-SQkEP4hmNWjlniS5zdnfIXTk1x7Ome85RDzHlTbBtzE97Gfwz/Ipw4v/Ryk20DWIy3yCNVLVlGKApCnmvYoJbA==", + "version": "3.62.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.62.0.tgz", + "integrity": "sha512-CPMcGa+y33xuL1E0TcNIu4YyaZCxnnvkVaEXrsosR3FxN+fV8xvb7Mzpb7IgKler10qeMkE6+Dp8qJhpzdq35g==", "dependencies": { "semver": "^7.3.5" }, @@ -4874,6 +6333,2400 @@ "node": ">=0.10.0" } }, + "node_modules/npm": { + "version": "10.8.0", + "resolved": "https://registry.npmjs.org/npm/-/npm-10.8.0.tgz", + "integrity": "sha512-wh93uRczgp7HDnPMiLXcCkv2hagdJS0zJ9KT/31d0FoXP02+qgN2AOwpaW85fxRWkinl2rELfPw+CjBXW48/jQ==", + "bundleDependencies": [ + "@isaacs/string-locale-compare", + "@npmcli/arborist", + "@npmcli/config", + "@npmcli/fs", + "@npmcli/map-workspaces", + "@npmcli/package-json", + "@npmcli/promise-spawn", + "@npmcli/redact", + "@npmcli/run-script", + "@sigstore/tuf", + "abbrev", + "archy", + "cacache", + "chalk", + "ci-info", + "cli-columns", + "fastest-levenshtein", + "fs-minipass", + "glob", + "graceful-fs", + 
"hosted-git-info", + "ini", + "init-package-json", + "is-cidr", + "json-parse-even-better-errors", + "libnpmaccess", + "libnpmdiff", + "libnpmexec", + "libnpmfund", + "libnpmhook", + "libnpmorg", + "libnpmpack", + "libnpmpublish", + "libnpmsearch", + "libnpmteam", + "libnpmversion", + "make-fetch-happen", + "minimatch", + "minipass", + "minipass-pipeline", + "ms", + "node-gyp", + "nopt", + "normalize-package-data", + "npm-audit-report", + "npm-install-checks", + "npm-package-arg", + "npm-pick-manifest", + "npm-profile", + "npm-registry-fetch", + "npm-user-validate", + "p-map", + "pacote", + "parse-conflict-json", + "proc-log", + "qrcode-terminal", + "read", + "semver", + "spdx-expression-parse", + "ssri", + "supports-color", + "tar", + "text-table", + "tiny-relative-date", + "treeverse", + "validate-npm-package-name", + "which", + "write-file-atomic" + ], + "dependencies": { + "@isaacs/string-locale-compare": "^1.1.0", + "@npmcli/arborist": "^7.5.2", + "@npmcli/config": "^8.3.2", + "@npmcli/fs": "^3.1.1", + "@npmcli/map-workspaces": "^3.0.6", + "@npmcli/package-json": "^5.1.0", + "@npmcli/promise-spawn": "^7.0.2", + "@npmcli/redact": "^2.0.0", + "@npmcli/run-script": "^8.1.0", + "@sigstore/tuf": "^2.3.3", + "abbrev": "^2.0.0", + "archy": "~1.0.0", + "cacache": "^18.0.3", + "chalk": "^5.3.0", + "ci-info": "^4.0.0", + "cli-columns": "^4.0.0", + "fastest-levenshtein": "^1.0.16", + "fs-minipass": "^3.0.3", + "glob": "^10.3.15", + "graceful-fs": "^4.2.11", + "hosted-git-info": "^7.0.2", + "ini": "^4.1.2", + "init-package-json": "^6.0.3", + "is-cidr": "^5.0.5", + "json-parse-even-better-errors": "^3.0.2", + "libnpmaccess": "^8.0.6", + "libnpmdiff": "^6.1.2", + "libnpmexec": "^8.1.1", + "libnpmfund": "^5.0.10", + "libnpmhook": "^10.0.5", + "libnpmorg": "^6.0.6", + "libnpmpack": "^7.0.2", + "libnpmpublish": "^9.0.8", + "libnpmsearch": "^7.0.5", + "libnpmteam": "^6.0.5", + "libnpmversion": "^6.0.2", + "make-fetch-happen": "^13.0.1", + "minimatch": "^9.0.4", + "minipass": 
"^7.1.1", + "minipass-pipeline": "^1.2.4", + "ms": "^2.1.2", + "node-gyp": "^10.1.0", + "nopt": "^7.2.1", + "normalize-package-data": "^6.0.1", + "npm-audit-report": "^5.0.0", + "npm-install-checks": "^6.3.0", + "npm-package-arg": "^11.0.2", + "npm-pick-manifest": "^9.0.1", + "npm-profile": "^10.0.0", + "npm-registry-fetch": "^17.0.1", + "npm-user-validate": "^2.0.1", + "p-map": "^4.0.0", + "pacote": "^18.0.6", + "parse-conflict-json": "^3.0.1", + "proc-log": "^4.2.0", + "qrcode-terminal": "^0.12.0", + "read": "^3.0.1", + "semver": "^7.6.2", + "spdx-expression-parse": "^4.0.0", + "ssri": "^10.0.6", + "supports-color": "^9.4.0", + "tar": "^6.2.1", + "text-table": "~0.2.0", + "tiny-relative-date": "^1.3.0", + "treeverse": "^3.0.0", + "validate-npm-package-name": "^5.0.1", + "which": "^4.0.0", + "write-file-atomic": "^5.0.1" + }, + "bin": { + "npm": "bin/npm-cli.js", + "npx": "bin/npx-cli.js" + }, + "engines": { + "node": "^18.17.0 || >=20.5.0" + } + }, + "node_modules/npm/node_modules/@isaacs/cliui": { + "version": "8.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/npm/node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.0.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/npm/node_modules/@isaacs/cliui/node_modules/emoji-regex": { + "version": "9.2.2", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/@isaacs/cliui/node_modules/string-width": { + "version": "5.1.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + 
}, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm/node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/npm/node_modules/@isaacs/string-locale-compare": { + "version": "1.1.0", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/@npmcli/agent": { + "version": "2.2.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "agent-base": "^7.1.0", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.1", + "lru-cache": "^10.0.1", + "socks-proxy-agent": "^8.0.3" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/arborist": { + "version": "7.5.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@isaacs/string-locale-compare": "^1.1.0", + "@npmcli/fs": "^3.1.1", + "@npmcli/installed-package-contents": "^2.1.0", + "@npmcli/map-workspaces": "^3.0.2", + "@npmcli/metavuln-calculator": "^7.1.1", + "@npmcli/name-from-folder": "^2.0.0", + "@npmcli/node-gyp": "^3.0.0", + "@npmcli/package-json": "^5.1.0", + "@npmcli/query": "^3.1.0", + "@npmcli/redact": "^2.0.0", + "@npmcli/run-script": "^8.1.0", + "bin-links": "^4.0.4", + "cacache": "^18.0.3", + "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^7.0.2", + "json-parse-even-better-errors": "^3.0.2", + "json-stringify-nice": "^1.1.4", + "lru-cache": "^10.2.2", + "minimatch": "^9.0.4", + "nopt": "^7.2.1", + "npm-install-checks": "^6.2.0", + "npm-package-arg": "^11.0.2", + "npm-pick-manifest": "^9.0.1", + "npm-registry-fetch": "^17.0.1", + "pacote": "^18.0.6", + "parse-conflict-json": "^3.0.0", + "proc-log": "^4.2.0", + "proggy": "^2.0.0", + "promise-all-reject-late": "^1.0.0", + 
"promise-call-limit": "^3.0.1", + "read-package-json-fast": "^3.0.2", + "semver": "^7.3.7", + "ssri": "^10.0.6", + "treeverse": "^3.0.0", + "walk-up-path": "^3.0.1" + }, + "bin": { + "arborist": "bin/index.js" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/config": { + "version": "8.3.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/map-workspaces": "^3.0.2", + "ci-info": "^4.0.0", + "ini": "^4.1.2", + "nopt": "^7.2.1", + "proc-log": "^4.2.0", + "read-package-json-fast": "^3.0.2", + "semver": "^7.3.5", + "walk-up-path": "^3.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/fs": { + "version": "3.1.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/git": { + "version": "5.0.7", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/promise-spawn": "^7.0.0", + "lru-cache": "^10.0.1", + "npm-pick-manifest": "^9.0.0", + "proc-log": "^4.0.0", + "promise-inflight": "^1.0.1", + "promise-retry": "^2.0.1", + "semver": "^7.3.5", + "which": "^4.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/installed-package-contents": { + "version": "2.1.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-bundled": "^3.0.0", + "npm-normalize-package-bin": "^3.0.0" + }, + "bin": { + "installed-package-contents": "bin/index.js" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/map-workspaces": { + "version": "3.0.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/name-from-folder": "^2.0.0", + "glob": "^10.2.2", + "minimatch": "^9.0.0", + "read-package-json-fast": "^3.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || 
>=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/metavuln-calculator": { + "version": "7.1.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "cacache": "^18.0.0", + "json-parse-even-better-errors": "^3.0.0", + "pacote": "^18.0.0", + "proc-log": "^4.1.0", + "semver": "^7.3.5" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/name-from-folder": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/node-gyp": { + "version": "3.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/package-json": { + "version": "5.1.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/git": "^5.0.0", + "glob": "^10.2.2", + "hosted-git-info": "^7.0.0", + "json-parse-even-better-errors": "^3.0.0", + "normalize-package-data": "^6.0.0", + "proc-log": "^4.0.0", + "semver": "^7.5.3" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/promise-spawn": { + "version": "7.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "which": "^4.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/query": { + "version": "3.1.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "postcss-selector-parser": "^6.0.10" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/redact": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@npmcli/run-script": { + "version": "8.1.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/node-gyp": "^3.0.0", + "@npmcli/package-json": "^5.0.0", + 
"@npmcli/promise-spawn": "^7.0.0", + "node-gyp": "^10.0.0", + "proc-log": "^4.0.0", + "which": "^4.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "inBundle": true, + "license": "MIT", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/npm/node_modules/@sigstore/bundle": { + "version": "2.3.1", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "@sigstore/protobuf-specs": "^0.3.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@sigstore/core": { + "version": "1.1.0", + "inBundle": true, + "license": "Apache-2.0", + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@sigstore/protobuf-specs": { + "version": "0.3.2", + "inBundle": true, + "license": "Apache-2.0", + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@sigstore/sign": { + "version": "2.3.1", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "@sigstore/bundle": "^2.3.0", + "@sigstore/core": "^1.0.0", + "@sigstore/protobuf-specs": "^0.3.1", + "make-fetch-happen": "^13.0.1", + "proc-log": "^4.2.0", + "promise-retry": "^2.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@sigstore/tuf": { + "version": "2.3.3", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "@sigstore/protobuf-specs": "^0.3.0", + "tuf-js": "^2.2.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@sigstore/verify": { + "version": "1.2.0", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "@sigstore/bundle": "^2.3.1", + "@sigstore/core": "^1.1.0", + "@sigstore/protobuf-specs": "^0.3.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@tufjs/canonical-json": { + "version": "2.0.0", + 
"inBundle": true, + "license": "MIT", + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/@tufjs/models": { + "version": "2.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "@tufjs/canonical-json": "2.0.0", + "minimatch": "^9.0.4" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/abbrev": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/agent-base": { + "version": "7.1.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/npm/node_modules/aggregate-error": { + "version": "3.1.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "clean-stack": "^2.0.0", + "indent-string": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/ansi-regex": { + "version": "5.0.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/ansi-styles": { + "version": "6.2.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/npm/node_modules/aproba": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/archy": { + "version": "1.0.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/balanced-match": { + "version": "1.0.2", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/bin-links": { + "version": "4.0.4", + "inBundle": true, + "license": "ISC", + "dependencies": { + "cmd-shim": "^6.0.0", + "npm-normalize-package-bin": "^3.0.0", + "read-cmd-shim": "^4.0.0", + "write-file-atomic": "^5.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + 
}, + "node_modules/npm/node_modules/binary-extensions": { + "version": "2.3.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm/node_modules/brace-expansion": { + "version": "2.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/npm/node_modules/cacache": { + "version": "18.0.3", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/fs": "^3.1.0", + "fs-minipass": "^3.0.0", + "glob": "^10.2.2", + "lru-cache": "^10.0.1", + "minipass": "^7.0.3", + "minipass-collect": "^2.0.1", + "minipass-flush": "^1.0.5", + "minipass-pipeline": "^1.2.4", + "p-map": "^4.0.0", + "ssri": "^10.0.0", + "tar": "^6.1.11", + "unique-filename": "^3.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/chalk": { + "version": "5.3.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "^12.17.0 || ^14.13 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, + "node_modules/npm/node_modules/chownr": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": ">=10" + } + }, + "node_modules/npm/node_modules/ci-info": { + "version": "4.0.0", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/sibiraj-s" + } + ], + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/cidr-regex": { + "version": "4.0.5", + "inBundle": true, + "license": "BSD-2-Clause", + "dependencies": { + "ip-regex": "^5.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/npm/node_modules/clean-stack": { + "version": "2.2.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/npm/node_modules/cli-columns": { + "version": "4.0.0", + "inBundle": true, + 
"license": "MIT", + "dependencies": { + "string-width": "^4.2.3", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">= 10" + } + }, + "node_modules/npm/node_modules/cmd-shim": { + "version": "6.0.3", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/color-convert": { + "version": "2.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/npm/node_modules/color-name": { + "version": "1.1.4", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/common-ancestor-path": { + "version": "1.0.1", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/cross-spawn": { + "version": "7.0.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/npm/node_modules/cross-spawn/node_modules/which": { + "version": "2.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/npm/node_modules/cssesc": { + "version": "3.0.0", + "inBundle": true, + "license": "MIT", + "bin": { + "cssesc": "bin/cssesc" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/npm/node_modules/debug": { + "version": "4.3.4", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/npm/node_modules/debug/node_modules/ms": { + "version": "2.1.2", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/diff": { + "version": "5.2.0", + "inBundle": true, + "license": "BSD-3-Clause", + "engines": { + "node": 
">=0.3.1" + } + }, + "node_modules/npm/node_modules/eastasianwidth": { + "version": "0.2.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/emoji-regex": { + "version": "8.0.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/encoding": { + "version": "0.1.13", + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "iconv-lite": "^0.6.2" + } + }, + "node_modules/npm/node_modules/env-paths": { + "version": "2.2.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + }, + "node_modules/npm/node_modules/err-code": { + "version": "2.0.3", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/exponential-backoff": { + "version": "3.1.1", + "inBundle": true, + "license": "Apache-2.0" + }, + "node_modules/npm/node_modules/fastest-levenshtein": { + "version": "1.0.16", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">= 4.9.1" + } + }, + "node_modules/npm/node_modules/foreground-child": { + "version": "3.1.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "cross-spawn": "^7.0.0", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/fs-minipass": { + "version": "3.0.3", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/function-bind": { + "version": "1.1.2", + "inBundle": true, + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/npm/node_modules/glob": { + "version": "10.3.15", + "inBundle": true, + "license": "ISC", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^2.3.6", + "minimatch": "^9.0.1", + "minipass": "^7.0.4", + "path-scurry": "^1.11.0" + }, + "bin": { + "glob": 
"dist/esm/bin.mjs" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/graceful-fs": { + "version": "4.2.11", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/hasown": { + "version": "2.0.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/npm/node_modules/hosted-git-info": { + "version": "7.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "lru-cache": "^10.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/http-cache-semantics": { + "version": "4.1.1", + "inBundle": true, + "license": "BSD-2-Clause" + }, + "node_modules/npm/node_modules/http-proxy-agent": { + "version": "7.0.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.0", + "debug": "^4.3.4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/npm/node_modules/https-proxy-agent": { + "version": "7.0.4", + "inBundle": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.0.2", + "debug": "4" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/npm/node_modules/iconv-lite": { + "version": "0.6.3", + "inBundle": true, + "license": "MIT", + "optional": true, + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/npm/node_modules/ignore-walk": { + "version": "6.0.5", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minimatch": "^9.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/imurmurhash": { + "version": "0.1.4", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=0.8.19" + } + }, + "node_modules/npm/node_modules/indent-string": { + "version": "4.0.0", + "inBundle": true, + "license": 
"MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/ini": { + "version": "4.1.2", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/init-package-json": { + "version": "6.0.3", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/package-json": "^5.0.0", + "npm-package-arg": "^11.0.0", + "promzard": "^1.0.0", + "read": "^3.0.1", + "semver": "^7.3.5", + "validate-npm-package-license": "^3.0.4", + "validate-npm-package-name": "^5.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/ip-address": { + "version": "9.0.5", + "inBundle": true, + "license": "MIT", + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } + }, + "node_modules/npm/node_modules/ip-regex": { + "version": "5.0.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "^12.20.0 || ^14.13.1 || >=16.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm/node_modules/is-cidr": { + "version": "5.0.5", + "inBundle": true, + "license": "BSD-2-Clause", + "dependencies": { + "cidr-regex": "^4.0.4" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/npm/node_modules/is-core-module": { + "version": "2.13.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "hasown": "^2.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/npm/node_modules/is-fullwidth-code-point": { + "version": "3.0.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/is-lambda": { + "version": "1.0.1", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/isexe": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/jackspeak": { + "version": "2.3.6", + 
"inBundle": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, + "node_modules/npm/node_modules/jsbn": { + "version": "1.1.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/json-parse-even-better-errors": { + "version": "3.0.2", + "inBundle": true, + "license": "MIT", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/json-stringify-nice": { + "version": "1.1.4", + "inBundle": true, + "license": "ISC", + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/jsonparse": { + "version": "1.3.1", + "engines": [ + "node >= 0.2.0" + ], + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/just-diff": { + "version": "6.0.2", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/just-diff-apply": { + "version": "5.5.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/libnpmaccess": { + "version": "8.0.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-package-arg": "^11.0.2", + "npm-registry-fetch": "^17.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmdiff": { + "version": "6.1.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/arborist": "^7.5.2", + "@npmcli/installed-package-contents": "^2.1.0", + "binary-extensions": "^2.3.0", + "diff": "^5.1.0", + "minimatch": "^9.0.4", + "npm-package-arg": "^11.0.2", + "pacote": "^18.0.6", + "tar": "^6.2.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmexec": { + "version": "8.1.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/arborist": "^7.5.2", + 
"@npmcli/run-script": "^8.1.0", + "ci-info": "^4.0.0", + "npm-package-arg": "^11.0.2", + "pacote": "^18.0.6", + "proc-log": "^4.2.0", + "read": "^3.0.1", + "read-package-json-fast": "^3.0.2", + "semver": "^7.3.7", + "walk-up-path": "^3.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmfund": { + "version": "5.0.10", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/arborist": "^7.5.2" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmhook": { + "version": "10.0.5", + "inBundle": true, + "license": "ISC", + "dependencies": { + "aproba": "^2.0.0", + "npm-registry-fetch": "^17.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmorg": { + "version": "6.0.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "aproba": "^2.0.0", + "npm-registry-fetch": "^17.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmpack": { + "version": "7.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/arborist": "^7.5.2", + "@npmcli/run-script": "^8.1.0", + "npm-package-arg": "^11.0.2", + "pacote": "^18.0.6" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmpublish": { + "version": "9.0.8", + "inBundle": true, + "license": "ISC", + "dependencies": { + "ci-info": "^4.0.0", + "normalize-package-data": "^6.0.1", + "npm-package-arg": "^11.0.2", + "npm-registry-fetch": "^17.0.1", + "proc-log": "^4.2.0", + "semver": "^7.3.7", + "sigstore": "^2.2.0", + "ssri": "^10.0.6" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmsearch": { + "version": "7.0.5", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-registry-fetch": "^17.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + 
"node_modules/npm/node_modules/libnpmteam": { + "version": "6.0.5", + "inBundle": true, + "license": "ISC", + "dependencies": { + "aproba": "^2.0.0", + "npm-registry-fetch": "^17.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/libnpmversion": { + "version": "6.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/git": "^5.0.7", + "@npmcli/run-script": "^8.1.0", + "json-parse-even-better-errors": "^3.0.2", + "proc-log": "^4.2.0", + "semver": "^7.3.7" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/lru-cache": { + "version": "10.2.2", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "14 || >=16.14" + } + }, + "node_modules/npm/node_modules/make-fetch-happen": { + "version": "13.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/agent": "^2.0.0", + "cacache": "^18.0.0", + "http-cache-semantics": "^4.1.1", + "is-lambda": "^1.0.1", + "minipass": "^7.0.2", + "minipass-fetch": "^3.0.0", + "minipass-flush": "^1.0.5", + "minipass-pipeline": "^1.2.4", + "negotiator": "^0.6.3", + "proc-log": "^4.2.0", + "promise-retry": "^2.0.1", + "ssri": "^10.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/minimatch": { + "version": "9.0.4", + "inBundle": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/minipass": { + "version": "7.1.1", + "inBundle": true, + "license": "ISC", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/npm/node_modules/minipass-collect": { + "version": "2.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/npm/node_modules/minipass-fetch": 
{ + "version": "3.0.5", + "inBundle": true, + "license": "MIT", + "dependencies": { + "minipass": "^7.0.3", + "minipass-sized": "^1.0.3", + "minizlib": "^2.1.2" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + }, + "optionalDependencies": { + "encoding": "^0.1.13" + } + }, + "node_modules/npm/node_modules/minipass-flush": { + "version": "1.0.5", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/npm/node_modules/minipass-flush/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/minipass-json-stream": { + "version": "1.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "jsonparse": "^1.3.1", + "minipass": "^3.0.0" + } + }, + "node_modules/npm/node_modules/minipass-json-stream/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/minipass-pipeline": { + "version": "1.2.4", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/minipass-pipeline/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/minipass-sized": { + "version": "1.0.3", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/minipass-sized/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, 
+ "node_modules/npm/node_modules/minizlib": { + "version": "2.1.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "minipass": "^3.0.0", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/npm/node_modules/minizlib/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/mkdirp": { + "version": "1.0.4", + "inBundle": true, + "license": "MIT", + "bin": { + "mkdirp": "bin/cmd.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/npm/node_modules/ms": { + "version": "2.1.3", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/mute-stream": { + "version": "1.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/negotiator": { + "version": "0.6.3", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/npm/node_modules/node-gyp": { + "version": "10.1.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "env-paths": "^2.2.0", + "exponential-backoff": "^3.1.1", + "glob": "^10.3.10", + "graceful-fs": "^4.2.6", + "make-fetch-happen": "^13.0.0", + "nopt": "^7.0.0", + "proc-log": "^3.0.0", + "semver": "^7.3.5", + "tar": "^6.1.2", + "which": "^4.0.0" + }, + "bin": { + "node-gyp": "bin/node-gyp.js" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/node-gyp/node_modules/proc-log": { + "version": "3.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/nopt": { + "version": "7.2.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "abbrev": "^2.0.0" + }, + "bin": { + "nopt": "bin/nopt.js" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" 
+ } + }, + "node_modules/npm/node_modules/normalize-package-data": { + "version": "6.0.1", + "inBundle": true, + "license": "BSD-2-Clause", + "dependencies": { + "hosted-git-info": "^7.0.0", + "is-core-module": "^2.8.1", + "semver": "^7.3.5", + "validate-npm-package-license": "^3.0.4" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-audit-report": { + "version": "5.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-bundled": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-normalize-package-bin": "^3.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-install-checks": { + "version": "6.3.0", + "inBundle": true, + "license": "BSD-2-Clause", + "dependencies": { + "semver": "^7.1.1" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-normalize-package-bin": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-package-arg": { + "version": "11.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "hosted-git-info": "^7.0.0", + "proc-log": "^4.0.0", + "semver": "^7.3.5", + "validate-npm-package-name": "^5.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-packlist": { + "version": "8.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "ignore-walk": "^6.0.4" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-pick-manifest": { + "version": "9.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-install-checks": "^6.0.0", + "npm-normalize-package-bin": "^3.0.0", + "npm-package-arg": 
"^11.0.0", + "semver": "^7.3.5" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-profile": { + "version": "10.0.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "npm-registry-fetch": "^17.0.1", + "proc-log": "^4.0.0" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-registry-fetch": { + "version": "17.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/redact": "^2.0.0", + "make-fetch-happen": "^13.0.0", + "minipass": "^7.0.2", + "minipass-fetch": "^3.0.0", + "minipass-json-stream": "^1.0.1", + "minizlib": "^2.1.2", + "npm-package-arg": "^11.0.0", + "proc-log": "^4.0.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/npm-user-validate": { + "version": "2.0.1", + "inBundle": true, + "license": "BSD-2-Clause", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/p-map": { + "version": "4.0.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "aggregate-error": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm/node_modules/pacote": { + "version": "18.0.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "@npmcli/git": "^5.0.0", + "@npmcli/installed-package-contents": "^2.0.1", + "@npmcli/package-json": "^5.1.0", + "@npmcli/promise-spawn": "^7.0.0", + "@npmcli/run-script": "^8.0.0", + "cacache": "^18.0.0", + "fs-minipass": "^3.0.0", + "minipass": "^7.0.2", + "npm-package-arg": "^11.0.0", + "npm-packlist": "^8.0.0", + "npm-pick-manifest": "^9.0.0", + "npm-registry-fetch": "^17.0.0", + "proc-log": "^4.0.0", + "promise-retry": "^2.0.1", + "sigstore": "^2.2.0", + "ssri": "^10.0.0", + "tar": "^6.1.11" + }, + "bin": { + "pacote": "bin/index.js" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + 
"node_modules/npm/node_modules/parse-conflict-json": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "json-parse-even-better-errors": "^3.0.0", + "just-diff": "^6.0.0", + "just-diff-apply": "^5.2.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/path-key": { + "version": "3.1.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/path-scurry": { + "version": "1.11.1", + "inBundle": true, + "license": "BlueOak-1.0.0", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/postcss-selector-parser": { + "version": "6.0.16", + "inBundle": true, + "license": "MIT", + "dependencies": { + "cssesc": "^3.0.0", + "util-deprecate": "^1.0.2" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/npm/node_modules/proc-log": { + "version": "4.2.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/proggy": { + "version": "2.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/promise-all-reject-late": { + "version": "1.0.1", + "inBundle": true, + "license": "ISC", + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/promise-call-limit": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC", + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/promise-inflight": { + "version": "1.0.1", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/promise-retry": { + "version": "2.0.1", + "inBundle": true, + "license": 
"MIT", + "dependencies": { + "err-code": "^2.0.2", + "retry": "^0.12.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/npm/node_modules/promzard": { + "version": "1.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "read": "^3.0.1" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/qrcode-terminal": { + "version": "0.12.0", + "inBundle": true, + "bin": { + "qrcode-terminal": "bin/qrcode-terminal.js" + } + }, + "node_modules/npm/node_modules/read": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "mute-stream": "^1.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/read-cmd-shim": { + "version": "4.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/read-package-json-fast": { + "version": "3.0.2", + "inBundle": true, + "license": "ISC", + "dependencies": { + "json-parse-even-better-errors": "^3.0.0", + "npm-normalize-package-bin": "^3.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/retry": { + "version": "0.12.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, + "node_modules/npm/node_modules/safer-buffer": { + "version": "2.1.2", + "inBundle": true, + "license": "MIT", + "optional": true + }, + "node_modules/npm/node_modules/semver": { + "version": "7.6.2", + "inBundle": true, + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/npm/node_modules/shebang-command": { + "version": "2.0.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/shebang-regex": { + "version": "3.0.0", + "inBundle": true, + "license": 
"MIT", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/signal-exit": { + "version": "4.1.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/npm/node_modules/sigstore": { + "version": "2.3.0", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "@sigstore/bundle": "^2.3.1", + "@sigstore/core": "^1.0.0", + "@sigstore/protobuf-specs": "^0.3.1", + "@sigstore/sign": "^2.3.0", + "@sigstore/tuf": "^2.3.1", + "@sigstore/verify": "^1.2.0" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/smart-buffer": { + "version": "4.2.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/npm/node_modules/socks": { + "version": "2.8.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ip-address": "^9.0.5", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/npm/node_modules/socks-proxy-agent": { + "version": "8.0.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "agent-base": "^7.1.1", + "debug": "^4.3.4", + "socks": "^2.7.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/npm/node_modules/spdx-correct": { + "version": "3.2.0", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-expression-parse": "^3.0.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/npm/node_modules/spdx-correct/node_modules/spdx-expression-parse": { + "version": "3.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/npm/node_modules/spdx-exceptions": { + "version": "2.5.0", + "inBundle": true, + "license": "CC-BY-3.0" + }, + "node_modules/npm/node_modules/spdx-expression-parse": { + "version": "4.0.0", + 
"inBundle": true, + "license": "MIT", + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/npm/node_modules/spdx-license-ids": { + "version": "3.0.17", + "inBundle": true, + "license": "CC0-1.0" + }, + "node_modules/npm/node_modules/sprintf-js": { + "version": "1.1.3", + "inBundle": true, + "license": "BSD-3-Clause" + }, + "node_modules/npm/node_modules/ssri": { + "version": "10.0.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^7.0.3" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/string-width": { + "version": "4.2.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "inBundle": true, + "license": "MIT", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/strip-ansi": { + "version": "6.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/supports-color": { + "version": "9.4.0", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, + "node_modules/npm/node_modules/tar": { + "version": "6.2.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "chownr": "^2.0.0", + "fs-minipass": 
"^2.0.0", + "minipass": "^5.0.0", + "minizlib": "^2.1.1", + "mkdirp": "^1.0.3", + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/npm/node_modules/tar/node_modules/fs-minipass": { + "version": "2.1.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "minipass": "^3.0.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/npm/node_modules/tar/node_modules/fs-minipass/node_modules/minipass": { + "version": "3.3.6", + "inBundle": true, + "license": "ISC", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/tar/node_modules/minipass": { + "version": "5.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": ">=8" + } + }, + "node_modules/npm/node_modules/text-table": { + "version": "0.2.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/tiny-relative-date": { + "version": "1.3.0", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/treeverse": { + "version": "3.0.0", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/tuf-js": { + "version": "2.2.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "@tufjs/models": "2.0.1", + "debug": "^4.3.4", + "make-fetch-happen": "^13.0.1" + }, + "engines": { + "node": "^16.14.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/unique-filename": { + "version": "3.0.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "unique-slug": "^4.0.0" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/unique-slug": { + "version": "4.0.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "imurmurhash": "^0.1.4" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/util-deprecate": { + "version": "1.0.2", + 
"inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/validate-npm-package-license": { + "version": "3.0.4", + "inBundle": true, + "license": "Apache-2.0", + "dependencies": { + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0" + } + }, + "node_modules/npm/node_modules/validate-npm-package-license/node_modules/spdx-expression-parse": { + "version": "3.0.1", + "inBundle": true, + "license": "MIT", + "dependencies": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "node_modules/npm/node_modules/validate-npm-package-name": { + "version": "5.0.1", + "inBundle": true, + "license": "ISC", + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/walk-up-path": { + "version": "3.0.1", + "inBundle": true, + "license": "ISC" + }, + "node_modules/npm/node_modules/which": { + "version": "4.0.0", + "inBundle": true, + "license": "ISC", + "dependencies": { + "isexe": "^3.1.1" + }, + "bin": { + "node-which": "bin/which.js" + }, + "engines": { + "node": "^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/which/node_modules/isexe": { + "version": "3.1.1", + "inBundle": true, + "license": "ISC", + "engines": { + "node": ">=16" + } + }, + "node_modules/npm/node_modules/wrap-ansi": { + "version": "8.1.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/npm/node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + 
"node_modules/npm/node_modules/wrap-ansi-cjs/node_modules/ansi-styles": { + "version": "4.3.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/npm/node_modules/wrap-ansi/node_modules/ansi-regex": { + "version": "6.0.1", + "inBundle": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/npm/node_modules/wrap-ansi/node_modules/emoji-regex": { + "version": "9.2.2", + "inBundle": true, + "license": "MIT" + }, + "node_modules/npm/node_modules/wrap-ansi/node_modules/string-width": { + "version": "5.1.2", + "inBundle": true, + "license": "MIT", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/npm/node_modules/wrap-ansi/node_modules/strip-ansi": { + "version": "7.1.0", + "inBundle": true, + "license": "MIT", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/npm/node_modules/write-file-atomic": { + "version": "5.0.1", + "inBundle": true, + "license": "ISC", + "dependencies": { + "imurmurhash": "^0.1.4", + "signal-exit": "^4.0.1" + }, + "engines": { + "node": "^14.17.0 || ^16.13.0 || >=18.0.0" + } + }, + "node_modules/npm/node_modules/yallist": { + "version": "4.0.0", + "inBundle": true, + "license": "ISC" + }, "node_modules/object-assign": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", @@ -4927,28 +8780,29 @@ } }, "node_modules/object.entries": { - "version": "1.1.7", - "resolved": 
"https://registry.npmjs.org/object.entries/-/object.entries-1.1.7.tgz", - "integrity": "sha512-jCBs/0plmPsOnrKAfFQXRG2NFjlhZgjjcBLSmTnEhU8U6vVTsVe8ANeQJCHTl3gSsI4J+0emOoCgoKlmQPMgmA==", + "version": "1.1.8", + "resolved": "https://registry.npmjs.org/object.entries/-/object.entries-1.1.8.tgz", + "integrity": "sha512-cmopxi8VwRIAw/fkijJohSfpef5PdN0pMQJN6VC/ZKvn0LIknWD8KtgY6KlQdEc4tIjcQ3HxSMmnvtzIscdaYQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" }, "engines": { "node": ">= 0.4" } }, "node_modules/object.fromentries": { - "version": "2.0.7", - "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.7.tgz", - "integrity": "sha512-UPbPHML6sL8PI/mOqPwsH4G6iyXcCGzLin8KvEPenOZN5lpCNBZZQ+V62vdjB1mQHrmqGQt5/OJzemUA+KJmEA==", + "version": "2.0.8", + "resolved": "https://registry.npmjs.org/object.fromentries/-/object.fromentries-2.0.8.tgz", + "integrity": "sha512-k6E21FzySsSK5a21KRADBd/NGneRegFO5pLHfdQLpRDETUNJueLXs3WCzyQ3tFRDYgbq3KHGXfTbi2bs8WQ6rQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-object-atoms": "^1.0.0" }, "engines": { "node": ">= 0.4" @@ -4958,39 +8812,45 @@ } }, "node_modules/object.groupby": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.1.tgz", - "integrity": "sha512-HqaQtqLnp/8Bn4GL16cj+CUYbnpe1bh0TtEaWvybszDG4tgxCJuRpV8VGuvNaI1fAnI4lUJzDG55MXcOH4JZcQ==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/object.groupby/-/object.groupby-1.0.3.tgz", + "integrity": "sha512-+Lhy3TQTuzXI5hevh8sBGqbmurHbbIjAi0Z4S63nthVLmLxfbj4T54a4CfZrXIrt9iP4mVAPYMo/v99taj3wjQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": 
"^1.2.0", - "es-abstract": "^1.22.1", - "get-intrinsic": "^1.2.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2" + }, + "engines": { + "node": ">= 0.4" } }, "node_modules/object.hasown": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/object.hasown/-/object.hasown-1.1.3.tgz", - "integrity": "sha512-fFI4VcYpRHvSLXxP7yiZOMAd331cPfd2p7PFDVbgUsYOfCT3tICVqXWngbjr4m49OvsBwUBQ6O2uQoJvy3RexA==", + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/object.hasown/-/object.hasown-1.1.4.tgz", + "integrity": "sha512-FZ9LZt9/RHzGySlBARE3VF+gE26TxR38SdmqOqliuTnl9wrKulaQs+4dee1V+Io8VfxqzAfHu6YuRgUy8OHoTg==", "dev": true, "dependencies": { - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/object.values": { - "version": "1.1.7", - "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.1.7.tgz", - "integrity": "sha512-aU6xnDFYT3x17e/f0IiiwlGPTy2jzMySGfUB4fq6z7CV8l85CWHDk5ErhyhpfDHhrOMwGFhSQkhMGHaIotA6Ng==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/object.values/-/object.values-1.2.0.tgz", + "integrity": "sha512-yBYjY9QX2hnRmZHAjG/f13MzmBzxzYgQhFrke06TTyKY5zSTEqkOeukBzIdVA3j3ulu8Qa3MbVFShV7T2RmGtQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" }, "engines": { "node": ">= 0.4" @@ -5008,17 +8868,17 @@ } }, "node_modules/optionator": { - "version": "0.9.3", - "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.3.tgz", - "integrity": "sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==", + "version": "0.9.4", + "resolved": 
"https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", "dev": true, "dependencies": { - "@aashutoshrathi/word-wrap": "^1.2.3", "deep-is": "^0.1.3", "fast-levenshtein": "^2.0.6", "levn": "^0.4.1", "prelude-ls": "^1.2.1", - "type-check": "^0.4.0" + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" }, "engines": { "node": ">= 0.8.0" @@ -5066,6 +8926,46 @@ "node": ">=6" } }, + "node_modules/parse-entities": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.1.tgz", + "integrity": "sha512-SWzvYcSJh4d/SGLIOQfZ/CoNv6BTlI6YEQ7Nj82oDVnRpwe/Z/F1EMx42x3JAOwGBlCjeCH0BRJQbQ/opHL17w==", + "dependencies": { + "@types/unist": "^2.0.0", + "character-entities": "^2.0.0", + "character-entities-legacy": "^3.0.0", + "character-reference-invalid": "^2.0.0", + "decode-named-character-reference": "^1.0.0", + "is-alphanumerical": "^2.0.0", + "is-decimal": "^2.0.0", + "is-hexadecimal": "^2.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/parse-entities/node_modules/@types/unist": { + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", + "integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + }, + "node_modules/parse-numeric-range": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/parse-numeric-range/-/parse-numeric-range-1.3.0.tgz", + "integrity": "sha512-twN+njEipszzlMJd4ONUYgSfZPDxgHhT9Ahed5uTigpQn90FggW4SA/AIPq/6a149fTbE9qBEcSwE3FAEp6wQQ==" + }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { 
+ "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -5079,6 +8979,7 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, "engines": { "node": ">=0.10.0" } @@ -5087,7 +8988,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, "engines": { "node": ">=8" } @@ -5097,6 +8997,21 @@ "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz", "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==" }, + "node_modules/path-scurry": { + "version": "1.11.1", + "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", + "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", + "dependencies": { + "lru-cache": "^10.2.0", + "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" + }, + "engines": { + "node": ">=16 || 14 >=14.18" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/path-type": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", @@ -5107,9 +9022,9 @@ } }, "node_modules/picocolors": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz", - "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==" + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz", + "integrity": 
"sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==" }, "node_modules/picomatch": { "version": "2.3.1", @@ -5138,10 +9053,19 @@ "node": ">= 6" } }, + "node_modules/possible-typed-array-names": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz", + "integrity": "sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==", + "dev": true, + "engines": { + "node": ">= 0.4" + } + }, "node_modules/postcss": { - "version": "8.4.32", - "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.32.tgz", - "integrity": "sha512-D/kj5JNu6oo2EIy+XL/26JEDTlIbB8hw85G8StOE6L74RQAVVP5rej6wxCNqyMbR4RkPfqvezVbPw81Ngd6Kcw==", + "version": "8.4.38", + "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.38.tgz", + "integrity": "sha512-Wglpdk03BSfXkHoQa3b/oulrotAkwrlLDRSOb9D0bN86FdRyE9lppSp33aHNPgBa0JKCoB+drFLZkQoRRYae5A==", "funding": [ { "type": "opencollective", @@ -5159,7 +9083,7 @@ "dependencies": { "nanoid": "^3.3.7", "picocolors": "^1.0.0", - "source-map-js": "^1.0.2" + "source-map-js": "^1.2.0" }, "engines": { "node": "^10 || ^12 || >=14" @@ -5234,11 +9158,14 @@ } }, "node_modules/postcss-load-config/node_modules/lilconfig": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.0.0.tgz", - "integrity": "sha512-K2U4W2Ff5ibV7j7ydLr+zLAkIg5JJ4lPn1Ltsdt+Tz/IjQ8buJ55pZAxoP34lqIiwtF9iAvtLv3JGv7CAyAg+g==", + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/lilconfig/-/lilconfig-3.1.1.tgz", + "integrity": "sha512-O18pf7nyvHTckunPWCV1XUNXU1piu01y2b7ATJ0ppkUkk8ocqVWBrYjJBCwHDjD/ZWcfyrA0P4gKhzWGi5EINQ==", "engines": { "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/antonk52" } }, "node_modules/postcss-nested": { @@ -5260,9 +9187,9 @@ } }, "node_modules/postcss-nested/node_modules/postcss-selector-parser": { - "version": "6.0.13", - "resolved": 
"https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.13.tgz", - "integrity": "sha512-EaV1Gl4mUEV4ddhDnv/xtj7sxwrwxdetHdWUGnT4VJQf+4d05v6lHYZr8N573k5Z0BViss7BDhfWtKS3+sfAqQ==", + "version": "6.0.16", + "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.16.tgz", + "integrity": "sha512-A0RVJrX+IUkVZbW3ClroRWurercFhieevHB38sr2+l9eUClMqome3LmEmnhlNy+5Mr2EYN6B2Kaw9wYdd+VHiw==", "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -5290,9 +9217,9 @@ "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==" }, "node_modules/prebuild-install": { - "version": "7.1.1", - "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.1.tgz", - "integrity": "sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==", + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.2.tgz", + "integrity": "sha512-UnNke3IQb6sgarcZIDU3gbMeTp/9SSU1DAIkil7PrqG1vZlBtY5msYccSKSHDqa3hNg436IXK+SNImReuA1wEQ==", "dependencies": { "detect-libc": "^2.0.0", "expand-template": "^2.0.3", @@ -5364,6 +9291,14 @@ "url": "https://github.com/prettier/prettier?sponsor=1" } }, + "node_modules/prismjs": { + "version": "1.29.0", + "resolved": "https://registry.npmjs.org/prismjs/-/prismjs-1.29.0.tgz", + "integrity": "sha512-Kx/1w86q/epKcmte75LNrEoT+lX8pBpavuAbvJWRXar7Hz8jrtF+e3vY751p0R8H9HdArwaCTNDDzHg/ScJK1Q==", + "engines": { + "node": ">=6" + } + }, "node_modules/prop-types": { "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", @@ -5380,9 +9315,9 @@ "integrity": "sha512-SVtmxhRE/CGkn3eZY1T6pC8Nln6Fr/lu1mKSgRud0eC73whjGfoAogbn78LkD8aFL0zz3bAFerKSnOl7NlErBA==" }, "node_modules/property-information": { - "version": "6.4.0", - "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.4.0.tgz", - 
"integrity": "sha512-9t5qARVofg2xQqKtytzt+lZ4d1Qvj8t5B8fEwXK6qOfgRLgH/b13QlgEyDh033NOS31nXeFbYv7CLUDG1CeifQ==", + "version": "6.5.0", + "resolved": "https://registry.npmjs.org/property-information/-/property-information-6.5.0.tgz", + "integrity": "sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==", "funding": { "type": "github", "url": "https://github.com/sponsors/wooorm" @@ -5453,9 +9388,9 @@ } }, "node_modules/react": { - "version": "18.2.0", - "resolved": "https://registry.npmjs.org/react/-/react-18.2.0.tgz", - "integrity": "sha512-/3IjMdb2L9QbBdWiW5e3P2/npwMBaU9mHCSCUzNln0ZCYbcfTsGbTJrU/kGemdH2IWmB2ioZ+zkxtmq6g09fGQ==", + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", + "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "dependencies": { "loose-envify": "^1.1.0" }, @@ -5464,28 +9399,28 @@ } }, "node_modules/react-day-picker": { - "version": "8.9.1", - "resolved": "https://registry.npmjs.org/react-day-picker/-/react-day-picker-8.9.1.tgz", - "integrity": "sha512-W0SPApKIsYq+XCtfGeMYDoU0KbsG3wfkYtlw8l+vZp6KoBXGOlhzBUp4tNx1XiwiOZwhfdGOlj7NGSCKGSlg5Q==", + "version": "8.10.1", + "resolved": "https://registry.npmjs.org/react-day-picker/-/react-day-picker-8.10.1.tgz", + "integrity": "sha512-TMx7fNbhLk15eqcMt+7Z7S2KF7mfTId/XJDjKE8f+IUcFn0l08/kI4FiYTL/0yuOLmEcbR4Fwe3GJf/NiiMnPA==", "funding": { "type": "individual", "url": "https://github.com/sponsors/gpbl" }, "peerDependencies": { - "date-fns": "^2.28.0", + "date-fns": "^2.28.0 || ^3.0.0", "react": "^16.8.0 || ^17.0.0 || ^18.0.0" } }, "node_modules/react-dom": { - "version": "18.2.0", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.2.0.tgz", - "integrity": "sha512-6IMTriUmvsjHUjNtEDudZfuDQUoWXVxKHhlEGSk81n4YFS+r/Kl99wXiwlVXtPBtJenozv2P+hxDsw9eA7Xo6g==", + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", + 
"integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", "dependencies": { "loose-envify": "^1.1.0", - "scheduler": "^0.23.0" + "scheduler": "^0.23.2" }, "peerDependencies": { - "react": "^18.2.0" + "react": "^18.3.1" } }, "node_modules/react-dropzone": { @@ -5522,11 +9457,6 @@ "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" }, - "node_modules/react-lifecycles-compat": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/react-lifecycles-compat/-/react-lifecycles-compat-3.0.4.tgz", - "integrity": "sha512-fBASbA6LnOU9dOU2eW7aQ8xmYBSXUIWr+UmF9b1efZBazGNO+rcXT/icdKnYm2pTwcRylVUYwW7H1PHfLekVzA==" - }, "node_modules/react-loader-spinner": { "version": "5.4.5", "resolved": "https://registry.npmjs.org/react-loader-spinner/-/react-loader-spinner-5.4.5.tgz", @@ -5542,80 +9472,114 @@ } }, "node_modules/react-loader-spinner/node_modules/react-is": { - "version": "18.2.0", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", - "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==" + "version": "18.3.1", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", + "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==" }, "node_modules/react-markdown": { - "version": "8.0.7", - "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-8.0.7.tgz", - "integrity": "sha512-bvWbzG4MtOU62XqBx3Xx+zB2raaFFsq4mYiAzfjXJMEz2sixgeAfraA3tvzULF02ZdOMUOKTBFFaZJDDrq+BJQ==", + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-9.0.1.tgz", + "integrity": "sha512-186Gw/vF1uRkydbsOIkcGXw7aHq0sZOCRFFjGrr7b9+nVZg4UfA4enXCaxm4fUzecU38sWfrNDitGhshuU7rdg==", "dependencies": { - "@types/hast": 
"^2.0.0", - "@types/prop-types": "^15.0.0", - "@types/unist": "^2.0.0", - "comma-separated-tokens": "^2.0.0", - "hast-util-whitespace": "^2.0.0", - "prop-types": "^15.0.0", - "property-information": "^6.0.0", - "react-is": "^18.0.0", - "remark-parse": "^10.0.0", - "remark-rehype": "^10.0.0", - "space-separated-tokens": "^2.0.0", - "style-to-object": "^0.4.0", - "unified": "^10.0.0", - "unist-util-visit": "^4.0.0", - "vfile": "^5.0.0" + "@types/hast": "^3.0.0", + "devlop": "^1.0.0", + "hast-util-to-jsx-runtime": "^2.0.0", + "html-url-attributes": "^3.0.0", + "mdast-util-to-hast": "^13.0.0", + "remark-parse": "^11.0.0", + "remark-rehype": "^11.0.0", + "unified": "^11.0.0", + "unist-util-visit": "^5.0.0", + "vfile": "^6.0.0" }, "funding": { "type": "opencollective", "url": "https://opencollective.com/unified" }, "peerDependencies": { - "@types/react": ">=16", - "react": ">=16" + "@types/react": ">=18", + "react": ">=18" } }, - "node_modules/react-markdown/node_modules/react-is": { - "version": "18.2.0", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz", - "integrity": "sha512-xWGDIW6x921xtzPkhiULtthJHoJvBbF3q26fzloPCK0hsvxtPVelvftw3zjbHWSkR2km9Z+4uxbDDK/6Zw9B8w==" + "node_modules/react-remove-scroll": { + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.5.5.tgz", + "integrity": "sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==", + "dependencies": { + "react-remove-scroll-bar": "^2.3.3", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/react-remove-scroll-bar": { + "version": "2.3.6", + "resolved": 
"https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz", + "integrity": "sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g==", + "dependencies": { + "react-style-singleton": "^2.2.1", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } }, "node_modules/react-smooth": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-2.0.5.tgz", - "integrity": "sha512-BMP2Ad42tD60h0JW6BFaib+RJuV5dsXJK9Baxiv/HlNFjvRLqA9xrNKxVWnUIZPQfzUwGXIlU/dSYLU+54YGQA==", + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.1.tgz", + "integrity": "sha512-OE4hm7XqR0jNOq3Qmk9mFLyd6p2+j6bvbPJ7qlB7+oo0eNcL2l7WQzG6MBnT3EXY6xzkLMUBec3AfewJdA0J8w==", "dependencies": { - "fast-equals": "^5.0.0", - "react-transition-group": "2.9.0" + "fast-equals": "^5.0.1", + "prop-types": "^15.8.1", + "react-transition-group": "^4.4.5" }, "peerDependencies": { - "prop-types": "^15.6.0", - "react": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0", - "react-dom": "^15.0.0 || ^16.0.0 || ^17.0.0 || ^18.0.0" + "react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" } }, - "node_modules/react-smooth/node_modules/dom-helpers": { - "version": "3.4.0", - "resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-3.4.0.tgz", - "integrity": "sha512-LnuPJ+dwqKDIyotW1VzmOZ5TONUN7CwkCR5hrgawTUbkBGYdeoNLZo6nNfGkCrjtE1nXXaj7iMMpDa8/d9WoIA==", + "node_modules/react-style-singleton": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.1.tgz", + "integrity": "sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g==", "dependencies": { - 
"@babel/runtime": "^7.1.2" - } - }, - "node_modules/react-smooth/node_modules/react-transition-group": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-2.9.0.tgz", - "integrity": "sha512-+HzNTCHpeQyl4MJ/bdE0u6XRMe9+XG/+aL4mCxVN4DnPBQ0/5bfHWPDuOZUzYdMj94daZaZdCCc1Dzt9R/xSSg==", - "dependencies": { - "dom-helpers": "^3.4.0", - "loose-envify": "^1.4.0", - "prop-types": "^15.6.2", - "react-lifecycles-compat": "^3.0.4" + "get-nonce": "^1.0.0", + "invariant": "^2.2.4", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "react": ">=15.0.0", - "react-dom": ">=15.0.0" + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } } }, "node_modules/react-transition-group": { @@ -5633,6 +9597,15 @@ "react-dom": ">=16.6.0" } }, + "node_modules/react-transition-state": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/react-transition-state/-/react-transition-state-2.1.1.tgz", + "integrity": "sha512-kQx5g1FVu9knoz1T1WkapjUgFz08qQ/g1OmuWGi3/AoEFfS0kStxrPlZx81urjCXdz2d+1DqLpU6TyLW/Ro04Q==", + "peerDependencies": { + "react": ">=16.8.0", + "react-dom": ">=16.8.0" + } + }, "node_modules/read-cache": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz", @@ -5666,15 +9639,15 @@ } }, "node_modules/recharts": { - "version": "2.10.3", - "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.10.3.tgz", - "integrity": "sha512-G4J96fKTZdfFQd6aQnZjo2nVNdXhp+uuLb00+cBTGLo85pChvm1+E67K3wBOHDE/77spcYb2Cy9gYWVqiZvQCg==", + "version": "2.12.7", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.12.7.tgz", + "integrity": "sha512-hlLJMhPQfv4/3NBSAyq3gzGg4h2v69RJh6KU7b3pXYNNAELs9kEoXOjbkxdXpALqKBoVmVptGfLpxdaVYqjmXQ==", "dependencies": { "clsx": "^2.0.0", "eventemitter3": "^4.0.1", - "lodash": "^4.17.19", + "lodash": 
"^4.17.21", "react-is": "^16.10.2", - "react-smooth": "^2.0.5", + "react-smooth": "^4.0.0", "recharts-scale": "^0.4.4", "tiny-invariant": "^1.3.1", "victory-vendor": "^36.6.8" @@ -5683,7 +9656,6 @@ "node": ">=14" }, "peerDependencies": { - "prop-types": "^15.6.0", "react": "^16.0.0 || ^17.0.0 || ^18.0.0", "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0" } @@ -5697,15 +9669,16 @@ } }, "node_modules/reflect.getprototypeof": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.4.tgz", - "integrity": "sha512-ECkTw8TmJwW60lOTR+ZkODISW6RQ8+2CL3COqtiJKLd6MmB45hN51HprHFziKLGkAuTGQhBb91V8cy+KHlaCjw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz", + "integrity": "sha512-fmfw4XgoDke3kdI6h4xcUz1dG8uaiv5q9gcEwLS4Pnth2kxT+GZ7YehS1JTMGBQmtV7Y4GFGbs2re2NqhdozUg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1", - "get-intrinsic": "^1.2.1", + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.1", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", "globalthis": "^1.0.3", "which-builtin-type": "^1.1.3" }, @@ -5716,20 +9689,49 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/refractor": { + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/refractor/-/refractor-4.8.1.tgz", + "integrity": "sha512-/fk5sI0iTgFYlmVGYVew90AoYnNMP6pooClx/XKqyeeCQXrL0Kvgn8V0VEht5ccdljbzzF1i3Q213gcntkRExg==", + "dependencies": { + "@types/hast": "^2.0.0", + "@types/prismjs": "^1.0.0", + "hastscript": "^7.0.0", + "parse-entities": "^4.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, + "node_modules/refractor/node_modules/@types/hast": { + "version": "2.3.10", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-2.3.10.tgz", + "integrity": 
"sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==", + "dependencies": { + "@types/unist": "^2" + } + }, + "node_modules/refractor/node_modules/@types/unist": { + "version": "2.0.10", + "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", + "integrity": "sha512-IfYcSBWE3hLpBg8+X2SEa8LVkJdJEkT2Ese2aaLs3ptGdVtABxndrMaxuFlQ1qdFf9Q5rDvDpxI3WwgvKFAsQA==" + }, "node_modules/regenerator-runtime": { - "version": "0.14.0", - "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.0.tgz", - "integrity": "sha512-srw17NI0TUWHuGa5CFGGmhfNIeja30WMBfbslPNhf6JrqQlLN5gcrvig1oqPxiVaXb0oW0XRKtH6Nngs5lKCIA==" + "version": "0.14.1", + "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz", + "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==" }, "node_modules/regexp.prototype.flags": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.1.tgz", - "integrity": "sha512-sy6TXMN+hnP/wMy+ISxg3krXx7BAtWVO4UouuCN/ziM9UEne0euamVNafDfvC83bRNr95y0V5iijeDQFUNpvrg==", + "version": "1.5.2", + "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz", + "integrity": "sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "set-function-name": "^2.0.0" + "call-bind": "^1.0.6", + "define-properties": "^1.2.1", + "es-errors": "^1.3.0", + "set-function-name": "^2.0.1" }, "engines": { "node": ">= 0.4" @@ -5738,14 +9740,59 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/remark-parse": { - "version": "10.0.2", - "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-10.0.2.tgz", - "integrity": 
"sha512-3ydxgHa/ZQzG8LvC7jTXccARYDcRld3VfcgIIFs7bI6vbRSxJJmzgLEIIoYKyrfhaY+ujuWaf/PJiMZXoiCXgw==", + "node_modules/rehype-parse": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/rehype-parse/-/rehype-parse-9.0.0.tgz", + "integrity": "sha512-WG7nfvmWWkCR++KEkZevZb/uw41E8TsH4DsY9UxsTbIXCVGbAs4S+r8FrQ+OtH5EEQAs+5UxKC42VinkmpA1Yw==", "dependencies": { - "@types/mdast": "^3.0.0", - "mdast-util-from-markdown": "^1.0.0", - "unified": "^10.0.0" + "@types/hast": "^3.0.0", + "hast-util-from-html": "^2.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/rehype-prism-plus": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/rehype-prism-plus/-/rehype-prism-plus-2.0.0.tgz", + "integrity": "sha512-FeM/9V2N7EvDZVdR2dqhAzlw5YI49m9Tgn7ZrYJeYHIahM6gcXpH0K1y2gNnKanZCydOMluJvX2cB9z3lhY8XQ==", + "dependencies": { + "hast-util-to-string": "^3.0.0", + "parse-numeric-range": "^1.3.0", + "refractor": "^4.8.0", + "rehype-parse": "^9.0.0", + "unist-util-filter": "^5.0.0", + "unist-util-visit": "^5.0.0" + } + }, + "node_modules/remark-gfm": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz", + "integrity": "sha512-U92vJgBPkbw4Zfu/IiW2oTZLSL3Zpv+uI7My2eq8JxKgqraFdU8YUGicEJCEgSbeaG+QDFqIcwwfMTOEelPxuA==", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-gfm": "^3.0.0", + "micromark-extension-gfm": "^3.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-parse": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", + "integrity": "sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==", + "dependencies": { + "@types/mdast": "^4.0.0", + 
"mdast-util-from-markdown": "^2.0.0", + "micromark-util-types": "^2.0.0", + "unified": "^11.0.0" }, "funding": { "type": "opencollective", @@ -5753,14 +9800,29 @@ } }, "node_modules/remark-rehype": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-10.1.0.tgz", - "integrity": "sha512-EFmR5zppdBp0WQeDVZ/b66CWJipB2q2VLNFMabzDSGR66Z2fQii83G5gTBbgGEnEEA0QRussvrFHxk1HWGJskw==", + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/remark-rehype/-/remark-rehype-11.1.0.tgz", + "integrity": "sha512-z3tJrAs2kIs1AqIIy6pzHmAHlF1hWQ+OdY4/hv+Wxe35EhyLKcajL33iUEn3ScxtFox9nUvRufR/Zre8Q08H/g==", "dependencies": { - "@types/hast": "^2.0.0", - "@types/mdast": "^3.0.0", - "mdast-util-to-hast": "^12.1.0", - "unified": "^10.0.0" + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "mdast-util-to-hast": "^13.0.0", + "unified": "^11.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/remark-stringify": { + "version": "11.0.0", + "resolved": "https://registry.npmjs.org/remark-stringify/-/remark-stringify-11.0.0.tgz", + "integrity": "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-to-markdown": "^2.0.0", + "unified": "^11.0.0" }, "funding": { "type": "opencollective", @@ -5825,6 +9887,26 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/rimraf/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, "node_modules/run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -5847,25 +9929,14 @@ "queue-microtask": "^1.2.2" } }, - "node_modules/sade": { - "version": "1.8.1", - "resolved": "https://registry.npmjs.org/sade/-/sade-1.8.1.tgz", - "integrity": "sha512-xal3CZX1Xlo/k4ApwCFrHVACi9fBqJ7V+mwhBsuf/1IOKbBy098Fex+Wa/5QMubw09pSZ/u8EY8PWgevJsXp1A==", - "dependencies": { - "mri": "^1.1.0" - }, - "engines": { - "node": ">=6" - } - }, "node_modules/safe-array-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.0.1.tgz", - "integrity": "sha512-6XbUAseYE2KtOuGueyeobCySj9L4+66Tn6KQMOPQJrAJEowYKW/YR/MGJZl7FdydUdaFu4LYyDZjxf4/Nmo23Q==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/safe-array-concat/-/safe-array-concat-1.1.2.tgz", + "integrity": "sha512-vj6RsCsWBCf19jIeHEfkRMw8DPiBb+DMXklQ/1SGDHOMlHdPUkZXFQ2YdplS23zESTijAcurb1aSgJA3AgMu1Q==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "get-intrinsic": "^1.2.1", + "call-bind": "^1.0.7", + "get-intrinsic": "^1.2.4", "has-symbols": "^1.0.3", "isarray": "^2.0.5" }, @@ -5896,34 +9967,34 @@ ] }, "node_modules/safe-regex-test": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.0.tgz", - "integrity": "sha512-JBUUzyOgEwXQY1NuPtvcj/qcBDbDmEvWufhlnXZIm75DEHp+afM1r1ujJpJsV/gSM4t59tpDyPi1sd6ZaPFfsA==", + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.3.tgz", + "integrity": "sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "get-intrinsic": "^1.1.3", + "call-bind": "^1.0.6", + "es-errors": "^1.3.0", "is-regex": "^1.1.4" }, + "engines": { + "node": ">= 0.4" + }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, 
"node_modules/scheduler": { - "version": "0.23.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.0.tgz", - "integrity": "sha512-CtuThmgHNg7zIZWAXi3AsyIzA3n4xx7aNyjwC2VJldO2LMVDhFK+63xGqq6CsJH4rTAt6/M+N4GhZiDYPx9eUw==", + "version": "0.23.2", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.23.2.tgz", + "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", "dependencies": { "loose-envify": "^1.1.0" } }, "node_modules/semver": { - "version": "7.5.4", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", - "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", - "dependencies": { - "lru-cache": "^6.0.0" - }, + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz", + "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==", "bin": { "semver": "bin/semver.js" }, @@ -5932,29 +10003,32 @@ } }, "node_modules/set-function-length": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.1.1.tgz", - "integrity": "sha512-VoaqjbBJKiWtg4yRcKBQ7g7wnGnLV3M8oLvVWwOk2PdYY6PEFegR1vezXR0tw6fZGF9csVakIRjrJiy2veSBFQ==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", + "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", "dev": true, "dependencies": { - "define-data-property": "^1.1.1", - "get-intrinsic": "^1.2.1", + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", + "function-bind": "^1.1.2", + "get-intrinsic": "^1.2.4", "gopd": "^1.0.1", - "has-property-descriptors": "^1.0.0" + "has-property-descriptors": "^1.0.2" }, "engines": { "node": ">= 0.4" } }, "node_modules/set-function-name": { - "version": "2.0.1", - "resolved": 
"https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.1.tgz", - "integrity": "sha512-tMNCiqYVkXIZgc2Hnoy2IvC/f8ezc5koaRFkCjrpWzGpCd3qbZXPzVy9MAZzK1ch/X0jvSkojys3oqJN0qCmdA==", + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/set-function-name/-/set-function-name-2.0.2.tgz", + "integrity": "sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ==", "dev": true, "dependencies": { - "define-data-property": "^1.0.1", + "define-data-property": "^1.1.4", + "es-errors": "^1.3.0", "functions-have-names": "^1.2.3", - "has-property-descriptors": "^1.0.0" + "has-property-descriptors": "^1.0.2" }, "engines": { "node": ">= 0.4" @@ -5991,7 +10065,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, "dependencies": { "shebang-regex": "^3.0.0" }, @@ -6003,25 +10076,39 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, "engines": { "node": ">=8" } }, "node_modules/side-channel": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.4.tgz", - "integrity": "sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", + "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", "dev": true, "dependencies": { - "call-bind": "^1.0.0", - "get-intrinsic": "^1.0.2", - "object-inspect": "^1.9.0" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.4", + "object-inspect": "^1.13.1" + }, + "engines": { + 
"node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/signal-exit": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", + "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/simple-concat": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", @@ -6083,9 +10170,9 @@ } }, "node_modules/source-map-js": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.0.2.tgz", - "integrity": "sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw==", + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", "engines": { "node": ">=0.10.0" } @@ -6108,12 +10195,15 @@ } }, "node_modules/streamx": { - "version": "2.15.5", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.5.tgz", - "integrity": "sha512-9thPGMkKC2GctCzyCUjME3yR03x2xNo0GPKGkRw2UMYN+gqWa9uqpyNWhmsNCutU5zHmkUum0LsCRQTXUgUCAg==", + "version": "2.16.1", + "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz", + "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==", "dependencies": { "fast-fifo": "^1.1.0", "queue-tick": "^1.0.1" + }, + "optionalDependencies": { + "bare-events": "^2.2.0" } }, "node_modules/string_decoder": { @@ -6124,35 +10214,102 @@ "safe-buffer": "~5.2.0" } }, + "node_modules/string-width": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", + "integrity": 
"sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", + "dependencies": { + "eastasianwidth": "^0.2.0", + "emoji-regex": "^9.2.2", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/string-width-cjs": { + "name": "string-width", + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/string-width-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/string-width/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/string-width/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/string.prototype.matchall": { - "version": "4.0.10", - "resolved": 
"https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.10.tgz", - "integrity": "sha512-rGXbGmOEosIQi6Qva94HUjgPs9vKW+dkG7Y8Q5O2OYkWL6wFaTRZO8zM4mhP94uX55wgyrXzfS2aGtGzUL7EJQ==", + "version": "4.0.11", + "resolved": "https://registry.npmjs.org/string.prototype.matchall/-/string.prototype.matchall-4.0.11.tgz", + "integrity": "sha512-NUdh0aDavY2og7IbBPenWqR9exH+E26Sv8e0/eTe1tltDGZL+GtBkDAnnyBtmekfK6/Dq3MkcGtzXFEd1LQrtg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1", - "get-intrinsic": "^1.2.1", + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.2", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.0.0", + "get-intrinsic": "^1.2.4", + "gopd": "^1.0.1", "has-symbols": "^1.0.3", - "internal-slot": "^1.0.5", - "regexp.prototype.flags": "^1.5.0", - "set-function-name": "^2.0.0", - "side-channel": "^1.0.4" + "internal-slot": "^1.0.7", + "regexp.prototype.flags": "^1.5.2", + "set-function-name": "^2.0.2", + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/string.prototype.trim": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.8.tgz", - "integrity": "sha512-lfjY4HcixfQXOfaqCvcBuOIapyaroTXhbkfJN3gcB1OtyupngWK4sEET9Knd0cXd28kTUqu/kHoV4HKSJdnjiQ==", + "version": "1.2.9", + "resolved": "https://registry.npmjs.org/string.prototype.trim/-/string.prototype.trim-1.2.9.tgz", + "integrity": "sha512-klHuCNxiMZ8MlsOihJhJEBJAiMVqU3Z2nEXWfWnIqjN0gEFS9J9+IxKozWWtQGcgoa1WUZzLjKPTr4ZHNFTFxw==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-abstract": "^1.23.0", + "es-object-atoms": "^1.0.0" }, "engines": { "node": ">= 0.4" @@ -6162,38 +10319,65 @@ } }, 
"node_modules/string.prototype.trimend": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.7.tgz", - "integrity": "sha512-Ni79DqeB72ZFq1uH/L6zJ+DKZTkOtPIHovb3YZHQViE+HDouuU4mBrLOLDn5Dde3RF8qw5qVETEjhu9locMLvA==", + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/string.prototype.trimend/-/string.prototype.trimend-1.0.8.tgz", + "integrity": "sha512-p73uL5VCHCO2BZZ6krwwQE3kCzM7NKmis8S//xEC6fQonchbum4eP6kR4DLEjQFO3Wnj3Fuo8NM0kOSjVdHjZQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, "node_modules/string.prototype.trimstart": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.7.tgz", - "integrity": "sha512-NGhtDFu3jCEm7B4Fy0DpLewdJQOZcQ0rGbwQ/+stjnrp2i+rlKeCvos9hOIeCmqwratM47OBxY7uFZzjxHXmrg==", + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/string.prototype.trimstart/-/string.prototype.trimstart-1.0.8.tgz", + "integrity": "sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "define-properties": "^1.2.0", - "es-abstract": "^1.22.1" + "call-bind": "^1.0.7", + "define-properties": "^1.2.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/stringify-entities": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", + "integrity": "sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==", + "dependencies": { + "character-entities-html4": "^2.0.0", + 
"character-entities-legacy": "^3.0.0" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, + "dependencies": { + "ansi-regex": "^5.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/strip-ansi-cjs": { + "name": "strip-ansi", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", + "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", "dependencies": { "ansi-regex": "^5.0.1" }, @@ -6223,11 +10407,11 @@ } }, "node_modules/style-to-object": { - "version": "0.4.4", - "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-0.4.4.tgz", - "integrity": "sha512-HYNoHZa2GorYNyqiCaBgsxvcJIn7OHq6inEga+E6Ke3m5JkoqpQbnFssk4jwe+K7AhGa2fcha4wSOf1Kn01dMg==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.6.tgz", + "integrity": "sha512-khxq+Qm3xEyZfKd/y9L3oIWQimxuc4STrQKtQn8aSDRHb8mFgpukgX1hdzfrMEW6JCjyJ8p89x+IUMVnCBI1PA==", "dependencies": { - "inline-style-parser": "0.1.1" + "inline-style-parser": "0.2.3" } }, "node_modules/styled-components": { @@ -6306,13 +10490,13 @@ "integrity": "sha512-IjLxzM20RMwAsx8M1QoRlCG/Kmq8lKzCGyospjtSXt/BTIIcvgTonaxQAsKnBrsZNwhpHzO9ADx5te0h76ILVg==" }, "node_modules/sucrase": { - "version": "3.34.0", - "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.34.0.tgz", - "integrity": "sha512-70/LQEZ07TEcxiU2dz51FKaE6hCTWC6vr7FOk3Gr0U60C3shtAN+H+BFr9XlYe5xqf3RA8nrc+VIwzCfnxuXJw==", + "version": "3.35.0", + "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.0.tgz", + "integrity": 
"sha512-8EbVDiu9iN/nESwxeSxDKe0dunta1GOlHufmSSXxMD2z2/tMZpDMpvXQGsc+ajGo8y2uYUmixaSRUc/QPoQ0GA==", "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", - "glob": "7.1.6", + "glob": "^10.3.10", "lines-and-columns": "^1.1.6", "mz": "^2.7.0", "pirates": "^4.0.1", @@ -6323,26 +10507,7 @@ "sucrase-node": "bin/sucrase-node" }, "engines": { - "node": ">=8" - } - }, - "node_modules/sucrase/node_modules/glob": { - "version": "7.1.6", - "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz", - "integrity": "sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA==", - "dependencies": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - }, - "engines": { - "node": "*" - }, - "funding": { - "url": "https://github.com/sponsors/isaacs" + "node": ">=16 || 14 >=14.17" } }, "node_modules/supports-color": { @@ -6369,9 +10534,9 @@ } }, "node_modules/swr": { - "version": "2.2.4", - "resolved": "https://registry.npmjs.org/swr/-/swr-2.2.4.tgz", - "integrity": "sha512-njiZ/4RiIhoOlAaLYDqwz5qH/KZXVilRLvomrx83HjzCWTfa+InyfAjv05PSFxnmLzZkNO9ZfvgoqzAaEI4sGQ==", + "version": "2.2.5", + "resolved": "https://registry.npmjs.org/swr/-/swr-2.2.5.tgz", + "integrity": "sha512-QtxqyclFeAsxEUeZIYmsaQ0UjimSq1RZ9Un7I68/0ClKK/U3LoyQunwkQfJZr2fc22DfIXLNDc2wFyTEikCUpg==", "dependencies": { "client-only": "^0.0.1", "use-sync-external-store": "^1.2.0" @@ -6395,9 +10560,9 @@ } }, "node_modules/tailwindcss": { - "version": "3.3.5", - "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.3.5.tgz", - "integrity": "sha512-5SEZU4J7pxZgSkv7FP1zY8i2TIAOooNZ1e/OGtxIEv6GltpoiXUqWvLy89+a10qYTB1N5Ifkuw9lqQkN9sscvA==", + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-3.4.3.tgz", + "integrity": "sha512-U7sxQk/n397Bmx4JHbJx/iSOOv5G+II3f1kpLpY2QeUv5DcPdcTsYLlusZfq1NthHS1c1cZoyFmmkex1rzke0A==", 
"dependencies": { "@alloc/quick-lru": "^5.2.0", "arg": "^5.0.2", @@ -6407,7 +10572,7 @@ "fast-glob": "^3.3.0", "glob-parent": "^6.0.2", "is-glob": "^4.0.3", - "jiti": "^1.19.1", + "jiti": "^1.21.0", "lilconfig": "^2.1.0", "micromatch": "^4.0.5", "normalize-path": "^3.0.0", @@ -6431,9 +10596,9 @@ } }, "node_modules/tailwindcss/node_modules/postcss-selector-parser": { - "version": "6.0.13", - "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.13.tgz", - "integrity": "sha512-EaV1Gl4mUEV4ddhDnv/xtj7sxwrwxdetHdWUGnT4VJQf+4d05v6lHYZr8N573k5Z0BViss7BDhfWtKS3+sfAqQ==", + "version": "6.0.16", + "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.16.tgz", + "integrity": "sha512-A0RVJrX+IUkVZbW3ClroRWurercFhieevHB38sr2+l9eUClMqome3LmEmnhlNy+5Mr2EYN6B2Kaw9wYdd+VHiw==", "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -6452,19 +10617,22 @@ } }, "node_modules/tar-fs": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz", - "integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==", + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.6.tgz", + "integrity": "sha512-iokBDQQkUyeXhgPYaZxmczGPhnhXZ0CmrqI+MOb/WFGS9DW5wnfrLgtjUJBvz50vQ3qfRwJ62QVoCFu8mPVu5w==", "dependencies": { - "mkdirp-classic": "^0.5.2", "pump": "^3.0.0", "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^2.1.1", + "bare-path": "^2.1.0" } }, "node_modules/tar-stream": { - "version": "3.1.6", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz", - "integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==", + "version": "3.1.7", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", + "integrity": 
"sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", "dependencies": { "b4a": "^1.6.4", "fast-fifo": "^1.2.0", @@ -6502,9 +10670,9 @@ "integrity": "sha512-Eet/eeMhkO6TX8mnUteS9zgPbUMQa4I6Kkp5ORiBD5476/m+PIRiumP5tmh5ioJpH7k51Kehawy2UDfsnxxY8Q==" }, "node_modules/tiny-invariant": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.1.tgz", - "integrity": "sha512-AD5ih2NlSssTCwsMznbvwMZpJ1cbhkGd2uueNxzv2jDlEeZdU04JQfRnggJQ8DrcVBGjAsCKwFBbDlVNtEMlzw==" + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz", + "integrity": "sha512-+FbBPE1o9QAYvviau/qC5SE3caw21q3xkvWKBtja5vgqOWIHHJ3ioaq1VPfn/Szqctz2bU/oYeKd9/z5BL+PVg==" }, "node_modules/tiny-warning": { "version": "1.0.3", @@ -6545,21 +10713,21 @@ } }, "node_modules/trough": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/trough/-/trough-2.1.0.tgz", - "integrity": "sha512-AqTiAOLcj85xS7vQ8QkAV41hPDIJ71XJB4RCUrzo/1GM2CQwhkJGaf9Hgr7BOugMRpgGUrqRg/DrBDl4H40+8g==", + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/trough/-/trough-2.2.0.tgz", + "integrity": "sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==", "funding": { "type": "github", "url": "https://github.com/sponsors/wooorm" } }, "node_modules/ts-api-utils": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.0.3.tgz", - "integrity": "sha512-wNMeqtMz5NtwpT/UZGY5alT+VoKdSsOOP/kqHFcUW1P/VRhH2wJ48+DN2WwUliNbQ976ETwDL0Ifd2VVvgonvg==", + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", + "integrity": "sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ==", "dev": true, "engines": { - "node": ">=16.13.0" + "node": ">=16" }, "peerDependencies": { "typescript": ">=4.2.0" @@ -6571,9 +10739,9 @@ "integrity": 
"sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==" }, "node_modules/tsconfig-paths": { - "version": "3.14.2", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.14.2.tgz", - "integrity": "sha512-o/9iXgCYc5L/JxCHPe3Hvh8Q/2xm5Z+p18PESBU6Ff33695QnCHBEjcytY2q19ua7Mbl/DavtBOLq+oG0RCL+g==", + "version": "3.15.0", + "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz", + "integrity": "sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg==", "dev": true, "dependencies": { "@types/json5": "^0.0.29", @@ -6635,29 +10803,30 @@ } }, "node_modules/typed-array-buffer": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.0.tgz", - "integrity": "sha512-Y8KTSIglk9OZEr8zywiIHG/kmQ7KWyjseXs1CbSo8vC42w7hg2HgYTxSWwP0+is7bWDc1H+Fo026CpHFwm8tkw==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz", + "integrity": "sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", - "get-intrinsic": "^1.2.1", - "is-typed-array": "^1.1.10" + "call-bind": "^1.0.7", + "es-errors": "^1.3.0", + "is-typed-array": "^1.1.13" }, "engines": { "node": ">= 0.4" } }, "node_modules/typed-array-byte-length": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.0.tgz", - "integrity": "sha512-Or/+kvLxNpeQ9DtSydonMxCx+9ZXOswtwJn17SNLvhptaXYDJvkFFP5zbfU/uLmvnBJlI4yrnXRxpdWH/M5tNA==", + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/typed-array-byte-length/-/typed-array-byte-length-1.0.1.tgz", + "integrity": "sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", + "call-bind": 
"^1.0.7", "for-each": "^0.3.3", - "has-proto": "^1.0.1", - "is-typed-array": "^1.1.10" + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13" }, "engines": { "node": ">= 0.4" @@ -6667,16 +10836,17 @@ } }, "node_modules/typed-array-byte-offset": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.0.tgz", - "integrity": "sha512-RD97prjEt9EL8YgAgpOkf3O4IF9lhJFr9g0htQkm0rchFp/Vx7LW5Q8fSXXub7BXAODyUQohRMyOc3faCPd0hg==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/typed-array-byte-offset/-/typed-array-byte-offset-1.0.2.tgz", + "integrity": "sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA==", "dev": true, "dependencies": { - "available-typed-arrays": "^1.0.5", - "call-bind": "^1.0.2", + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", "for-each": "^0.3.3", - "has-proto": "^1.0.1", - "is-typed-array": "^1.1.10" + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13" }, "engines": { "node": ">= 0.4" @@ -6686,14 +10856,20 @@ } }, "node_modules/typed-array-length": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.4.tgz", - "integrity": "sha512-KjZypGq+I/H7HI5HlOoGHkWUUGq+Q0TPhQurLbyrVrvnKTBgzLhIJ7j6J/XTQOi0d1RjyZ0wdas8bKs2p0x3Ng==", + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/typed-array-length/-/typed-array-length-1.0.6.tgz", + "integrity": "sha512-/OxDN6OtAk5KBpGb28T+HZc2M+ADtvRxXrKKbUwtsLgdoxgX13hyy7ek6bFRl5+aBs2yZzB0c4CnQfAtVypW/g==", "dev": true, "dependencies": { - "call-bind": "^1.0.2", + "call-bind": "^1.0.7", "for-each": "^0.3.3", - "is-typed-array": "^1.1.9" + "gopd": "^1.0.1", + "has-proto": "^1.0.3", + "is-typed-array": "^1.1.13", + "possible-typed-array-names": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -6727,38 +10903,39 @@ } }, 
"node_modules/unified": { - "version": "10.1.2", - "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", - "integrity": "sha512-pUSWAi/RAnVy1Pif2kAoeWNBa3JVrx0MId2LASj8G+7AiHWoKZNTomq6LG326T68U7/e263X6fTdcXIy7XnF7Q==", + "version": "11.0.4", + "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.4.tgz", + "integrity": "sha512-apMPnyLjAX+ty4OrNap7yumyVAMlKx5IWU2wlzzUdYJO9A8f1p9m/gywF/GM2ZDFcjQPrx59Mc90KwmxsoklxQ==", "dependencies": { - "@types/unist": "^2.0.0", + "@types/unist": "^3.0.0", "bail": "^2.0.0", + "devlop": "^1.0.0", "extend": "^3.0.0", - "is-buffer": "^2.0.0", "is-plain-obj": "^4.0.0", "trough": "^2.0.0", - "vfile": "^5.0.0" + "vfile": "^6.0.0" }, "funding": { "type": "opencollective", "url": "https://opencollective.com/unified" } }, - "node_modules/unist-util-generated": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/unist-util-generated/-/unist-util-generated-2.0.1.tgz", - "integrity": "sha512-qF72kLmPxAw0oN2fwpWIqbXAVyEqUzDHMsbtPvOudIlUzXYFIeQIuxXQCRCFh22B7cixvU0MG7m3MW8FTq/S+A==", - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/unified" + "node_modules/unist-util-filter": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/unist-util-filter/-/unist-util-filter-5.0.1.tgz", + "integrity": "sha512-pHx7D4Zt6+TsfwylH9+lYhBhzyhEnCXs/lbq/Hstxno5z4gVdyc2WEW0asfjGKPyG4pEKrnBv5hdkO6+aRnQJw==", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" } }, "node_modules/unist-util-is": { - "version": "5.2.1", - "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-5.2.1.tgz", - "integrity": "sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==", + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", + "integrity": 
"sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==", "dependencies": { - "@types/unist": "^2.0.0" + "@types/unist": "^3.0.0" }, "funding": { "type": "opencollective", @@ -6766,11 +10943,24 @@ } }, "node_modules/unist-util-position": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-4.0.4.tgz", - "integrity": "sha512-kUBE91efOWfIVBo8xzh/uZQ7p9ffYRtUbMRZBNFYwf0RK8koUMx6dGUfwylLOKmaT2cs4wSW96QoYUSXAyEtpg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-position/-/unist-util-position-5.0.0.tgz", + "integrity": "sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==", "dependencies": { - "@types/unist": "^2.0.0" + "@types/unist": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/unist-util-remove-position": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-remove-position/-/unist-util-remove-position-5.0.0.tgz", + "integrity": "sha512-Hp5Kh3wLxv0PHj9m2yZhhLt58KzPtEYKQQ4yxfYFEO7EvHwzyDYnduhHnY1mDxoqr7VUwVuHXk9RXKIiYS1N8Q==", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-visit": "^5.0.0" }, "funding": { "type": "opencollective", @@ -6778,11 +10968,11 @@ } }, "node_modules/unist-util-stringify-position": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-3.0.3.tgz", - "integrity": "sha512-k5GzIBZ/QatR8N5X2y+drfpWG8IDBzdnVj6OInRNWm1oXrzydiaAT2OQiA8DPRRZyAKb9b6I2a6PxYklZD0gKg==", + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/unist-util-stringify-position/-/unist-util-stringify-position-4.0.0.tgz", + "integrity": "sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==", "dependencies": { - "@types/unist": "^2.0.0" + "@types/unist": "^3.0.0" }, 
"funding": { "type": "opencollective", @@ -6790,13 +10980,13 @@ } }, "node_modules/unist-util-visit": { - "version": "4.1.2", - "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-4.1.2.tgz", - "integrity": "sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==", + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-5.0.0.tgz", + "integrity": "sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==", "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0", - "unist-util-visit-parents": "^5.1.1" + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0", + "unist-util-visit-parents": "^6.0.0" }, "funding": { "type": "opencollective", @@ -6804,12 +10994,12 @@ } }, "node_modules/unist-util-visit-parents": { - "version": "5.1.3", - "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-5.1.3.tgz", - "integrity": "sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/unist-util-visit-parents/-/unist-util-visit-parents-6.0.1.tgz", + "integrity": "sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==", "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-is": "^5.0.0" + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" }, "funding": { "type": "opencollective", @@ -6817,9 +11007,9 @@ } }, "node_modules/update-browserslist-db": { - "version": "1.0.13", - "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.13.tgz", - "integrity": "sha512-xebP81SNcPuNpPP3uzeW1NYXxI3rxyJzF3pD6sH4jE7o/IX+WtSpwnVU+qIsDPyk0d3hmFQ7mjqc6AtV604hbg==", + "version": "1.0.16", + "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.16.tgz", + "integrity": 
"sha512-KVbTxlBYlckhF5wgfyZXTWnMn7MMZjMu9XG8bPlliUOP9ThaF4QnhP8qrjrH7DRzHfSk0oQv1wToW+iA5GajEQ==", "funding": [ { "type": "opencollective", @@ -6835,8 +11025,8 @@ } ], "dependencies": { - "escalade": "^3.1.1", - "picocolors": "^1.0.0" + "escalade": "^3.1.2", + "picocolors": "^1.0.1" }, "bin": { "update-browserslist-db": "cli.js" @@ -6854,10 +11044,51 @@ "punycode": "^2.1.0" } }, + "node_modules/use-callback-ref": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz", + "integrity": "sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==", + "dependencies": { + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/use-sidecar": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.2.tgz", + "integrity": "sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw==", + "dependencies": { + "detect-node-es": "^1.1.0", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.9.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, "node_modules/use-sync-external-store": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz", - "integrity": "sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA==", + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.2.2.tgz", + "integrity": 
"sha512-PElTlVMwpblvbNqQ82d2n6RjStvdSoNe9FG28kNfz3WiXilJm4DdNkEzRhCZuIDwY8U08WVihhGR5iRqAwfDiw==", "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0" } @@ -6867,32 +11098,39 @@ "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, - "node_modules/uvu": { - "version": "0.5.6", - "resolved": "https://registry.npmjs.org/uvu/-/uvu-0.5.6.tgz", - "integrity": "sha512-+g8ENReyr8YsOc6fv/NVJs2vFdHBnBNdfE49rshrTzDWOlUx4Gq7KOS2GD8eqhy2j+Ejq29+SbKH8yjkAqXqoA==", - "dependencies": { - "dequal": "^2.0.0", - "diff": "^5.0.0", - "kleur": "^4.0.3", - "sade": "^1.7.3" - }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], "bin": { - "uvu": "bin.js" - }, - "engines": { - "node": ">=8" + "uuid": "dist/bin/uuid" } }, "node_modules/vfile": { - "version": "5.3.7", - "resolved": "https://registry.npmjs.org/vfile/-/vfile-5.3.7.tgz", - "integrity": "sha512-r7qlzkgErKjobAmyNIkkSpizsFPYiUPuJb5pNW1RB4JcYVZhs4lIbVqk8XPk033CV/1z8ss5pkax8SuhGpcG8g==", + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.1.tgz", + "integrity": "sha512-1bYqc7pt6NIADBJ98UiG0Bn/CHIVOoZ/IyEkqIruLg0mE1BKzkOXY2D6CSqQIcKqgadppE5lrxgWXJmXd7zZJw==", "dependencies": { - "@types/unist": "^2.0.0", - "is-buffer": "^2.0.0", - "unist-util-stringify-position": "^3.0.0", - "vfile-message": "^3.0.0" + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/vfile-location": { + "version": "5.0.2", + "resolved": 
"https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.2.tgz", + "integrity": "sha512-NXPYyxyBSH7zB5U6+3uDdd6Nybz6o6/od9rk8bp9H8GR3L+cm/fC0uUTbqBmUTnMCUDslAGBOIKNfvvb+gGlDg==", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile": "^6.0.0" }, "funding": { "type": "opencollective", @@ -6900,12 +11138,12 @@ } }, "node_modules/vfile-message": { - "version": "3.1.4", - "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-3.1.4.tgz", - "integrity": "sha512-fa0Z6P8HUrQN4BZaX05SIVXic+7kE3b05PWAtPuYP9QLHsLKYR7/AlLW3NtOrpXRLeawpDLMsVkmk5DG0NXgWw==", + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", + "integrity": "sha512-jRDZ1IMLttGj41KcZvlrYAaI3CfqpLpfpf+Mfig13viT6NKvRzWZ+lXz0Y5D60w6uJIBAOGq9mSHf0gktF0duw==", "dependencies": { - "@types/unist": "^2.0.0", - "unist-util-stringify-position": "^3.0.0" + "@types/unist": "^3.0.0", + "unist-util-stringify-position": "^4.0.0" }, "funding": { "type": "opencollective", @@ -6913,9 +11151,9 @@ } }, "node_modules/victory-vendor": { - "version": "36.7.0", - "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.7.0.tgz", - "integrity": "sha512-nqYuTkLSdTTeACyXcCLbL7rl0y6jpzLPtTNGOtSnajdR+xxMxBdjMxDjfNJNlhR+ZU8vbXz+QejntcbY7h9/ZA==", + "version": "36.9.2", + "resolved": "https://registry.npmjs.org/victory-vendor/-/victory-vendor-36.9.2.tgz", + "integrity": "sha512-PnpQQMuxlwYdocC8fIJqVXvkeViHYzotI+NJrCuav0ZYFoq912ZHBk3mCeuj+5/VpodOjPe1z0Fk2ihgzlXqjQ==", "dependencies": { "@types/d3-array": "^3.0.3", "@types/d3-ease": "^3.0.0", @@ -6933,23 +11171,19 @@ "d3-timer": "^3.0.1" } }, - "node_modules/watchpack": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz", - "integrity": "sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==", - "dependencies": { - "glob-to-regexp": "^0.4.1", - "graceful-fs": "^4.1.2" - }, - "engines": { - "node": ">=10.13.0" + 
"node_modules/web-namespaces": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", + "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" } }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, "dependencies": { "isexe": "^2.0.0" }, @@ -7003,31 +11237,15 @@ } }, "node_modules/which-collection": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.1.tgz", - "integrity": "sha512-W8xeTUwaln8i3K/cY1nGXzdnVZlidBcagyNFtBdD5kxnb4TvGKR7FfSIS3mYpwWS1QUCutfKz8IY8RjftB0+1A==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/which-collection/-/which-collection-1.0.2.tgz", + "integrity": "sha512-K4jVyjnBdgvc86Y6BkaLZEN933SwYOuBFkdmBu9ZfkcAbdVbpITnDmjvZ/aQjRXQrv5EPkTnD1s39GiiqbngCw==", "dev": true, "dependencies": { - "is-map": "^2.0.1", - "is-set": "^2.0.1", - "is-weakmap": "^2.0.1", - "is-weakset": "^2.0.1" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/which-typed-array": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.13.tgz", - "integrity": "sha512-P5Nra0qjSncduVPEAr7xhoF5guty49ArDTwzJ/yNuPIbZppyRxFQsRCWrocxIY+CnMVG+qfbU2FmDKyvSGClow==", - "dev": true, - "dependencies": { - "available-typed-arrays": "^1.0.5", - "call-bind": "^1.0.4", - "for-each": "^0.3.3", - "gopd": "^1.0.1", - "has-tostringtag": "^1.0.0" + "is-map": "^2.0.3", + "is-set": "^2.0.3", + "is-weakmap": "^2.0.2", + "is-weakset": "^2.0.3" }, "engines": { "node": ">= 0.4" @@ -7036,20 +11254,139 @@ "url": "https://github.com/sponsors/ljharb" } }, + 
"node_modules/which-typed-array": { + "version": "1.1.15", + "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.15.tgz", + "integrity": "sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==", + "dev": true, + "dependencies": { + "available-typed-arrays": "^1.0.7", + "call-bind": "^1.0.7", + "for-each": "^0.3.3", + "gopd": "^1.0.1", + "has-tostringtag": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/wrap-ansi": { + "version": "8.1.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", + "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", + "dependencies": { + "ansi-styles": "^6.1.0", + "string-width": "^5.0.1", + "strip-ansi": "^7.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs": { + "name": "wrap-ansi", + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": 
"sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/wrap-ansi-cjs/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/ansi-styles": { + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", + "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/wrap-ansi/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" }, "node_modules/yallist": 
{ - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", - "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", + "peer": true }, "node_modules/yaml": { - "version": "2.3.4", - "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.4.tgz", - "integrity": "sha512-8aAvwVUSHpfEqTQ4w/KMlf3HcRdt50E5ODIQJBw1fQ5RL34xabzxtUlzTXVqc4rkZsPbvrXKWnABCD7kWSmocA==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.2.tgz", + "integrity": "sha512-B3VqDZ+JAg1nZpaEmWtTXUlBneoGx6CPM9b0TENK6aoSu5t73dItudwdgmi6tHlIZZId4dZ9skcAQ2UbcyAeVA==", + "bin": { + "yaml": "bin.mjs" + }, "engines": { "node": ">= 14" } @@ -7067,9 +11404,9 @@ } }, "node_modules/yup": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/yup/-/yup-1.3.2.tgz", - "integrity": "sha512-6KCM971iQtJ+/KUaHdrhVr2LDkfhBtFPRnsG1P8F4q3uUVQ2RfEM9xekpha9aA4GXWJevjM10eDcPQ1FfWlmaQ==", + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/yup/-/yup-1.4.0.tgz", + "integrity": "sha512-wPbgkJRCqIf+OHyiTBQoJiP5PFuAXaWiJK6AmYkzQAh5/c2K9hzSApBZG5wV9KoKSePF7sAxmNSvh/13YHkFDg==", "dependencies": { "property-expr": "^2.0.5", "tiny-case": "^1.0.3", @@ -7087,6 +11424,15 @@ "funding": { "url": "https://github.com/sponsors/sindresorhus" } + }, + "node_modules/zwitch": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", + "integrity": "sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } } } } diff --git a/web/package.json b/web/package.json index c0872e4ab..1ba7286d4 100644 --- a/web/package.json +++ b/web/package.json @@ 
-13,33 +13,46 @@ "@dnd-kit/modifiers": "^7.0.0", "@dnd-kit/sortable": "^8.0.0", "@phosphor-icons/react": "^2.0.8", + "@radix-ui/react-dialog": "^1.0.5", + "@radix-ui/react-popover": "^1.0.7", + "@radix-ui/react-tooltip": "^1.0.7", "@tremor/react": "^3.9.2", "@types/js-cookie": "^3.0.3", + "@types/lodash": "^4.17.0", "@types/node": "18.15.11", + "@types/prismjs": "^1.26.4", "@types/react": "18.0.32", "@types/react-dom": "18.0.11", + "@types/uuid": "^9.0.8", "autoprefixer": "^10.4.14", "formik": "^2.2.9", "js-cookie": "^3.0.5", - "next": "^14.0.0", + "lodash": "^4.17.21", + "mdast-util-find-and-replace": "^3.0.1", + "next": "^14.2.3", + "npm": "^10.8.0", "postcss": "^8.4.31", - "react": "^18.2.0", - "react-dom": "^18.2.0", + "prismjs": "^1.29.0", + "react": "^18.3.1", + "react-dom": "^18.3.1", "react-dropzone": "^14.2.3", "react-icons": "^4.8.0", "react-loader-spinner": "^5.4.5", - "react-markdown": "^8.0.7", + "react-markdown": "^9.0.1", + "rehype-prism-plus": "^2.0.0", + "remark-gfm": "^4.0.0", "semver": "^7.5.4", "sharp": "^0.32.6", "swr": "^2.1.5", "tailwindcss": "^3.3.1", "typescript": "5.0.3", + "uuid": "^9.0.1", "yup": "^1.1.1" }, "devDependencies": { "@tailwindcss/typography": "^0.5.10", "eslint": "^8.48.0", - "eslint-config-next": "^14.0.0", + "eslint-config-next": "^14.1.0", "prettier": "2.8.8" } } diff --git a/web/public/Axero.jpeg b/web/public/Axero.jpeg new file mode 100644 index 000000000..f6df99217 Binary files /dev/null and b/web/public/Axero.jpeg differ diff --git a/web/public/Discourse.png b/web/public/Discourse.png new file mode 100644 index 000000000..48e4046b8 Binary files /dev/null and b/web/public/Discourse.png differ diff --git a/web/public/MediaWiki.svg b/web/public/MediaWiki.svg new file mode 100644 index 000000000..3c4ed9075 --- /dev/null +++ b/web/public/MediaWiki.svg @@ -0,0 +1,43 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/public/Wikipedia.svg 
b/web/public/Wikipedia.svg new file mode 100644 index 000000000..dc32f9848 --- /dev/null +++ b/web/public/Wikipedia.svg @@ -0,0 +1 @@ +]>Wikipedia logo version 2 \ No newline at end of file diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx new file mode 100644 index 000000000..d6427a679 --- /dev/null +++ b/web/src/app/admin/assistants/AssistantEditor.tsx @@ -0,0 +1,897 @@ +"use client"; + +import { CCPairBasicInfo, DocumentSet, User, UserGroup } from "@/lib/types"; +import { Button, Divider, Italic, Text } from "@tremor/react"; +import { + ArrayHelpers, + ErrorMessage, + Field, + FieldArray, + Form, + Formik, +} from "formik"; + +import * as Yup from "yup"; +import { buildFinalPrompt, createPersona, updatePersona } from "./lib"; +import { useRouter } from "next/navigation"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { Persona, StarterMessage } from "./interfaces"; +import Link from "next/link"; +import { useEffect, useState } from "react"; +import { + BooleanFormField, + SelectorFormField, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { HidableSection } from "./HidableSection"; +import { FiPlus, FiX } from "react-icons/fi"; +import { EE_ENABLED } from "@/lib/constants"; +import { useUserGroups } from "@/lib/hooks"; +import { Bubble } from "@/components/Bubble"; +import { GroupsIcon } from "@/components/icons/icons"; +import { SuccessfulPersonaUpdateRedirectType } from "./enums"; +import { DocumentSetSelectable } from "@/components/documentSet/DocumentSetSelectable"; +import { FullLLMProvider } from "../models/llm/interfaces"; +import { Option } from "@/components/Dropdown"; +import { ToolSnapshot } from "@/lib/tools/interfaces"; +import { checkUserIsNoAuthUser } from "@/lib/user"; +import { addAssistantToList } from "@/lib/assistants/updateAssistantPreferences"; +import { checkLLMSupportsImageInput } from "@/lib/llm/utils"; + +function 
findSearchTool(tools: ToolSnapshot[]) { + return tools.find((tool) => tool.in_code_tool_id === "SearchTool"); +} + +function findImageGenerationTool(tools: ToolSnapshot[]) { + return tools.find((tool) => tool.in_code_tool_id === "ImageGenerationTool"); +} + +function Label({ children }: { children: string | JSX.Element }) { + return ( +
{children}
+ ); +} + +function SubLabel({ children }: { children: string | JSX.Element }) { + return
{children}
; +} + +export function AssistantEditor({ + existingPersona, + ccPairs, + documentSets, + user, + defaultPublic, + redirectType, + llmProviders, + tools, + shouldAddAssistantToUserPreferences, +}: { + existingPersona?: Persona | null; + ccPairs: CCPairBasicInfo[]; + documentSets: DocumentSet[]; + user: User | null; + defaultPublic: boolean; + redirectType: SuccessfulPersonaUpdateRedirectType; + llmProviders: FullLLMProvider[]; + tools: ToolSnapshot[]; + shouldAddAssistantToUserPreferences?: boolean; +}) { + const router = useRouter(); + const { popup, setPopup } = usePopup(); + + // EE only + const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); + + const [finalPrompt, setFinalPrompt] = useState(""); + const [finalPromptError, setFinalPromptError] = useState(""); + + const triggerFinalPromptUpdate = async ( + systemPrompt: string, + taskPrompt: string, + retrievalDisabled: boolean + ) => { + const response = await buildFinalPrompt( + systemPrompt, + taskPrompt, + retrievalDisabled + ); + if (response.ok) { + setFinalPrompt((await response.json()).final_prompt_template); + } + }; + + const isUpdate = existingPersona !== undefined && existingPersona !== null; + const existingPrompt = existingPersona?.prompts[0] ?? 
null; + + useEffect(() => { + if (isUpdate && existingPrompt) { + triggerFinalPromptUpdate( + existingPrompt.system_prompt, + existingPrompt.task_prompt, + existingPersona.num_chunks === 0 + ); + } + }, []); + + const defaultProvider = llmProviders.find( + (llmProvider) => llmProvider.is_default_provider + ); + const defaultProviderName = defaultProvider?.provider; + const defaultModelName = defaultProvider?.default_model_name; + const providerDisplayNameToProviderName = new Map(); + llmProviders.forEach((llmProvider) => { + providerDisplayNameToProviderName.set( + llmProvider.name, + llmProvider.provider + ); + }); + + const modelOptionsByProvider = new Map[]>(); + llmProviders.forEach((llmProvider) => { + const providerOptions = llmProvider.model_names.map((modelName) => { + return { + name: modelName, + value: modelName, + }; + }); + modelOptionsByProvider.set(llmProvider.name, providerOptions); + }); + const providerSupportingImageGenerationExists = llmProviders.some( + (provider) => provider.provider === "openai" + ); + + const personaCurrentToolIds = + existingPersona?.tools.map((tool) => tool.id) || []; + const searchTool = findSearchTool(tools); + const imageGenerationTool = providerSupportingImageGenerationExists + ? findImageGenerationTool(tools) + : undefined; + + return ( +
+ {popup} + documentSet.id + ) ?? ([] as number[]), + num_chunks: existingPersona?.num_chunks ?? null, + include_citations: + existingPersona?.prompts[0]?.include_citations ?? true, + llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false, + llm_model_provider_override: + existingPersona?.llm_model_provider_override ?? null, + llm_model_version_override: + existingPersona?.llm_model_version_override ?? null, + starter_messages: existingPersona?.starter_messages ?? [], + // EE Only + groups: existingPersona?.groups ?? [], + search_tool_enabled: existingPersona + ? personaCurrentToolIds.includes(searchTool!.id) + : ccPairs.length > 0, + image_generation_tool_enabled: imageGenerationTool + ? personaCurrentToolIds.includes(imageGenerationTool.id) + : false, + }} + validationSchema={Yup.object() + .shape({ + name: Yup.string().required("Must give the Assistant a name!"), + description: Yup.string().required( + "Must give the Assistant a description!" + ), + system_prompt: Yup.string(), + task_prompt: Yup.string(), + is_public: Yup.boolean().required(), + document_set_ids: Yup.array().of(Yup.number()), + num_chunks: Yup.number().nullable(), + include_citations: Yup.boolean().required(), + llm_relevance_filter: Yup.boolean().required(), + llm_model_version_override: Yup.string().nullable(), + llm_model_provider_override: Yup.string().nullable(), + starter_messages: Yup.array().of( + Yup.object().shape({ + name: Yup.string().required(), + description: Yup.string().required(), + message: Yup.string().required(), + }) + ), + // EE Only + groups: Yup.array().of(Yup.number()), + search_tool_enabled: Yup.boolean().required(), + image_generation_tool_enabled: Yup.boolean().required(), + }) + .test( + "system-prompt-or-task-prompt", + "Must provide at least one of System Prompt or Task Prompt", + (values) => { + const systemPromptSpecified = values.system_prompt + ? values.system_prompt.length > 0 + : false; + const taskPromptSpecified = values.task_prompt + ? 
values.task_prompt.length > 0 + : false; + if (systemPromptSpecified || taskPromptSpecified) { + setFinalPromptError(""); + return true; + } // Return true if at least one field has a value + + setFinalPromptError( + "Must provide at least one of System Prompt or Task Prompt" + ); + } + )} + onSubmit={async (values, formikHelpers) => { + if (finalPromptError) { + setPopup({ + type: "error", + message: "Cannot submit while there are errors in the form!", + }); + return; + } + + if ( + values.llm_model_provider_override && + !values.llm_model_version_override + ) { + setPopup({ + type: "error", + message: + "Must select a model if a non-default LLM provider is chosen.", + }); + return; + } + + formikHelpers.setSubmitting(true); + + const tools = []; + if (values.search_tool_enabled && ccPairs.length > 0) { + tools.push(searchTool!.id); + } + if ( + values.image_generation_tool_enabled && + imageGenerationTool && + checkLLMSupportsImageInput( + providerDisplayNameToProviderName.get( + values.llm_model_provider_override || "" + ) || + defaultProviderName || + "", + values.llm_model_version_override || defaultModelName || "" + ) + ) { + tools.push(imageGenerationTool.id); + } + + // if disable_retrieval is set, set num_chunks to 0 + // to tell the backend to not fetch any documents + const numChunks = values.search_tool_enabled + ? values.num_chunks || 10 + : 0; + + // don't set groups if marked as public + const groups = values.is_public ? [] : values.groups; + + let promptResponse; + let personaResponse; + if (isUpdate) { + [promptResponse, personaResponse] = await updatePersona({ + id: existingPersona.id, + existingPromptId: existingPrompt?.id, + ...values, + num_chunks: numChunks, + users: + user && !checkUserIsNoAuthUser(user.id) ? [user.id] : undefined, + groups, + tool_ids: tools, + }); + } else { + [promptResponse, personaResponse] = await createPersona({ + ...values, + num_chunks: numChunks, + users: + user && !checkUserIsNoAuthUser(user.id) ? 
[user.id] : undefined, + groups, + tool_ids: tools, + }); + } + + let error = null; + if (!promptResponse.ok) { + error = await promptResponse.text(); + } + if (!personaResponse) { + error = "Failed to create Assistant - no response received"; + } else if (!personaResponse.ok) { + error = await personaResponse.text(); + } + + if (error || !personaResponse) { + setPopup({ + type: "error", + message: `Failed to create Assistant - ${error}`, + }); + formikHelpers.setSubmitting(false); + } else { + const assistant = await personaResponse.json(); + const assistantId = assistant.id; + if ( + shouldAddAssistantToUserPreferences && + user?.preferences?.chosen_assistants + ) { + const success = await addAssistantToList( + assistantId, + user.preferences.chosen_assistants + ); + if (success) { + setPopup({ + message: `"${assistant.name}" has been added to your list.`, + type: "success", + }); + router.refresh(); + } else { + setPopup({ + message: `"${assistant.name}" could not be added to your list.`, + type: "error", + }); + } + } + router.push( + redirectType === SuccessfulPersonaUpdateRedirectType.ADMIN + ? `/admin/assistants?u=${Date.now()}` + : `/chat?assistantId=${assistantId}` + ); + } + }} + > + {({ isSubmitting, values, setFieldValue }) => ( +
+
+ + <> + + + + + { + setFieldValue("system_prompt", e.target.value); + triggerFinalPromptUpdate( + e.target.value, + values.task_prompt, + values.search_tool_enabled + ); + }} + error={finalPromptError} + /> + + { + setFieldValue("task_prompt", e.target.value); + triggerFinalPromptUpdate( + values.system_prompt, + e.target.value, + values.search_tool_enabled + ); + }} + error={finalPromptError} + /> + + + + {finalPrompt ? ( +
+                      {finalPrompt}
+                    
+ ) : ( + "-" + )} + +
+ + + + + <> + {ccPairs.length > 0 && ( + <> + { + setFieldValue("num_chunks", null); + setFieldValue( + "search_tool_enabled", + e.target.checked + ); + }} + /> + + {values.search_tool_enabled && ( +
+ {ccPairs.length > 0 && ( + <> + + +
+ + <> + Select which{" "} + {!user || user.role === "admin" ? ( + + Document Sets + + ) : ( + "Document Sets" + )}{" "} + that this Assistant should search through. + If none are specified, the Assistant will + search through all available documents in + order to try and respond to queries. + + +
+ + {documentSets.length > 0 ? ( + ( +
+
+ {documentSets.map((documentSet) => { + const ind = + values.document_set_ids.indexOf( + documentSet.id + ); + let isSelected = ind !== -1; + return ( + { + if (isSelected) { + arrayHelpers.remove(ind); + } else { + arrayHelpers.push( + documentSet.id + ); + } + }} + /> + ); + })} +
+
+ )} + /> + ) : ( + + No Document Sets available.{" "} + {user?.role !== "admin" && ( + <> + If this functionality would be useful, + reach out to the administrators of Danswer + for assistance. + + )} + + )} + + <> + + How many chunks should we feed into the + LLM when generating the final response? + Each chunk is ~400 words long. +
+ } + onChange={(e) => { + const value = e.target.value; + // Allow only integer values + if ( + value === "" || + /^[0-9]+$/.test(value) + ) { + setFieldValue("num_chunks", value); + } + }} + /> + + + + + + + + + )} +
+ )} + + )} + + {imageGenerationTool && + checkLLMSupportsImageInput( + providerDisplayNameToProviderName.get( + values.llm_model_provider_override || "" + ) || + defaultProviderName || + "", + values.llm_model_version_override || + defaultModelName || + "" + ) && ( + { + setFieldValue( + "image_generation_tool_enabled", + e.target.checked + ); + }} + /> + )} + + + + + + {llmProviders.length > 0 && ( + <> + + <> + + Pick which LLM to use for this Assistant. If left as + Default, will use{" "} + {defaultModelName} + . +
+
+ For more information on the different LLMs, checkout the{" "} + + OpenAI docs + + . +
+ +
+
+ LLM Provider + ({ + name: llmProvider.name, + value: llmProvider.name, + }))} + includeDefault={true} + onSelect={(selected) => { + if ( + selected !== values.llm_model_provider_override + ) { + setFieldValue( + "llm_model_version_override", + null + ); + } + setFieldValue( + "llm_model_provider_override", + selected + ); + }} + /> +
+ + {values.llm_model_provider_override && ( +
+ Model + +
+ )} +
+ +
+ + + + )} + + + <> +
+ + Starter Messages help guide users to use this Assistant. + They are shown to the user as clickable options when they + select this Assistant. When selected, the specified + message is sent to the LLM as the initial user message. + +
+ + ) => ( +
+ {values.starter_messages && + values.starter_messages.length > 0 && + values.starter_messages.map((_, index) => { + return ( +
+
+
+
+ + + Shows up as the "title" for + this Starter Message. For example, + "Write an email". + + + +
+ +
+ + + A description which tells the user what + they might want to use this Starter + Message for. For example "to a + client about a new feature" + + + +
+ +
+ + + The actual message to be sent as the + initial user message if a user selects + this starter prompt. For example, + "Write me an email to a client + about a new billing feature we just + released." + + + +
+
+
+ arrayHelpers.remove(index)} + /> +
+
+
+ ); + })} + + +
+ )} + /> + +
+ + + + {EE_ENABLED && userGroups && (!user || user.role === "admin") && ( + <> + + <> + + + {userGroups && + userGroups.length > 0 && + !values.is_public && ( +
+ + Select which User Groups should have access to + this Assistant. + +
+ {userGroups.map((userGroup) => { + const isSelected = values.groups.includes( + userGroup.id + ); + return ( + { + if (isSelected) { + setFieldValue( + "groups", + values.groups.filter( + (id) => id !== userGroup.id + ) + ); + } else { + setFieldValue("groups", [ + ...values.groups, + userGroup.id, + ]); + } + }} + > +
+ +
+ {userGroup.name} +
+
+
+ ); + })} +
+
+ )} + +
+ + + )} + +
+ +
+
+ + )} + + + ); +} diff --git a/web/src/app/admin/assistants/HidableSection.tsx b/web/src/app/admin/assistants/HidableSection.tsx new file mode 100644 index 000000000..714f2344c --- /dev/null +++ b/web/src/app/admin/assistants/HidableSection.tsx @@ -0,0 +1,50 @@ +import { useState } from "react"; +import { FiChevronDown, FiChevronRight } from "react-icons/fi"; + +export function SectionHeader({ + children, + includeMargin = true, +}: { + children: string | JSX.Element; + includeMargin?: boolean; +}) { + return ( +
+ {children} +
+ ); +} + +export function HidableSection({ + children, + sectionTitle, + defaultHidden = false, +}: { + children: string | JSX.Element; + sectionTitle: string | JSX.Element; + defaultHidden?: boolean; +}) { + const [isHidden, setIsHidden] = useState(defaultHidden); + + return ( +
+
setIsHidden(!isHidden)} + > + {sectionTitle} +
+ {isHidden ? ( + + ) : ( + + )} +
+
+ + {!isHidden &&
{children}
} +
+ ); +} diff --git a/web/src/app/admin/personas/PersonaTable.tsx b/web/src/app/admin/assistants/PersonaTable.tsx similarity index 86% rename from web/src/app/admin/personas/PersonaTable.tsx rename to web/src/app/admin/assistants/PersonaTable.tsx index bce8e2985..878736115 100644 --- a/web/src/app/admin/personas/PersonaTable.tsx +++ b/web/src/app/admin/assistants/PersonaTable.tsx @@ -12,6 +12,18 @@ import { deletePersona, personaComparator } from "./lib"; import { FiEdit } from "react-icons/fi"; import { TrashIcon } from "@/components/icons/icons"; +function PersonaTypeDisplay({ persona }: { persona: Persona }) { + if (persona.default_persona) { + return Built-In; + } + + if (persona.is_public) { + return Global; + } + + return Personal {persona.owner && <>({persona.owner.email})}; +} + export function PersonasTable({ personas }: { personas: Persona[] }) { const router = useRouter(); const { popup, setPopup } = usePopup(); @@ -64,13 +76,13 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { {popup} - Personas will be displayed as options on the Chat / Search interfaces in - the order they are displayed below. Personas marked as hidden will not - be displayed. + Assistants will be displayed as options on the Chat / Search interfaces + in the order they are displayed below. Assistants marked as hidden will + not be displayed. { return { id: persona.id.toString(), @@ -79,7 +91,11 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { {!persona.default_persona && ( router.push(`/admin/personas/${persona.id}`)} + onClick={() => + router.push( + `/admin/assistants/${persona.id}?u=${Date.now()}` + ) + } /> )}

@@ -92,7 +108,7 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { > {persona.description}

, - persona.default_persona ? "Yes" : "No", + ,
{ @@ -138,7 +154,7 @@ export function PersonasTable({ personas }: { personas: Persona[] }) { onClick={async () => { const response = await deletePersona(persona.id); if (response.ok) { - router.push(`/admin/personas?u=${Date.now()}`); + router.refresh(); } else { alert( `Failed to delete persona - ${await response.text()}` diff --git a/web/src/app/admin/personas/[personaId]/DeletePersonaButton.tsx b/web/src/app/admin/assistants/[id]/DeletePersonaButton.tsx similarity index 58% rename from web/src/app/admin/personas/[personaId]/DeletePersonaButton.tsx rename to web/src/app/admin/assistants/[id]/DeletePersonaButton.tsx index f7e96041c..0bbc268fa 100644 --- a/web/src/app/admin/personas/[personaId]/DeletePersonaButton.tsx +++ b/web/src/app/admin/assistants/[id]/DeletePersonaButton.tsx @@ -4,8 +4,15 @@ import { Button } from "@tremor/react"; import { FiTrash } from "react-icons/fi"; import { deletePersona } from "../lib"; import { useRouter } from "next/navigation"; +import { SuccessfulPersonaUpdateRedirectType } from "../enums"; -export function DeletePersonaButton({ personaId }: { personaId: number }) { +export function DeletePersonaButton({ + personaId, + redirectType, +}: { + personaId: number; + redirectType: SuccessfulPersonaUpdateRedirectType; +}) { const router = useRouter(); return ( @@ -15,7 +22,11 @@ export function DeletePersonaButton({ personaId }: { personaId: number }) { onClick={async () => { const response = await deletePersona(personaId); if (response.ok) { - router.push(`/admin/personas?u=${Date.now()}`); + router.push( + redirectType === SuccessfulPersonaUpdateRedirectType.ADMIN + ? 
`/admin/assistants?u=${Date.now()}` + : `/chat` + ); } else { alert(`Failed to delete persona - ${await response.text()}`); } diff --git a/web/src/app/admin/assistants/[id]/page.tsx b/web/src/app/admin/assistants/[id]/page.tsx new file mode 100644 index 000000000..86b735360 --- /dev/null +++ b/web/src/app/admin/assistants/[id]/page.tsx @@ -0,0 +1,52 @@ +import { ErrorCallout } from "@/components/ErrorCallout"; +import { AssistantEditor } from "../AssistantEditor"; +import { BackButton } from "@/components/BackButton"; +import { Card, Title } from "@tremor/react"; +import { DeletePersonaButton } from "./DeletePersonaButton"; +import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; +import { SuccessfulPersonaUpdateRedirectType } from "../enums"; +import { RobotIcon } from "@/components/icons/icons"; +import { AdminPageTitle } from "@/components/admin/Title"; + +export default async function Page({ params }: { params: { id: string } }) { + const [values, error] = await fetchAssistantEditorInfoSS(params.id); + + let body; + if (!values) { + body = ( + + ); + } else { + body = ( + <> + + + + +
+ Delete Assistant +
+ +
+
+ + ); + } + + return ( +
+ + + } /> + + {body} +
+ ); +} diff --git a/web/src/app/admin/assistants/enums.ts b/web/src/app/admin/assistants/enums.ts new file mode 100644 index 000000000..602d1692f --- /dev/null +++ b/web/src/app/admin/assistants/enums.ts @@ -0,0 +1,4 @@ +export enum SuccessfulPersonaUpdateRedirectType { + ADMIN = "ADMIN", + CHAT = "CHAT", +} diff --git a/web/src/app/admin/personas/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts similarity index 56% rename from web/src/app/admin/personas/interfaces.ts rename to web/src/app/admin/assistants/interfaces.ts index 0ee5296ae..0a06ac4cc 100644 --- a/web/src/app/admin/personas/interfaces.ts +++ b/web/src/app/admin/assistants/interfaces.ts @@ -1,9 +1,15 @@ -import { DocumentSet } from "@/lib/types"; +import { ToolSnapshot } from "@/lib/tools/interfaces"; +import { DocumentSet, MinimalUserSnapshot } from "@/lib/types"; + +export interface StarterMessage { + name: string; + description: string | null; + message: string; +} export interface Prompt { id: number; name: string; - shared: boolean; description: string; system_prompt: string; task_prompt: string; @@ -15,15 +21,21 @@ export interface Prompt { export interface Persona { id: number; name: string; - shared: boolean; + owner: MinimalUserSnapshot | null; is_visible: boolean; + is_public: boolean; display_priority: number | null; description: string; document_sets: DocumentSet[]; prompts: Prompt[]; + tools: ToolSnapshot[]; num_chunks?: number; llm_relevance_filter?: boolean; llm_filter_extraction?: boolean; + llm_model_provider_override?: string; llm_model_version_override?: string; + starter_messages: StarterMessage[] | null; default_persona: boolean; + users: MinimalUserSnapshot[]; + groups: number[]; } diff --git a/web/src/app/admin/personas/lib.ts b/web/src/app/admin/assistants/lib.ts similarity index 80% rename from web/src/app/admin/personas/lib.ts rename to web/src/app/admin/assistants/lib.ts index f04c82eea..4d42789d8 100644 --- a/web/src/app/admin/personas/lib.ts +++ 
b/web/src/app/admin/assistants/lib.ts @@ -1,4 +1,4 @@ -import { Persona, Prompt } from "./interfaces"; +import { Persona, Prompt, StarterMessage } from "./interfaces"; interface PersonaCreationRequest { name: string; @@ -8,8 +8,14 @@ interface PersonaCreationRequest { document_set_ids: number[]; num_chunks: number | null; include_citations: boolean; + is_public: boolean; llm_relevance_filter: boolean | null; + llm_model_provider_override: string | null; llm_model_version_override: string | null; + starter_messages: StarterMessage[] | null; + users?: string[]; + groups: number[]; + tool_ids: number[]; // Added tool_ids to the interface } interface PersonaUpdateRequest { @@ -22,8 +28,14 @@ interface PersonaUpdateRequest { document_set_ids: number[]; num_chunks: number | null; include_citations: boolean; + is_public: boolean; llm_relevance_filter: boolean | null; + llm_model_provider_override: string | null; llm_model_version_override: string | null; + starter_messages: StarterMessage[] | null; + users?: string[]; + groups: number[]; + tool_ids: number[]; // Added tool_ids to the interface } function promptNameFromPersonaName(personaName: string) { @@ -49,7 +61,6 @@ function createPrompt({ body: JSON.stringify({ name: promptNameFromPersonaName(personaName), description: `Default prompt for persona ${personaName}`, - shared: true, system_prompt: systemPrompt, task_prompt: taskPrompt, include_citations: includeCitations, @@ -78,7 +89,6 @@ function updatePrompt({ body: JSON.stringify({ name: promptNameFromPersonaName(personaName), description: `Default prompt for persona ${personaName}`, - shared: true, system_prompt: systemPrompt, task_prompt: taskPrompt, include_citations: includeCitations, @@ -96,19 +106,28 @@ function buildPersonaAPIBody( document_set_ids, num_chunks, llm_relevance_filter, + is_public, + groups, + users, + tool_ids, // Added tool_ids to the destructuring } = creationRequest; return { name, description, - shared: true, num_chunks, 
llm_relevance_filter, llm_filter_extraction: false, + is_public, recency_bias: "base_decay", prompt_ids: [promptId], document_set_ids, + llm_model_provider_override: creationRequest.llm_model_provider_override, llm_model_version_override: creationRequest.llm_model_version_override, + starter_messages: creationRequest.starter_messages, + users, + groups, + tool_ids, // Added tool_ids to the return object }; } @@ -128,7 +147,7 @@ export async function createPersona( const createPersonaResponse = promptId !== null - ? await fetch("/api/admin/persona", { + ? await fetch("/api/persona", { method: "POST", headers: { "Content-Type": "application/json", @@ -171,7 +190,7 @@ export async function updatePersona( const updatePersonaResponse = promptResponse.ok && promptId - ? await fetch(`/api/admin/persona/${id}`, { + ? await fetch(`/api/persona/${id}`, { method: "PATCH", headers: { "Content-Type": "application/json", @@ -186,7 +205,7 @@ export async function updatePersona( } export function deletePersona(personaId: number) { - return fetch(`/api/admin/persona/${personaId}`, { + return fetch(`/api/persona/${personaId}`, { method: "DELETE", }); } @@ -214,9 +233,27 @@ function smallerNumberFirstComparator(a: number, b: number) { return a > b ? 1 : -1; } +function closerToZeroNegativesFirstComparator(a: number, b: number) { + if (a < 0 && b > 0) { + return -1; + } + if (a > 0 && b < 0) { + return 1; + } + + const absA = Math.abs(a); + const absB = Math.abs(b); + + if (absA === absB) { + return a > b ? 1 : -1; + } + + return absA > absB ? 
1 : -1; +} + export function personaComparator(a: Persona, b: Persona) { if (a.display_priority === null && b.display_priority === null) { - return smallerNumberFirstComparator(a.id, b.id); + return closerToZeroNegativesFirstComparator(a.id, b.id); } if (a.display_priority !== b.display_priority) { @@ -230,5 +267,5 @@ export function personaComparator(a: Persona, b: Persona) { return smallerNumberFirstComparator(a.display_priority, b.display_priority); } - return smallerNumberFirstComparator(a.id, b.id); + return closerToZeroNegativesFirstComparator(a.id, b.id); } diff --git a/web/src/app/admin/assistants/new/page.tsx b/web/src/app/admin/assistants/new/page.tsx new file mode 100644 index 000000000..343d78c7e --- /dev/null +++ b/web/src/app/admin/assistants/new/page.tsx @@ -0,0 +1,42 @@ +import { AssistantEditor } from "../AssistantEditor"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { RobotIcon } from "@/components/icons/icons"; +import { BackButton } from "@/components/BackButton"; +import { Card } from "@tremor/react"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; +import { SuccessfulPersonaUpdateRedirectType } from "../enums"; + +export default async function Page() { + const [values, error] = await fetchAssistantEditorInfoSS(); + + let body; + if (!values) { + body = ( + + ); + } else { + body = ( + + + + ); + } + + return ( +
+ + + } + /> + + {body} +
+ ); +} diff --git a/web/src/app/admin/personas/page.tsx b/web/src/app/admin/assistants/page.tsx similarity index 77% rename from web/src/app/admin/personas/page.tsx rename to web/src/app/admin/assistants/page.tsx index b4e42f579..55b89045e 100644 --- a/web/src/app/admin/personas/page.tsx +++ b/web/src/app/admin/assistants/page.tsx @@ -9,7 +9,7 @@ import { RobotIcon } from "@/components/icons/icons"; import { AdminPageTitle } from "@/components/admin/Title"; export default async function Page() { - const personaResponse = await fetchSS("/persona"); + const personaResponse = await fetchSS("/admin/persona"); if (!personaResponse.ok) { return ( @@ -24,11 +24,11 @@ export default async function Page() { return (
- } title="Personas" /> + } title="Assistants" /> - Personas are a way to build custom search/question-answering experiences - for different use cases. + Assistants are a way to build custom search/question-answering + experiences for different use cases. They allow you to customize:
@@ -43,20 +43,20 @@ export default async function Page() {
- Create a Persona + Create an Assistant
- New Persona + New Assistant
- Existing Personas + Existing Assistants
diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx index 06efc724a..6a5984125 100644 --- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx +++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx @@ -28,7 +28,7 @@ import { Text, } from "@tremor/react"; import { useRouter } from "next/navigation"; -import { Persona } from "../personas/interfaces"; +import { Persona } from "../assistants/interfaces"; import { useState } from "react"; import { BookmarkIcon, RobotIcon } from "@/components/icons/icons"; @@ -90,9 +90,13 @@ export const SlackBotCreationForm = ({ !isPersonaASlackBotPersona(existingSlackBotConfig.persona) ? existingSlackBotConfig.persona.id : null, + response_type: existingSlackBotConfig?.response_type || "citations", }} validationSchema={Yup.object().shape({ channel_names: Yup.array().of(Yup.string()), + response_type: Yup.string() + .oneOf(["quotes", "citations"]) + .required(), answer_validity_check_enabled: Yup.boolean().required(), questionmark_prefilter_enabled: Yup.boolean().required(), respond_tag_only: Yup.boolean().required(), @@ -171,6 +175,33 @@ export const SlackBotCreationForm = ({
} /> + + + If set to Citations, DanswerBot will respond with a direct + answer with inline citations. It will also provide links + to these cited documents below the answer. When in doubt, + choose this option. +
+
+ If set to Quotes, DanswerBot will respond with a direct + answer as well as with quotes pulled from the context + documents to support that answer. DanswerBot will also + give a list of relevant documents. Choose this option if + you want a very detailed response AND/OR a list of + relevant documents would be useful just in case the LLM + missed anything. + + } + options={[ + { name: "Citations", value: "citations" }, + { name: "Quotes", value: "quotes" }, + ]} + /> + When should DanswerBot respond? diff --git a/web/src/app/admin/bot/[id]/page.tsx b/web/src/app/admin/bot/[id]/page.tsx index 1fc544e5d..2cc5eaea1 100644 --- a/web/src/app/admin/bot/[id]/page.tsx +++ b/web/src/app/admin/bot/[id]/page.tsx @@ -6,7 +6,7 @@ import { ErrorCallout } from "@/components/ErrorCallout"; import { DocumentSet, SlackBotConfig } from "@/lib/types"; import { Text } from "@tremor/react"; import { BackButton } from "@/components/BackButton"; -import { Persona } from "../../personas/interfaces"; +import { Persona } from "../../assistants/interfaces"; import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; async function Page({ params }: { params: { id: string } }) { diff --git a/web/src/app/admin/bot/lib.ts b/web/src/app/admin/bot/lib.ts index 3c0db3274..b6c6bec16 100644 --- a/web/src/app/admin/bot/lib.ts +++ b/web/src/app/admin/bot/lib.ts @@ -1,5 +1,9 @@ -import { ChannelConfig, SlackBotTokens } from "@/lib/types"; -import { Persona } from "../personas/interfaces"; +import { + ChannelConfig, + SlackBotResponseType, + SlackBotTokens, +} from "@/lib/types"; +import { Persona } from "../assistants/interfaces"; interface SlackBotConfigCreationRequest { document_sets: number[]; @@ -12,6 +16,7 @@ interface SlackBotConfigCreationRequest { respond_team_member_list: string[]; follow_up_tags?: string[]; usePersona: boolean; + response_type: SlackBotResponseType; } const buildFiltersFromCreationRequest = ( @@ -40,6 +45,7 @@ const buildRequestBodyFromCreationRequest = ( 
...(creationRequest.usePersona ? { persona_id: creationRequest.persona_id } : { document_sets: creationRequest.document_sets }), + response_type: creationRequest.response_type, }); }; diff --git a/web/src/app/admin/bot/new/page.tsx b/web/src/app/admin/bot/new/page.tsx index 93c9c7f46..3c4972b92 100644 --- a/web/src/app/admin/bot/new/page.tsx +++ b/web/src/app/admin/bot/new/page.tsx @@ -6,7 +6,7 @@ import { ErrorCallout } from "@/components/ErrorCallout"; import { DocumentSet } from "@/lib/types"; import { BackButton } from "@/components/BackButton"; import { Text } from "@tremor/react"; -import { Persona } from "../../personas/interfaces"; +import { Persona } from "../../assistants/interfaces"; async function Page() { const tasks = [fetchSS("/manage/document-set"), fetchSS("/persona")]; diff --git a/web/src/app/admin/bot/page.tsx b/web/src/app/admin/bot/page.tsx index ddbb15f86..bdf081be6 100644 --- a/web/src/app/admin/bot/page.tsx +++ b/web/src/app/admin/bot/page.tsx @@ -34,6 +34,7 @@ import { } from "react-icons/fi"; import Link from "next/link"; import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; +import { ErrorCallout } from "@/components/ErrorCallout"; const numToDisplay = 50; @@ -95,7 +96,7 @@ const SlackBotConfigsTable = ({ {slackBotConfig.persona && !isPersonaASlackBotPersona(slackBotConfig.persona) ? ( @@ -178,8 +179,16 @@ const Main = () => { return ; } - if (slackBotConfigsError || !slackBotConfigs) { - return
Error: {slackBotConfigsError}
; + if (slackBotConfigsError || !slackBotConfigs || !slackBotConfigs) { + return ( + + ); } return ( diff --git a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx index 42da01e18..c959b9037 100644 --- a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx @@ -11,7 +11,7 @@ function convertObjectToString(obj: any): string | any { if (obj.length === 0) { return null; } - return obj.map((item) => convertObjectToString(item)); + return obj.map((item) => convertObjectToString(item)).join(", "); } } if (typeof obj === "boolean") { diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx index 204e371b8..eeca26197 100644 --- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx +++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx @@ -108,7 +108,19 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
)} - {indexAttempt.new_docs_indexed} + +
+
+
{indexAttempt.new_docs_indexed}
+ {indexAttempt.docs_removed_from_index > 0 && ( +
+ (also removed {indexAttempt.docs_removed_from_index}{" "} + docs that were detected as deleted in the source) +
+ )} +
+
+
{indexAttempt.total_docs_indexed}
@@ -116,14 +128,14 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { {indexAttempt.error_msg || "-"} {indexAttempt.full_exception_trace && ( - - setIndexAttemptTracePopupId(indexAttempt.id) - } - className="mt-2 text-link cursor-pointer" +
{ + setIndexAttemptTracePopupId(indexAttempt.id); + }} + className="mt-2 text-link cursor-pointer select-none" > View Full Trace - +
)}
diff --git a/web/src/app/admin/connectors/axero/page.tsx b/web/src/app/admin/connectors/axero/page.tsx new file mode 100644 index 000000000..ccabc380c --- /dev/null +++ b/web/src/app/admin/connectors/axero/page.tsx @@ -0,0 +1,251 @@ +"use client"; + +import * as Yup from "yup"; +import { AxeroIcon, TrashIcon } from "@/components/icons/icons"; +import { fetcher } from "@/lib/fetcher"; +import useSWR, { useSWRConfig } from "swr"; +import { LoadingAnimation } from "@/components/Loading"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { + AxeroConfig, + AxeroCredentialJson, + ConnectorIndexingStatus, + Credential, +} from "@/lib/types"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { + TextFormField, + TextArrayFieldBuilder, + BooleanFormField, + TextArrayField, +} from "@/components/admin/connectors/Field"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { usePublicCredentials } from "@/lib/hooks"; +import { Button, Card, Divider, Text, Title } from "@tremor/react"; +import { AdminPageTitle } from "@/components/admin/Title"; + +const MainSection = () => { + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const axeroConnectorIndexingStatuses: ConnectorIndexingStatus< + AxeroConfig, + AxeroCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "axero" + ); + const axeroCredential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.axero_api_token + ); + + return ( + <> + + Step 1: Provide Axero API Key + + {axeroCredential ? ( + <> +
+ Existing Axero API Key: + + {axeroCredential.credential_json.axero_api_token} + + +
+ + ) : ( + <> +

+ To use the Axero connector, first follow the guide{" "} + + here + {" "} + to generate an API Key. +

+ + + formBody={ + <> + + + + } + validationSchema={Yup.object().shape({ + base_url: Yup.string().required( + "Please enter the base URL of your Axero instance" + ), + axero_api_token: Yup.string().required( + "Please enter your Axero API Token" + ), + })} + initialValues={{ + base_url: "", + axero_api_token: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which spaces do you want to connect? + + + {axeroConnectorIndexingStatuses.length > 0 && ( + <> + + We pull the latest Articles, Blogs, Wikis and{" "} + Forums once per day. + +
+ + connectorIndexingStatuses={axeroConnectorIndexingStatuses} + liveCredential={axeroCredential} + getCredential={(credential) => + credential.credential_json.axero_api_token + } + specialColumns={[ + { + header: "Space", + key: "spaces", + getValue: (ccPairStatus) => { + const connectorConfig = + ccPairStatus.connector.connector_specific_config; + return connectorConfig.spaces && + connectorConfig.spaces.length > 0 + ? connectorConfig.spaces.join(", ") + : ""; + }, + }, + ]} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + onCredentialLink={async (connectorId) => { + if (axeroCredential) { + await linkCredential(connectorId, axeroCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> +
+ + + )} + + {axeroCredential ? ( + +

Configure an Axero Connector

+ + nameBuilder={(values) => + values.spaces + ? `AxeroConnector-${values.spaces.join("_")}` + : `AxeroConnector` + } + source="axero" + inputType="poll" + formBodyBuilder={(values) => { + return ( + <> + + {TextArrayFieldBuilder({ + name: "spaces", + label: "Space IDs:", + subtext: ` + Specify zero or more Spaces to index (by the Space IDs). If no Space IDs + are specified, all Spaces will be indexed.`, + })(values)} + + ); + }} + validationSchema={Yup.object().shape({ + spaces: Yup.array() + .of(Yup.string().required("Space Ids cannot be empty")) + .required(), + })} + initialValues={{ + spaces: [], + }} + refreshFreq={60 * 60 * 24} // 1 day + credentialId={axeroCredential.id} + /> +
+ ) : ( + + Please provide your Axero API Token in Step 1 first! Once done with + that, you can then specify which spaces you want to connect. + + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="Axero" /> + + +
+ ); +} diff --git a/web/src/app/admin/connectors/confluence/page.tsx b/web/src/app/admin/connectors/confluence/page.tsx index 2e1ded53a..649d8853e 100644 --- a/web/src/app/admin/connectors/confluence/page.tsx +++ b/web/src/app/admin/connectors/confluence/page.tsx @@ -43,7 +43,10 @@ const extractSpaceFromDataCenterUrl = (wikiUrl: string): string => { // Copied from the `extract_confluence_keys_from_url` function const extractSpaceFromUrl = (wikiUrl: string): string | null => { try { - if (wikiUrl.includes(".atlassian.net/wiki/spaces/")) { + if ( + wikiUrl.includes(".atlassian.net/wiki/spaces/") || + wikiUrl.includes(".jira.com/wiki/spaces/") + ) { return extractSpaceFromCloudUrl(wikiUrl); } return extractSpaceFromDataCenterUrl(wikiUrl); diff --git a/web/src/app/admin/connectors/discourse/page.tsx b/web/src/app/admin/connectors/discourse/page.tsx new file mode 100644 index 000000000..5a2459760 --- /dev/null +++ b/web/src/app/admin/connectors/discourse/page.tsx @@ -0,0 +1,274 @@ +"use client"; + +import * as Yup from "yup"; +import { DiscourseIcon, TrashIcon } from "@/components/icons/icons"; +import { + TextFormField, + TextArrayFieldBuilder, +} from "@/components/admin/connectors/Field"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { + Credential, + ConnectorIndexingStatus, + DiscourseConfig, + DiscourseCredentialJson, +} from "@/lib/types"; +import useSWR, { useSWRConfig } from "swr"; +import { fetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { usePublicCredentials } from "@/lib/hooks"; +import { Card, 
Divider, Text, Title } from "@tremor/react"; +import { AdminPageTitle } from "@/components/admin/Title"; + +const Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const discourseConnectorIndexingStatuses: ConnectorIndexingStatus< + DiscourseConfig, + DiscourseCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "discourse" + ); + const discourseCredential: Credential | undefined = + credentialsData.find( + (credential) => credential.credential_json?.discourse_api_username + ); + + return ( + <> + {popup} + + This connector allows you to sync all your Discourse Topics into + Danswer. More details on how to setup the Discourse connector can be + found in{" "} + + this guide. + + + + + Step 1: Provide your API Access info + + + {discourseCredential ? ( + <> +
+

Existing API Key:

+

+ {discourseCredential.credential_json?.discourse_api_key} +

+ +
+ + ) : ( + <> + + + formBody={ + <> + + + + } + validationSchema={Yup.object().shape({ + discourse_api_username: Yup.string().required( + "Please enter the Username associated with the API key" + ), + discourse_api_key: Yup.string().required( + "Please enter the API key" + ), + })} + initialValues={{ + discourse_api_username: "", + discourse_api_key: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Which Categories do you want to make searchable? + + + {discourseConnectorIndexingStatuses.length > 0 && ( + <> + + We pull Topics with new Posts every 10 minutes. + +
+ + connectorIndexingStatuses={discourseConnectorIndexingStatuses} + liveCredential={discourseCredential} + getCredential={(credential) => + credential.credential_json.discourse_api_username + } + specialColumns={[ + { + header: "Categories", + key: "categories", + getValue: (ccPairStatus) => + ccPairStatus.connector.connector_specific_config + .categories && + ccPairStatus.connector.connector_specific_config.categories + .length > 0 + ? ccPairStatus.connector.connector_specific_config.categories.join( + ", " + ) + : "", + }, + ]} + includeName={true} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + onCredentialLink={async (connectorId) => { + if (discourseCredential) { + await linkCredential(connectorId, discourseCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + /> +
+ + + )} + + {discourseCredential ? ( + <> + +

Create a new Discourse Connector

+ + nameBuilder={(values) => + values.categories + ? `${values.base_url}-${values.categories.join("_")}` + : `${values.base_url}-All` + } + source="discourse" + inputType="poll" + formBody={ + <> + + + } + formBodyBuilder={TextArrayFieldBuilder({ + name: "categories", + label: "Categories:", + subtext: + "Specify 0 or more Categories to index. If no Categories are specified, Topics from " + + "all categories will be indexed.", + })} + validationSchema={Yup.object().shape({ + base_url: Yup.string().required( + "Please the base URL of your Discourse site." + ), + categories: Yup.array().of( + Yup.string().required("Category names must be strings") + ), + })} + initialValues={{ + categories: [], + base_url: "", + }} + refreshFreq={10 * 60} // 10 minutes + credentialId={discourseCredential.id} + /> +
+ + ) : ( + + Please provide your API Key Info in Step 1 first! Once done with that, + you can then start indexing all your Discourse Topics. + + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="Discourse" /> + +
+
+ ); +} diff --git a/web/src/app/admin/connectors/file/page.tsx b/web/src/app/admin/connectors/file/page.tsx index b963b2d94..a8193729e 100644 --- a/web/src/app/admin/connectors/file/page.tsx +++ b/web/src/app/admin/connectors/file/page.tsx @@ -52,9 +52,12 @@ const Main = () => { {filesAreUploading && } Specify files below, click the Upload button, and the contents of - these files will be searchable via Danswer! Currently only .txt,{" "} - .pdf and .zip files (containing only .txt files) - are supported. + these files will be searchable via Danswer! Currently supported file + types include .txt, .pdf, .docx, .pptx,{" "} + .xlsx, .csv, .md, .mdx, .conf,{" "} + .log, .json, .tsv, .xml, .yml,{" "} + .yaml, .eml, .epub, and finally .zip files + (containing supported file types). NOTE: if the original document is accessible via a link, you can diff --git a/web/src/app/admin/connectors/gong/page.tsx b/web/src/app/admin/connectors/gong/page.tsx index d00beb0e7..e617450d1 100644 --- a/web/src/app/admin/connectors/gong/page.tsx +++ b/web/src/app/admin/connectors/gong/page.tsx @@ -41,15 +41,13 @@ const Main = () => { const { data: credentialsData, isLoading: isCredentialsLoading, - isValidating: isCredentialsValidating, error: isCredentialsError, refreshCredentials, } = usePublicCredentials(); if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) ) { return ; } diff --git a/web/src/app/admin/connectors/jira/page.tsx b/web/src/app/admin/connectors/jira/page.tsx index 7e46fcee6..a9449f5f0 100644 --- a/web/src/app/admin/connectors/jira/page.tsx +++ b/web/src/app/admin/connectors/jira/page.tsx @@ -2,12 +2,16 @@ import * as Yup from "yup"; import { JiraIcon, TrashIcon } from "@/components/icons/icons"; -import { TextFormField } from "@/components/admin/connectors/Field"; +import { + TextFormField, + TextArrayFieldBuilder, 
+} from "@/components/admin/connectors/Field"; import { HealthCheckBanner } from "@/components/health/healthcheck"; import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; import { JiraConfig, JiraCredentialJson, + JiraServerCredentialJson, ConnectorIndexingStatus, } from "@/lib/types"; import useSWR, { useSWRConfig } from "swr"; @@ -71,7 +75,7 @@ const Main = () => { const jiraConnectorIndexingStatuses: ConnectorIndexingStatus< JiraConfig, - JiraCredentialJson + JiraCredentialJson | JiraServerCredentialJson >[] = connectorIndexingStatuses.filter( (connectorIndexingStatus) => connectorIndexingStatus.connector.source === "jira" @@ -90,12 +94,6 @@ const Main = () => { {jiraCredential ? ( <>
- {/*
-

Existing Username:

-

- {confluenceCredential.credential_json?.confluence_username} -

{" "} -
*/}

Existing Access Token:

{jiraCredential.credential_json?.jira_api_token} @@ -142,8 +140,10 @@ const Main = () => { > here {" "} - to generate an Access Token. + to generate an Access Token (for cloud) or Personal Access Token + (for server). Submit only one form. + Cloud formBody={ @@ -175,6 +175,33 @@ const Main = () => { }} /> + Server + + + formBody={ + <> + + + } + validationSchema={Yup.object().shape({ + jira_api_token: Yup.string().required( + "Please enter your Jira personal access token" + ), + })} + initialValues={{ + jira_api_token: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + )} @@ -202,7 +229,10 @@ const Main = () => { below every 10 minutes.

- + connectorIndexingStatuses={jiraConnectorIndexingStatuses} liveCredential={jiraCredential} getCredential={(credential) => { @@ -235,6 +265,18 @@ const Main = () => { ); }, }, + { + header: "Disable comments from users", + key: "comment_email_blacklist", + getValue: (ccPairStatus) => { + const connectorConfig = + ccPairStatus.connector.connector_specific_config; + return connectorConfig.comment_email_blacklist && + connectorConfig.comment_email_blacklist.length > 0 + ? connectorConfig.comment_email_blacklist.join(", ") + : ""; + }, + }, ]} onUpdate={() => mutate("/api/manage/admin/connector/indexing-status") @@ -264,13 +306,30 @@ const Main = () => { /> } + formBodyBuilder={(values) => { + return ( + <> + + {TextArrayFieldBuilder({ + name: "comment_email_blacklist", + label: "Disable comments from users:", + subtext: ` + This is generally useful to ignore certain bots. Add user emails which comments should NOT be indexed.`, + })(values)} + + ); + }} validationSchema={Yup.object().shape({ jira_project_url: Yup.string().required( "Please enter any link to your jira project e.g. 
https://danswer.atlassian.net/jira/software/projects/DAN/boards/1" ), + comment_email_blacklist: Yup.array() + .of(Yup.string().required("Emails names must be strings")) + .required(), })} initialValues={{ jira_project_url: "", + comment_email_blacklist: [], }} refreshFreq={10 * 60} // 10 minutes /> diff --git a/web/src/app/admin/connectors/linear/page.tsx b/web/src/app/admin/connectors/linear/page.tsx index 76c4a13a8..1b601ec8e 100644 --- a/web/src/app/admin/connectors/linear/page.tsx +++ b/web/src/app/admin/connectors/linear/page.tsx @@ -37,14 +37,12 @@ const Main = () => { data: credentialsData, isLoading: isCredentialsLoading, error: isCredentialsError, - isValidating: isCredentialsValidating, refreshCredentials, } = usePublicCredentials(); if ( - isConnectorIndexingStatusesLoading || - isCredentialsLoading || - isCredentialsValidating + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) ) { return ; } diff --git a/web/src/app/admin/connectors/mediawiki/page.tsx b/web/src/app/admin/connectors/mediawiki/page.tsx new file mode 100644 index 000000000..f4dd04a6b --- /dev/null +++ b/web/src/app/admin/connectors/mediawiki/page.tsx @@ -0,0 +1,208 @@ +"use client"; + +import * as Yup from "yup"; +import { MediaWikiIcon, TrashIcon } from "@/components/icons/icons"; +import { + TextArrayField, + TextArrayFieldBuilder, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { + MediaWikiCredentialJson, + MediaWikiConfig, + ConnectorIndexingStatus, + Credential, +} from "@/lib/types"; +import useSWR, { useSWRConfig } from "swr"; +import { fetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { ConnectorForm } from 
"@/components/admin/connectors/ConnectorForm"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { usePublicCredentials } from "@/lib/hooks"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { Card, Text, Title } from "@tremor/react"; + +const Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const mediawikiConnectorIndexingStatuses: ConnectorIndexingStatus< + MediaWikiConfig, + MediaWikiCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "mediawiki" + ); + const mediawikiCredential: Credential | undefined = + credentialsData.find((credential) => true); + + return ( + <> + {popup} + {mediawikiConnectorIndexingStatuses.length > 0 && ( + <> + + MediaWiki indexing status + + + The latest page, chapter, book and shelf changes are fetched every + 10 minutes. + +
+ + connectorIndexingStatuses={mediawikiConnectorIndexingStatuses} + liveCredential={mediawikiCredential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (mediawikiCredential) { + await linkCredential(connectorId, mediawikiCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {mediawikiCredential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your MediaWiki + instance. + + + nameBuilder={(values) => + `MediaWikiConnector-${values.connector_name}` + } + ccPairNameBuilder={(values) => + `MediaWikiConnector-${values.connector_name}` + } + source="mediawiki" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + + + {TextArrayFieldBuilder({ + name: "pages", + label: "Pages to index:", + subtext: + "Specify 0 or more names of pages to index. Only specify the name of the page, not its url.", + })(values)} + {TextArrayFieldBuilder({ + name: "categories", + label: "Categories to index:", + subtext: + "Specify 0 or more names of categories to index. For most MediaWiki sites, these are pages" + + " with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only" + + " specify the name of the category, not its url.", + })(values)} + +
+ )} + validationSchema={Yup.object().shape({ + connector_name: Yup.string().required( + "Please enter a name for your MediaWiki connector." + ), + hostname: Yup.string().required( + "Please enter the base URL for your MediaWiki site" + ), + language_code: Yup.string().default("en"), + categories: Yup.array().of( + Yup.string().required( + "Please enter categories to index from your MediaWiki site" + ) + ), + pages: Yup.array().of( + Yup.string().required( + "Please enter pages to index from your MediaWiki site" + ) + ), + recurse_depth: Yup.number().required( + "Please enter the recursion depth for your MediaWiki site." + ), + })} + initialValues={{ + connector_name: "", + hostname: "", + language_code: "en", + categories: [], + pages: [], + recurse_depth: 0, + }} + refreshFreq={10 * 60} // 10 minutes + credentialId={mediawikiCredential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="MediaWiki" /> + +
+
+ ); +} diff --git a/web/src/app/admin/connectors/wikipedia/page.tsx b/web/src/app/admin/connectors/wikipedia/page.tsx new file mode 100644 index 000000000..02c89a620 --- /dev/null +++ b/web/src/app/admin/connectors/wikipedia/page.tsx @@ -0,0 +1,203 @@ +"use client"; + +import * as Yup from "yup"; +import { WikipediaIcon, TrashIcon } from "@/components/icons/icons"; +import { + TextArrayField, + TextArrayFieldBuilder, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { CredentialForm } from "@/components/admin/connectors/CredentialForm"; +import { + WikipediaCredentialJson, + WikipediaConfig, + ConnectorIndexingStatus, + Credential, +} from "@/lib/types"; +import useSWR, { useSWRConfig } from "swr"; +import { fetcher } from "@/lib/fetcher"; +import { LoadingAnimation } from "@/components/Loading"; +import { adminDeleteCredential, linkCredential } from "@/lib/credential"; +import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm"; +import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { usePublicCredentials } from "@/lib/hooks"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { Card, Text, Title } from "@tremor/react"; + +const Main = () => { + const { popup, setPopup } = usePopup(); + + const { mutate } = useSWRConfig(); + const { + data: connectorIndexingStatuses, + isLoading: isConnectorIndexingStatusesLoading, + error: isConnectorIndexingStatusesError, + } = useSWR[]>( + "/api/manage/admin/connector/indexing-status", + fetcher + ); + const { + data: credentialsData, + isLoading: isCredentialsLoading, + error: isCredentialsError, + refreshCredentials, + } = usePublicCredentials(); + + if ( + (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) || + (!credentialsData && isCredentialsLoading) + ) { + return ; + } + + if 
(isConnectorIndexingStatusesError || !connectorIndexingStatuses) { + return
Failed to load connectors
; + } + + if (isCredentialsError || !credentialsData) { + return
Failed to load credentials
; + } + + const wikipediaConnectorIndexingStatuses: ConnectorIndexingStatus< + WikipediaConfig, + WikipediaCredentialJson + >[] = connectorIndexingStatuses.filter( + (connectorIndexingStatus) => + connectorIndexingStatus.connector.source === "wikipedia" + ); + const wikipediaCredential: Credential | undefined = + credentialsData.find((credential) => true); + + return ( + <> + {popup} + {wikipediaConnectorIndexingStatuses.length > 0 && ( + <> + + Wikipedia indexing status + + + The latest page, chapter, book and shelf changes are fetched every + 10 minutes. + +
+ + connectorIndexingStatuses={wikipediaConnectorIndexingStatuses} + liveCredential={wikipediaCredential} + getCredential={(credential) => { + return
; + }} + onCredentialLink={async (connectorId) => { + if (wikipediaCredential) { + await linkCredential(connectorId, wikipediaCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + /> +
+ + )} + + {wikipediaCredential && ( + <> + +

Create Connection

+ + Press connect below to start the connection to your Wikipedia + instance. + + + nameBuilder={(values) => + `WikipediaConnector-${values.connector_name}` + } + ccPairNameBuilder={(values) => + `WikipediaConnector-${values.connector_name}` + } + source="wikipedia" + inputType="poll" + formBodyBuilder={(values) => ( +
+ + + {TextArrayFieldBuilder({ + name: "pages", + label: "Pages to index:", + subtext: + "Specify 0 or more names of pages to index. Only specify the name of the page, not its url.", + })(values)} + {TextArrayFieldBuilder({ + name: "categories", + label: "Categories to index:", + subtext: + "Specify 0 or more names of categories to index. These are pages" + + " with a name of the form 'Category: XYZ', that are lists of other pages/categories. Only" + + " specify the name of the category, not its url.", + })(values)} + +
+ )} + validationSchema={Yup.object().shape({ + connector_name: Yup.string().required( + "Please enter a name for your Wikipedia connector." + ), + language_code: Yup.string().default("en"), + categories: Yup.array().of( + Yup.string().required( + "Please enter categories to index from your Wikipedia site" + ) + ), + pages: Yup.array().of( + Yup.string().required( + "Please enter pages to index from your Wikipedia site" + ) + ), + recurse_depth: Yup.number().required( + "Please enter the recursion depth for your Wikipedia site." + ), + })} + initialValues={{ + connector_name: "", + language_code: "en", + categories: [], + pages: [], + recurse_depth: 0, + }} + refreshFreq={10 * 60} // 10 minutes + credentialId={wikipediaCredential.id} + /> +
+ + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="Wikipedia" /> + +
+
+ ); +} diff --git a/web/src/app/admin/connectors/zendesk/page.tsx b/web/src/app/admin/connectors/zendesk/page.tsx index ec03e4e70..fe7239efe 100644 --- a/web/src/app/admin/connectors/zendesk/page.tsx +++ b/web/src/app/admin/connectors/zendesk/page.tsx @@ -80,7 +80,7 @@ const Main = () => {

Existing API Token:

- {zendeskCredential.credential_json?.zendesk_email} + {zendeskCredential.credential_json?.zendesk_token}

+ (isSelected + ? " bg-background-strong" + : " hover:bg-hover") + } + onClick={() => { + if (isSelected) { + arrayHelpers.remove(ind); + } else { + arrayHelpers.push(ccPair.cc_pair_id); + } + }} + > +
+ +
+
+ ); + })}
- + )} + /> + + {EE_ENABLED && userGroups && userGroups.length > 0 && ( +
+ + + + If the document set is public, then it will be visible to{" "} + all users. If it is not public, then only users in + the specified groups will be able to see it. + + } + /> + + +

+ Groups with Access +

+ {!values.is_public ? ( + <> + + If any groups are specified, then this Document Set will + only be visible to the specified groups. If no groups are + specified, then the Document Set will be visible to all + users. + + ( +
+ {userGroups.map((userGroup) => { + const ind = values.groups.indexOf(userGroup.id); + let isSelected = ind !== -1; + return ( +
{ + if (isSelected) { + arrayHelpers.remove(ind); + } else { + arrayHelpers.push(userGroup.id); + } + }} + > +
+ {" "} + {userGroup.name} +
+
+ ); + })} +
+ )} + /> + + ) : ( + + This Document Set is public, so this does not apply. If you + want to control which user groups see this Document Set, + mark it as non-public! + + )} +
)} - -
- +
+ +
+ + )} + ); }; diff --git a/web/src/app/admin/documents/sets/[documentSetId]/page.tsx b/web/src/app/admin/documents/sets/[documentSetId]/page.tsx new file mode 100644 index 000000000..960e11cc6 --- /dev/null +++ b/web/src/app/admin/documents/sets/[documentSetId]/page.tsx @@ -0,0 +1,110 @@ +"use client"; + +import { ErrorCallout } from "@/components/ErrorCallout"; +import { refreshDocumentSets, useDocumentSets } from "../hooks"; +import { + useConnectorCredentialIndexingStatus, + useUserGroups, +} from "@/lib/hooks"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { BookmarkIcon } from "@/components/icons/icons"; +import { BackButton } from "@/components/BackButton"; +import { Card } from "@tremor/react"; +import { DocumentSetCreationForm } from "../DocumentSetCreationForm"; +import { useRouter } from "next/navigation"; +import { usePopup } from "@/components/admin/connectors/Popup"; + +function Main({ documentSetId }: { documentSetId: number }) { + const router = useRouter(); + const { popup, setPopup } = usePopup(); + + const { + data: documentSets, + isLoading: isDocumentSetsLoading, + error: documentSetsError, + } = useDocumentSets(); + + const { + data: ccPairs, + isLoading: isCCPairsLoading, + error: ccPairsError, + } = useConnectorCredentialIndexingStatus(); + + // EE only + const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); + + if (isDocumentSetsLoading || isCCPairsLoading || userGroupsIsLoading) { + return ; + } + + if (documentSetsError || !documentSets) { + return ( + + ); + } + + if (ccPairsError || !ccPairs) { + return ( + + ); + } + + const documentSet = documentSets.find( + (documentSet) => documentSet.id === documentSetId + ); + if (!documentSet) { + return ( + + ); + } + + return ( +
+ {popup} + + } + title={documentSet.name} + /> + + + { + refreshDocumentSets(); + router.push("/admin/documents/sets"); + }} + setPopup={setPopup} + existingDocumentSet={documentSet} + /> + +
+ ); +} + +export default function Page({ + params, +}: { + params: { documentSetId: string }; +}) { + const documentSetId = parseInt(params.documentSetId); + + return ( +
+ + +
+
+ ); +} diff --git a/web/src/app/admin/documents/sets/hooks.tsx b/web/src/app/admin/documents/sets/hooks.tsx index 179e36385..fe7969ace 100644 --- a/web/src/app/admin/documents/sets/hooks.tsx +++ b/web/src/app/admin/documents/sets/hooks.tsx @@ -2,12 +2,23 @@ import { errorHandlingFetcher } from "@/lib/fetcher"; import { DocumentSet } from "@/lib/types"; import useSWR, { mutate } from "swr"; -export const useDocumentSets = () => { - const url = "/api/manage/document-set"; - const swrResponse = useSWR(url, errorHandlingFetcher); +const DOCUMENT_SETS_URL = "/api/manage/admin/document-set"; + +export function refreshDocumentSets() { + mutate(DOCUMENT_SETS_URL); +} + +export function useDocumentSets() { + const swrResponse = useSWR( + DOCUMENT_SETS_URL, + errorHandlingFetcher, + { + refreshInterval: 5000, // 5 seconds + } + ); return { ...swrResponse, - refreshDocumentSets: () => mutate(url), + refreshDocumentSets: refreshDocumentSets, }; -}; +} diff --git a/web/src/app/admin/documents/sets/lib.ts b/web/src/app/admin/documents/sets/lib.ts index 71ddcf8d9..2184504cc 100644 --- a/web/src/app/admin/documents/sets/lib.ts +++ b/web/src/app/admin/documents/sets/lib.ts @@ -1,13 +1,19 @@ interface DocumentSetCreationRequest { name: string; description: string; - ccPairIds: number[]; + cc_pair_ids: number[]; + is_public: boolean; + users: string[]; + groups: number[]; } export const createDocumentSet = async ({ name, description, - ccPairIds, + cc_pair_ids, + is_public, + users, + groups, }: DocumentSetCreationRequest) => { return fetch("/api/manage/admin/document-set", { method: "POST", @@ -17,7 +23,10 @@ export const createDocumentSet = async ({ body: JSON.stringify({ name, description, - cc_pair_ids: ccPairIds, + cc_pair_ids, + is_public, + users, + groups, }), }); }; @@ -25,13 +34,19 @@ export const createDocumentSet = async ({ interface DocumentSetUpdateRequest { id: number; description: string; - ccPairIds: number[]; + cc_pair_ids: number[]; + is_public: boolean; + users: 
string[]; + groups: number[]; } export const updateDocumentSet = async ({ id, description, - ccPairIds, + cc_pair_ids, + is_public, + users, + groups, }: DocumentSetUpdateRequest) => { return fetch("/api/manage/admin/document-set", { method: "PATCH", @@ -41,7 +56,10 @@ export const updateDocumentSet = async ({ body: JSON.stringify({ id, description, - cc_pair_ids: ccPairIds, + cc_pair_ids, + is_public, + users, + groups, }), }); }; diff --git a/web/src/app/admin/documents/sets/new/page.tsx b/web/src/app/admin/documents/sets/new/page.tsx new file mode 100644 index 000000000..24b337460 --- /dev/null +++ b/web/src/app/admin/documents/sets/new/page.tsx @@ -0,0 +1,79 @@ +"use client"; + +import { AdminPageTitle } from "@/components/admin/Title"; +import { BookmarkIcon } from "@/components/icons/icons"; +import { DocumentSetCreationForm } from "../DocumentSetCreationForm"; +import { + useConnectorCredentialIndexingStatus, + useUserGroups, +} from "@/lib/hooks"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { Card } from "@tremor/react"; +import { BackButton } from "@/components/BackButton"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { useRouter } from "next/navigation"; +import { UserGroup } from "@/lib/types"; +import { refreshDocumentSets } from "../hooks"; + +function Main() { + const { popup, setPopup } = usePopup(); + const router = useRouter(); + + const { + data: ccPairs, + isLoading: isCCPairsLoading, + error: ccPairsError, + } = useConnectorCredentialIndexingStatus(); + + // EE only + const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); + + if (isCCPairsLoading || userGroupsIsLoading) { + return ; + } + + if (ccPairsError || !ccPairs) { + return ( + + ); + } + + return ( + <> + {popup} + + + { + refreshDocumentSets(); + router.push("/admin/documents/sets"); + }} + setPopup={setPopup} + /> + + + ); +} + +const Page = () => { + 
return ( +
+ + + } + title="New Document Set" + /> + +
+
+ ); +}; + +export default Page; diff --git a/web/src/app/admin/documents/sets/page.tsx b/web/src/app/admin/documents/sets/page.tsx index 777aea092..dc692451f 100644 --- a/web/src/app/admin/documents/sets/page.tsx +++ b/web/src/app/admin/documents/sets/page.tsx @@ -1,14 +1,8 @@ "use client"; -import { LoadingAnimation, ThreeDotsLoader } from "@/components/Loading"; +import { ThreeDotsLoader } from "@/components/Loading"; import { PageSelector } from "@/components/PageSelector"; -import { BasicTable } from "@/components/admin/connectors/BasicTable"; -import { - BookmarkIcon, - EditIcon, - InfoIcon, - TrashIcon, -} from "@/components/icons/icons"; +import { BookmarkIcon, InfoIcon } from "@/components/icons/icons"; import { Table, TableHead, @@ -24,7 +18,6 @@ import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types"; import { useState } from "react"; import { useDocumentSets } from "./hooks"; -import { DocumentSetCreationForm } from "./DocumentSetCreationForm"; import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; import { deleteDocumentSet } from "./lib"; import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; @@ -37,49 +30,31 @@ import { FiEdit, } from "react-icons/fi"; import { DeleteButton } from "@/components/DeleteButton"; +import Link from "next/link"; +import { useRouter } from "next/navigation"; const numToDisplay = 50; -const EditRow = ({ - documentSet, - ccPairs, - setPopup, - refreshDocumentSets, -}: { - documentSet: DocumentSet; - ccPairs: ConnectorIndexingStatus[]; - setPopup: (popupSpec: PopupSpec | null) => void; - refreshDocumentSets: () => void; -}) => { - const [isEditPopupOpen, setEditPopupOpen] = useState(false); +const EditRow = ({ documentSet }: { documentSet: DocumentSet }) => { + const router = useRouter(); + const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false); return (
- {isEditPopupOpen && ( - { - setEditPopupOpen(false); - refreshDocumentSets(); - }} - setPopup={setPopup} - existingDocumentSet={documentSet} - /> - )} {isSyncingTooltipOpen && ( -
+
Cannot update while syncing! Wait for the sync to finish, then try again.
)}
{ if (documentSet.is_up_to_date) { - setEditPopupOpen(true); + router.push(`/admin/documents/sets/${documentSet.id}`); } }} onMouseEnter={() => { @@ -109,7 +84,6 @@ interface DocumentFeedbackTableProps { const DocumentSetTable = ({ documentSets, - ccPairs, refresh, setPopup, }: DocumentFeedbackTableProps) => { @@ -146,12 +120,7 @@ const DocumentSetTable = ({
- +
@@ -237,7 +206,6 @@ const DocumentSetTable = ({ }; const Main = () => { - const [isOpen, setIsOpen] = useState(false); const { popup, setPopup } = usePopup(); const { data: documentSets, @@ -278,14 +246,11 @@ const Main = () => {
- + + +
{documentSets.length > 0 && ( @@ -299,17 +264,6 @@ const Main = () => { /> )} - - {isOpen && ( - { - refreshDocumentSets(); - setIsOpen(false); - }} - setPopup={setPopup} - /> - )}
); }; diff --git a/web/src/app/admin/keys/openai/page.tsx b/web/src/app/admin/keys/openai/page.tsx deleted file mode 100644 index 70497f719..000000000 --- a/web/src/app/admin/keys/openai/page.tsx +++ /dev/null @@ -1,79 +0,0 @@ -"use client"; - -import { LoadingAnimation } from "@/components/Loading"; -import { AdminPageTitle } from "@/components/admin/Title"; -import { KeyIcon, TrashIcon } from "@/components/icons/icons"; -import { ApiKeyForm } from "@/components/openai/ApiKeyForm"; -import { GEN_AI_API_KEY_URL } from "@/components/openai/constants"; -import { fetcher } from "@/lib/fetcher"; -import { Text, Title } from "@tremor/react"; -import { FiCpu } from "react-icons/fi"; -import useSWR, { mutate } from "swr"; - -const ExistingKeys = () => { - const { data, isLoading, error } = useSWR<{ api_key: string }>( - GEN_AI_API_KEY_URL, - fetcher - ); - - if (isLoading) { - return ; - } - - if (error) { - return
Error loading existing keys
; - } - - if (!data?.api_key) { - return null; - } - - return ( -
- Existing Key -
-

sk- ****...**{data?.api_key}

- -
-
- ); -}; - -const Page = () => { - return ( -
- } - /> - - - - Update Key - - Specify an OpenAI API key and click the "Submit" button. - -
- { - if (response.ok) { - mutate(GEN_AI_API_KEY_URL); - } - }} - /> -
-
- ); -}; - -export default Page; diff --git a/web/src/app/admin/models/embedding/CustomModelForm.tsx b/web/src/app/admin/models/embedding/CustomModelForm.tsx new file mode 100644 index 000000000..23676bc61 --- /dev/null +++ b/web/src/app/admin/models/embedding/CustomModelForm.tsx @@ -0,0 +1,116 @@ +import { + BooleanFormField, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { Button, Divider, Text } from "@tremor/react"; +import { Form, Formik } from "formik"; + +import * as Yup from "yup"; +import { EmbeddingModelDescriptor } from "./embeddingModels"; + +export function CustomModelForm({ + onSubmit, +}: { + onSubmit: (model: EmbeddingModelDescriptor) => void; +}) { + return ( +
+ { + onSubmit({ ...values, model_dim: parseInt(values.model_dim) }); + }} + > + {({ isSubmitting, setFieldValue }) => ( +
+ + + { + const value = e.target.value; + // Allow only integer values + if (value === "" || /^[0-9]+$/.test(value)) { + setFieldValue("model_dim", value); + } + }} + /> + + + The prefix specified by the model creators which should be + prepended to queries before passing them to the model. + Many models do not have this, in which case this should be + left empty. + + } + placeholder="E.g. 'query: '" + autoCompleteDisabled={true} + /> + + + The prefix specified by the model creators which should be + prepended to passages before passing them to the model. + Many models do not have this, in which case this should be + left empty. + + } + placeholder="E.g. 'passage: '" + autoCompleteDisabled={true} + /> + + + +
+ +
+ + )} +
+
+ ); +} diff --git a/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx b/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx index 949c5d46d..7572ac2ce 100644 --- a/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx +++ b/web/src/app/admin/models/embedding/ModelSelectionConfirmation.tsx @@ -1,18 +1,21 @@ import { Modal } from "@/components/Modal"; -import { Button, Text } from "@tremor/react"; +import { Button, Text, Callout } from "@tremor/react"; +import { EmbeddingModelDescriptor } from "./embeddingModels"; export function ModelSelectionConfirmaion({ selectedModel, + isCustom, onConfirm, }: { - selectedModel: string; + selectedModel: EmbeddingModelDescriptor; + isCustom: boolean; onConfirm: () => void; }) { return (
- You have selected: {selectedModel}. Are you sure you want to - update to this new embedding model? + You have selected: {selectedModel.model_name}. Are you sure you + want to update to this new embedding model? We will re-index all your documents in the background so you will be @@ -25,6 +28,18 @@ export function ModelSelectionConfirmaion({ normal. If you are self-hosting, we recommend that you allocate at least 16GB of RAM to Danswer during this process. + + {isCustom && ( + + We've detected that this is a custom-specified embedding model. + Since we have to download the model files before verifying the + configuration's correctness, we won't be able to let you + know if the configuration is valid until after we start + re-indexing your documents. If there is an issue, it will show up on + this page as an indexing error on this page after clicking Confirm. + + )} +
@@ -61,17 +69,19 @@ export function ModelSelector({ setSelectedModel, }: { modelOptions: FullEmbeddingModelDescriptor[]; - setSelectedModel: (modelName: string) => void; + setSelectedModel: (model: EmbeddingModelDescriptor) => void; }) { return ( -
- {modelOptions.map((modelOption) => ( - - ))} +
+
+ {modelOptions.map((modelOption) => ( + + ))} +
); } diff --git a/web/src/app/admin/models/embedding/ReindexingProgressTable.tsx b/web/src/app/admin/models/embedding/ReindexingProgressTable.tsx index 3b366c192..b1f91d24b 100644 --- a/web/src/app/admin/models/embedding/ReindexingProgressTable.tsx +++ b/web/src/app/admin/models/embedding/ReindexingProgressTable.tsx @@ -1,14 +1,14 @@ import { PageSelector } from "@/components/PageSelector"; -import { CCPairStatus, IndexAttemptStatus } from "@/components/Status"; -import { ConnectorIndexingStatus, ValidStatuses } from "@/lib/types"; +import { IndexAttemptStatus } from "@/components/Status"; +import { ConnectorIndexingStatus } from "@/lib/types"; import { - Button, Table, TableBody, TableCell, TableHead, TableHeaderCell, TableRow, + Text, } from "@tremor/react"; import Link from "next/link"; import { useState } from "react"; @@ -30,6 +30,7 @@ export function ReindexingProgressTable({ Connector Name Status Docs Re-Indexed + Error Message @@ -58,6 +59,13 @@ export function ReindexingProgressTable({ {reindexingProgress?.latest_index_attempt ?.total_docs_indexed || "-"} + +
+ + {reindexingProgress.error_msg || "-"} + +
+
); })} diff --git a/web/src/app/admin/models/embedding/embeddingModels.ts b/web/src/app/admin/models/embedding/embeddingModels.ts index 64ccfff95..7c5d09180 100644 --- a/web/src/app/admin/models/embedding/embeddingModels.ts +++ b/web/src/app/admin/models/embedding/embeddingModels.ts @@ -76,3 +76,12 @@ export function checkModelNameIsValid(modelName: string | undefined | null) { } return true; } + +export function fillOutEmeddingModelDescriptor( + embeddingModel: EmbeddingModelDescriptor | FullEmbeddingModelDescriptor +): FullEmbeddingModelDescriptor { + return { + ...embeddingModel, + description: "", + }; +} diff --git a/web/src/app/admin/models/embedding/page.tsx b/web/src/app/admin/models/embedding/page.tsx index 5f4cd1c93..ccda9af19 100644 --- a/web/src/app/admin/models/embedding/page.tsx +++ b/web/src/app/admin/models/embedding/page.tsx @@ -1,13 +1,10 @@ "use client"; -import { LoadingAnimation, ThreeDotsLoader } from "@/components/Loading"; +import { ThreeDotsLoader } from "@/components/Loading"; import { AdminPageTitle } from "@/components/admin/Title"; -import { KeyIcon, TrashIcon } from "@/components/icons/icons"; -import { ApiKeyForm } from "@/components/openai/ApiKeyForm"; -import { GEN_AI_API_KEY_URL } from "@/components/openai/constants"; -import { errorHandlingFetcher, fetcher } from "@/lib/fetcher"; -import { Button, Divider, Text, Title } from "@tremor/react"; -import { FiCpu, FiPackage } from "react-icons/fi"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { Button, Card, Text, Title } from "@tremor/react"; +import { FiPackage } from "react-icons/fi"; import useSWR, { mutate } from "swr"; import { ModelOption, ModelSelector } from "./ModelSelector"; import { useState } from "react"; @@ -16,17 +13,18 @@ import { ReindexingProgressTable } from "./ReindexingProgressTable"; import { Modal } from "@/components/Modal"; import { AVAILABLE_MODELS, - EmbeddingModelResponse, + EmbeddingModelDescriptor, INVALID_OLD_MODEL, + 
fillOutEmeddingModelDescriptor, } from "./embeddingModels"; import { ErrorCallout } from "@/components/ErrorCallout"; import { Connector, ConnectorIndexingStatus } from "@/lib/types"; import Link from "next/link"; +import { CustomModelForm } from "./CustomModelForm"; function Main() { - const [tentativeNewEmbeddingModel, setTentativeNewEmbeddingModel] = useState< - string | null - >(null); + const [tentativeNewEmbeddingModel, setTentativeNewEmbeddingModel] = + useState(null); const [isCancelling, setIsCancelling] = useState(false); const [showAddConnectorPopup, setShowAddConnectorPopup] = useState(false); @@ -35,16 +33,16 @@ function Main() { data: currentEmeddingModel, isLoading: isLoadingCurrentModel, error: currentEmeddingModelError, - } = useSWR( + } = useSWR( "/api/secondary-index/get-current-embedding-model", errorHandlingFetcher, { refreshInterval: 5000 } // 5 seconds ); const { - data: futureEmeddingModel, + data: futureEmbeddingModel, isLoading: isLoadingFutureModel, error: futureEmeddingModelError, - } = useSWR( + } = useSWR( "/api/secondary-index/get-secondary-embedding-model", errorHandlingFetcher, { refreshInterval: 5000 } // 5 seconds @@ -63,24 +61,20 @@ function Main() { { refreshInterval: 5000 } // 5 seconds ); - const onSelect = async (modelName: string) => { + const onSelect = async (model: EmbeddingModelDescriptor) => { if (currentEmeddingModel?.model_name === INVALID_OLD_MODEL) { - await onConfirm(modelName); + await onConfirm(model); } else { - setTentativeNewEmbeddingModel(modelName); + setTentativeNewEmbeddingModel(model); } }; - const onConfirm = async (modelName: string) => { - const modelDescriptor = AVAILABLE_MODELS.find( - (model) => model.model_name === modelName - ); - + const onConfirm = async (model: EmbeddingModelDescriptor) => { const response = await fetch( "/api/secondary-index/set-new-embedding-model", { method: "POST", - body: JSON.stringify(modelDescriptor), + body: JSON.stringify(model), headers: { "Content-Type": 
"application/json", }, @@ -120,26 +114,33 @@ function Main() { if ( currentEmeddingModelError || !currentEmeddingModel || - futureEmeddingModelError || - !futureEmeddingModel + futureEmeddingModelError ) { return ; } const currentModelName = currentEmeddingModel.model_name; - const currentModel = AVAILABLE_MODELS.find( - (model) => model.model_name === currentModelName - ); + const currentModel = + AVAILABLE_MODELS.find((model) => model.model_name === currentModelName) || + fillOutEmeddingModelDescriptor(currentEmeddingModel); - const newModelSelection = AVAILABLE_MODELS.find( - (model) => model.model_name === futureEmeddingModel.model_name - ); + const newModelSelection = futureEmbeddingModel + ? AVAILABLE_MODELS.find( + (model) => model.model_name === futureEmbeddingModel.model_name + ) || fillOutEmeddingModelDescriptor(futureEmbeddingModel) + : null; return (
{tentativeNewEmbeddingModel && ( + model.model_name === tentativeNewEmbeddingModel.model_name + ) === undefined + } onConfirm={() => onConfirm(tentativeNewEmbeddingModel)} onCancel={() => setTentativeNewEmbeddingModel(null)} /> @@ -243,12 +244,49 @@ function Main() { )} + + Below are a curated selection of quality models that we recommend + you choose from. + + modelOption.model_name !== currentModelName )} setSelectedModel={onSelect} /> + + + Alternatively, (if you know what you're doing) you can + specify a{" "} + + SentenceTransformers + + -compatible model of your choice below. The rough list of + supported models can be found{" "} + + here + + . +
+ NOTE: not all models listed will work with Danswer, since + some have unique interfaces or special requirements. If in doubt, + reach out to the Danswer team. + + +
+ + + +
) : ( connectors && @@ -272,10 +310,10 @@ function Main() { The table below shows the re-indexing progress of all existing - connectors. Once all connectors have been re-indexed, the new - model will be used for all search queries. Until then, we will - use the old model so that no downtime is necessary during this - transition. + connectors. Once all connectors have been re-indexed + successfully, the new model will be used for all search + queries. Until then, we will use the old model so that no + downtime is necessary during this transition. {isLoadingOngoingReIndexingStatus ? ( diff --git a/web/src/app/admin/models/llm/ConfiguredLLMProviderDisplay.tsx b/web/src/app/admin/models/llm/ConfiguredLLMProviderDisplay.tsx new file mode 100644 index 000000000..aa8c0f972 --- /dev/null +++ b/web/src/app/admin/models/llm/ConfiguredLLMProviderDisplay.tsx @@ -0,0 +1,191 @@ +import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; +import { FullLLMProvider, WellKnownLLMProviderDescriptor } from "./interfaces"; +import { Modal } from "@/components/Modal"; +import { LLMProviderUpdateForm } from "./LLMProviderUpdateForm"; +import { CustomLLMProviderUpdateForm } from "./CustomLLMProviderUpdateForm"; +import { useState } from "react"; +import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; +import { mutate } from "swr"; +import { Badge, Button } from "@tremor/react"; +import isEqual from "lodash/isEqual"; + +function LLMProviderUpdateModal({ + llmProviderDescriptor, + onClose, + existingLlmProvider, + shouldMarkAsDefault, + setPopup, +}: { + llmProviderDescriptor: WellKnownLLMProviderDescriptor | null | undefined; + onClose: () => void; + existingLlmProvider?: FullLLMProvider; + shouldMarkAsDefault?: boolean; + setPopup?: (popup: PopupSpec) => void; +}) { + const providerName = existingLlmProvider?.name + ? 
`"${existingLlmProvider.name}"` + : null || + llmProviderDescriptor?.display_name || + llmProviderDescriptor?.name || + "Custom LLM Provider"; + return ( + onClose()} + > +
+ {llmProviderDescriptor ? ( + + ) : ( + + )} +
+
+ ); +} + +function LLMProviderDisplay({ + llmProviderDescriptor, + existingLlmProvider, + shouldMarkAsDefault, +}: { + llmProviderDescriptor: WellKnownLLMProviderDescriptor | null | undefined; + existingLlmProvider: FullLLMProvider; + shouldMarkAsDefault?: boolean; +}) { + const [formIsVisible, setFormIsVisible] = useState(false); + const { popup, setPopup } = usePopup(); + + const providerName = + existingLlmProvider?.name || + llmProviderDescriptor?.display_name || + llmProviderDescriptor?.name; + return ( +
+ {popup} +
+
+
{providerName}
+
({existingLlmProvider.provider})
+ {!existingLlmProvider.is_default_provider && ( +
{ + const response = await fetch( + `${LLM_PROVIDERS_ADMIN_URL}/${existingLlmProvider.id}/default`, + { + method: "POST", + } + ); + if (!response.ok) { + const errorMsg = (await response.json()).detail; + setPopup({ + type: "error", + message: `Failed to set provider as default: ${errorMsg}`, + }); + return; + } + + mutate(LLM_PROVIDERS_ADMIN_URL); + setPopup({ + type: "success", + message: "Provider set as default successfully!", + }); + }} + > + Set as default +
+ )} +
+ + {existingLlmProvider && ( +
+ {existingLlmProvider.is_default_provider ? ( + + Default + + ) : ( + + Enabled + + )} +
+ )} + +
+ +
+
+ {formIsVisible && ( + setFormIsVisible(false)} + existingLlmProvider={existingLlmProvider} + shouldMarkAsDefault={shouldMarkAsDefault} + setPopup={setPopup} + /> + )} +
+ ); +} + +export function ConfiguredLLMProviderDisplay({ + existingLlmProviders, + llmProviderDescriptors, +}: { + existingLlmProviders: FullLLMProvider[]; + llmProviderDescriptors: WellKnownLLMProviderDescriptor[]; +}) { + existingLlmProviders = existingLlmProviders.sort((a, b) => { + if (a.is_default_provider && !b.is_default_provider) { + return -1; + } + if (!a.is_default_provider && b.is_default_provider) { + return 1; + } + return a.provider > b.provider ? 1 : -1; + }); + + return ( +
+ {existingLlmProviders.map((provider) => { + const defaultProviderDesciptor = llmProviderDescriptors.find( + (llmProviderDescriptors) => + llmProviderDescriptors.name === provider.provider + ); + + return ( + + ); + })} +
+ ); +} diff --git a/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx b/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx new file mode 100644 index 000000000..7ada70203 --- /dev/null +++ b/web/src/app/admin/models/llm/CustomLLMProviderUpdateForm.tsx @@ -0,0 +1,459 @@ +import { LoadingAnimation } from "@/components/Loading"; +import { Button, Divider, Text } from "@tremor/react"; +import { + ArrayHelpers, + ErrorMessage, + Field, + FieldArray, + Form, + Formik, +} from "formik"; +import { FiPlus, FiTrash, FiX } from "react-icons/fi"; +import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; +import { + Label, + SubLabel, + TextArrayField, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { useState } from "react"; +import { useSWRConfig } from "swr"; +import { FullLLMProvider } from "./interfaces"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import * as Yup from "yup"; +import isEqual from "lodash/isEqual"; + +function customConfigProcessing(customConfigsList: [string, string][]) { + const customConfig: { [key: string]: string } = {}; + customConfigsList.forEach(([key, value]) => { + customConfig[key] = value; + }); + return customConfig; +} + +export function CustomLLMProviderUpdateForm({ + onClose, + existingLlmProvider, + shouldMarkAsDefault, + setPopup, +}: { + onClose: () => void; + existingLlmProvider?: FullLLMProvider; + shouldMarkAsDefault?: boolean; + setPopup?: (popup: PopupSpec) => void; +}) { + const { mutate } = useSWRConfig(); + + const [isTesting, setIsTesting] = useState(false); + const [testError, setTestError] = useState(""); + const [isTestSuccessful, setTestSuccessful] = useState( + existingLlmProvider ? true : false + ); + + // Define the initial values based on the provider's requirements + const initialValues = { + name: existingLlmProvider?.name ?? "", + provider: existingLlmProvider?.provider ?? "", + api_key: existingLlmProvider?.api_key ?? 
"", + api_base: existingLlmProvider?.api_base ?? "", + api_version: existingLlmProvider?.api_version ?? "", + default_model_name: existingLlmProvider?.default_model_name ?? null, + default_fast_model_name: + existingLlmProvider?.fast_default_model_name ?? null, + model_names: existingLlmProvider?.model_names ?? [], + custom_config_list: existingLlmProvider?.custom_config + ? Object.entries(existingLlmProvider.custom_config) + : [], + }; + + const [validatedConfig, setValidatedConfig] = useState( + existingLlmProvider ? initialValues : null + ); + + // Setup validation schema if required + const validationSchema = Yup.object({ + name: Yup.string().required("Display Name is required"), + provider: Yup.string().required("Provider Name is required"), + api_key: Yup.string(), + api_base: Yup.string(), + api_version: Yup.string(), + model_names: Yup.array(Yup.string().required("Model name is required")), + default_model_name: Yup.string().required("Model name is required"), + default_fast_model_name: Yup.string().nullable(), + custom_config_list: Yup.array(), + }); + + return ( + { + if (!isEqual(values, validatedConfig)) { + setTestSuccessful(false); + } + }} + onSubmit={async (values, { setSubmitting }) => { + setSubmitting(true); + + if (!isTestSuccessful) { + setSubmitting(false); + return; + } + + if (values.model_names.length === 0) { + const fullErrorMsg = "At least one model name is required"; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + setSubmitting(false); + return; + } + + const response = await fetch(LLM_PROVIDERS_ADMIN_URL, { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + ...values, + custom_config: customConfigProcessing(values.custom_config_list), + }), + }); + + if (!response.ok) { + const errorMsg = (await response.json()).detail; + const fullErrorMsg = existingLlmProvider + ? 
`Failed to update provider: ${errorMsg}` + : `Failed to enable provider: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + + if (shouldMarkAsDefault) { + const newLlmProvider = (await response.json()) as FullLLMProvider; + const setDefaultResponse = await fetch( + `${LLM_PROVIDERS_ADMIN_URL}/${newLlmProvider.id}/default`, + { + method: "POST", + } + ); + if (!setDefaultResponse.ok) { + const errorMsg = (await setDefaultResponse.json()).detail; + const fullErrorMsg = `Failed to set provider as default: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + } + + mutate(LLM_PROVIDERS_ADMIN_URL); + onClose(); + + const successMsg = existingLlmProvider + ? "Provider updated successfully!" + : "Provider enabled successfully!"; + if (setPopup) { + setPopup({ + type: "success", + message: successMsg, + }); + } else { + alert(successMsg); + } + + setSubmitting(false); + }} + > + {({ values }) => ( +
+ + + + + + Should be one of the providers listed at{" "} + + https://docs.litellm.ai/docs/providers + + . + + } + placeholder="Name of the custom provider" + /> + + + + + Fill in the following as is needed. Refer to the LiteLLM + documentation for the model provider name specified above in order + to determine which fields are required. + + + + + + + + + + + <> +
+ Additional configurations needed by the model provider. Are + passed to litellm via environment variables. +
+ +
+ For example, when configuring the Cloudflare provider, you would + need to set `CLOUDFLARE_ACCOUNT_ID` as the key and your + Cloudflare account ID as the value. +
+ +
+ + ) => ( +
+ {values.custom_config_list.map((_, index) => { + return ( +
+
+
+
+ + + +
+ +
+ + + +
+
+
+ arrayHelpers.remove(index)} + /> +
+
+
+ ); + })} + + +
+ )} + /> + + + + + + + + + + + + + +
+ {/* NOTE: this is above the test button to make sure it's visible */} + {!isTestSuccessful && testError && ( + {testError} + )} + {isTestSuccessful && ( + + Test successful! LLM provider is ready to go. + + )} + +
+ {isTestSuccessful ? ( + + ) : ( + + )} + {existingLlmProvider && ( + + )} +
+
+ + )} +
+ ); +} diff --git a/web/src/app/admin/models/llm/LLMConfiguration.tsx b/web/src/app/admin/models/llm/LLMConfiguration.tsx new file mode 100644 index 000000000..f8fb75127 --- /dev/null +++ b/web/src/app/admin/models/llm/LLMConfiguration.tsx @@ -0,0 +1,185 @@ +"use client"; + +import { Modal } from "@/components/Modal"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { useState } from "react"; +import useSWR from "swr"; +import { Button, Callout, Text, Title } from "@tremor/react"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { FullLLMProvider, WellKnownLLMProviderDescriptor } from "./interfaces"; +import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; +import { LLMProviderUpdateForm } from "./LLMProviderUpdateForm"; +import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; +import { CustomLLMProviderUpdateForm } from "./CustomLLMProviderUpdateForm"; +import { ConfiguredLLMProviderDisplay } from "./ConfiguredLLMProviderDisplay"; + +function LLMProviderUpdateModal({ + llmProviderDescriptor, + onClose, + existingLlmProvider, + shouldMarkAsDefault, + setPopup, +}: { + llmProviderDescriptor: WellKnownLLMProviderDescriptor | null; + onClose: () => void; + existingLlmProvider?: FullLLMProvider; + shouldMarkAsDefault?: boolean; + setPopup?: (popup: PopupSpec) => void; +}) { + const providerName = + llmProviderDescriptor?.display_name || + llmProviderDescriptor?.name || + existingLlmProvider?.name || + "Custom LLM Provider"; + return ( + onClose()}> +
+ {llmProviderDescriptor ? ( + + ) : ( + + )} +
+
+ ); +} + +function DefaultLLMProviderDisplay({ + llmProviderDescriptor, + shouldMarkAsDefault, +}: { + llmProviderDescriptor: WellKnownLLMProviderDescriptor | null; + shouldMarkAsDefault?: boolean; +}) { + const [formIsVisible, setFormIsVisible] = useState(false); + const { popup, setPopup } = usePopup(); + + const providerName = + llmProviderDescriptor?.display_name || llmProviderDescriptor?.name; + return ( +
+ {popup} +
+
+
{providerName}
+
+ +
+ +
+
+ {formIsVisible && ( + setFormIsVisible(false)} + shouldMarkAsDefault={shouldMarkAsDefault} + setPopup={setPopup} + /> + )} +
+ ); +} + +function AddCustomLLMProvider({ + existingLlmProviders, +}: { + existingLlmProviders: FullLLMProvider[]; +}) { + const [formIsVisible, setFormIsVisible] = useState(false); + + if (formIsVisible) { + return ( + setFormIsVisible(false)} + > +
+ setFormIsVisible(false)} + shouldMarkAsDefault={existingLlmProviders.length === 0} + /> +
+
+ ); + } + + return ( + + ); +} + +export function LLMConfiguration() { + const { data: llmProviderDescriptors } = useSWR< + WellKnownLLMProviderDescriptor[] + >("/api/admin/llm/built-in/options", errorHandlingFetcher); + const { data: existingLlmProviders } = useSWR( + LLM_PROVIDERS_ADMIN_URL, + errorHandlingFetcher + ); + + if (!llmProviderDescriptors || !existingLlmProviders) { + return ; + } + + return ( + <> + Enabled LLM Providers + + {existingLlmProviders.length > 0 ? ( + <> + + If multiple LLM providers are enabled, the default provider will be + used for all "Default" Assistants. For user-created + Assistants, you can select the LLM provider/model that best fits the + use case! + + + + ) : ( + + Please set one up below in order to start using Danswer! + + )} + + Add LLM Provider + + Add a new LLM provider by either selecting from one of the default + providers or by specifying your own custom LLM provider. + + +
+ {llmProviderDescriptors.map((llmProviderDescriptor) => { + return ( + + ); + })} +
+ +
+ +
+ + ); +} diff --git a/web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx b/web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx new file mode 100644 index 000000000..3c12fe660 --- /dev/null +++ b/web/src/app/admin/models/llm/LLMProviderUpdateForm.tsx @@ -0,0 +1,369 @@ +import { LoadingAnimation } from "@/components/Loading"; +import { Button, Divider, Text } from "@tremor/react"; +import { Form, Formik } from "formik"; +import { FiTrash } from "react-icons/fi"; +import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; +import { + SelectorFormField, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { useState } from "react"; +import { useSWRConfig } from "swr"; +import { FullLLMProvider, WellKnownLLMProviderDescriptor } from "./interfaces"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import * as Yup from "yup"; +import isEqual from "lodash/isEqual"; + +export function LLMProviderUpdateForm({ + llmProviderDescriptor, + onClose, + existingLlmProvider, + shouldMarkAsDefault, + setPopup, +}: { + llmProviderDescriptor: WellKnownLLMProviderDescriptor; + onClose: () => void; + existingLlmProvider?: FullLLMProvider; + shouldMarkAsDefault?: boolean; + setPopup?: (popup: PopupSpec) => void; +}) { + const { mutate } = useSWRConfig(); + + const [isTesting, setIsTesting] = useState(false); + const [testError, setTestError] = useState(""); + const [isTestSuccessful, setTestSuccessful] = useState( + existingLlmProvider ? true : false + ); + + // Define the initial values based on the provider's requirements + const initialValues = { + name: existingLlmProvider?.name ?? "", + api_key: existingLlmProvider?.api_key ?? "", + api_base: existingLlmProvider?.api_base ?? "", + api_version: existingLlmProvider?.api_version ?? "", + default_model_name: + existingLlmProvider?.default_model_name ?? 
+ (llmProviderDescriptor.default_model || + llmProviderDescriptor.llm_names[0]), + default_fast_model_name: + existingLlmProvider?.fast_default_model_name ?? + (llmProviderDescriptor.default_fast_model || null), + custom_config: + existingLlmProvider?.custom_config ?? + llmProviderDescriptor.custom_config_keys?.reduce( + (acc, customConfigKey) => { + acc[customConfigKey.name] = ""; + return acc; + }, + {} as { [key: string]: string } + ), + }; + + const [validatedConfig, setValidatedConfig] = useState( + existingLlmProvider ? initialValues : null + ); + + // Setup validation schema if required + const validationSchema = Yup.object({ + name: Yup.string().required("Display Name is required"), + api_key: llmProviderDescriptor.api_key_required + ? Yup.string().required("API Key is required") + : Yup.string(), + api_base: llmProviderDescriptor.api_base_required + ? Yup.string().required("API Base is required") + : Yup.string(), + api_version: llmProviderDescriptor.api_version_required + ? Yup.string().required("API Version is required") + : Yup.string(), + ...(llmProviderDescriptor.custom_config_keys + ? 
{ + custom_config: Yup.object( + llmProviderDescriptor.custom_config_keys.reduce( + (acc, customConfigKey) => { + if (customConfigKey.is_required) { + acc[customConfigKey.name] = Yup.string().required( + `${customConfigKey.name} is required` + ); + } + return acc; + }, + {} as { [key: string]: Yup.StringSchema } + ) + ), + } + : {}), + default_model_name: Yup.string().required("Model name is required"), + default_fast_model_name: Yup.string().nullable(), + }); + + return ( + { + if (!isEqual(values, validatedConfig)) { + setTestSuccessful(false); + } + }} + onSubmit={async (values, { setSubmitting }) => { + setSubmitting(true); + + if (!isTestSuccessful) { + setSubmitting(false); + return; + } + + const response = await fetch(LLM_PROVIDERS_ADMIN_URL, { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + provider: llmProviderDescriptor.name, + ...values, + fast_default_model_name: + values.default_fast_model_name || values.default_model_name, + }), + }); + + if (!response.ok) { + const errorMsg = (await response.json()).detail; + const fullErrorMsg = existingLlmProvider + ? `Failed to update provider: ${errorMsg}` + : `Failed to enable provider: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + + if (shouldMarkAsDefault) { + const newLlmProvider = (await response.json()) as FullLLMProvider; + const setDefaultResponse = await fetch( + `${LLM_PROVIDERS_ADMIN_URL}/${newLlmProvider.id}/default`, + { + method: "POST", + } + ); + if (!setDefaultResponse.ok) { + const errorMsg = (await setDefaultResponse.json()).detail; + const fullErrorMsg = `Failed to set provider as default: ${errorMsg}`; + if (setPopup) { + setPopup({ + type: "error", + message: fullErrorMsg, + }); + } else { + alert(fullErrorMsg); + } + return; + } + } + + mutate(LLM_PROVIDERS_ADMIN_URL); + onClose(); + + const successMsg = existingLlmProvider + ? 
"Provider updated successfully!" + : "Provider enabled successfully!"; + if (setPopup) { + setPopup({ + type: "success", + message: successMsg, + }); + } else { + alert(successMsg); + } + + setSubmitting(false); + }} + > + {({ values }) => ( +
+ + + + + {llmProviderDescriptor.api_key_required && ( + + )} + + {llmProviderDescriptor.api_base_required && ( + + )} + + {llmProviderDescriptor.api_version_required && ( + + )} + + {llmProviderDescriptor.custom_config_keys?.map((customConfigKey) => ( +
+ +
+ ))} + + + + {llmProviderDescriptor.llm_names.length > 0 ? ( + ({ + name, + value: name, + }))} + maxHeight="max-h-56" + /> + ) : ( + + )} + + {llmProviderDescriptor.llm_names.length > 0 ? ( + ({ + name, + value: name, + }))} + includeDefault + maxHeight="max-h-56" + /> + ) : ( + + )} + + + +
+ {/* NOTE: this is above the test button to make sure it's visible */} + {!isTestSuccessful && testError && ( + {testError} + )} + {isTestSuccessful && ( + + Test successful! LLM provider is ready to go. + + )} + +
+ {isTestSuccessful ? ( + + ) : ( + + )} + {existingLlmProvider && ( + + )} +
+
+ + )} +
+ ); +} diff --git a/web/src/app/admin/models/llm/constants.ts b/web/src/app/admin/models/llm/constants.ts new file mode 100644 index 000000000..2db434ee9 --- /dev/null +++ b/web/src/app/admin/models/llm/constants.ts @@ -0,0 +1 @@ +export const LLM_PROVIDERS_ADMIN_URL = "/api/admin/llm/provider"; diff --git a/web/src/app/admin/models/llm/interfaces.ts b/web/src/app/admin/models/llm/interfaces.ts new file mode 100644 index 000000000..d78ce605e --- /dev/null +++ b/web/src/app/admin/models/llm/interfaces.ts @@ -0,0 +1,46 @@ +export interface CustomConfigKey { + name: string; + description: string | null; + is_required: boolean; + is_secret: boolean; +} + +export interface WellKnownLLMProviderDescriptor { + name: string; + display_name: string; + + api_key_required: boolean; + api_base_required: boolean; + api_version_required: boolean; + custom_config_keys: CustomConfigKey[] | null; + + llm_names: string[]; + default_model: string | null; + default_fast_model: string | null; +} + +export interface LLMProvider { + name: string; + provider: string; + api_key: string | null; + api_base: string | null; + api_version: string | null; + custom_config: { [key: string]: string } | null; + default_model_name: string; + fast_default_model_name: string | null; +} + +export interface FullLLMProvider extends LLMProvider { + id: number; + is_default_provider: boolean | null; + model_names: string[]; +} + +export interface LLMProviderDescriptor { + name: string; + provider: string; + model_names: string[]; + default_model_name: string; + fast_default_model_name: string | null; + is_default_provider: boolean | null; +} diff --git a/web/src/app/admin/models/llm/page.tsx b/web/src/app/admin/models/llm/page.tsx new file mode 100644 index 000000000..330718ee6 --- /dev/null +++ b/web/src/app/admin/models/llm/page.tsx @@ -0,0 +1,189 @@ +"use client"; + +import { Form, Formik } from "formik"; +import { useEffect, useState } from "react"; +import { AdminPageTitle } from 
"@/components/admin/Title"; +import { + BooleanFormField, + SectionHeader, + TextFormField, +} from "@/components/admin/connectors/Field"; +import { Popup } from "@/components/admin/connectors/Popup"; +import { Button, Divider, Text } from "@tremor/react"; +import { FiCpu } from "react-icons/fi"; +import { LLMConfiguration } from "./LLMConfiguration"; + +const LLMOptions = () => { + const [popup, setPopup] = useState<{ + message: string; + type: "success" | "error"; + } | null>(null); + + const [tokenBudgetGloballyEnabled, setTokenBudgetGloballyEnabled] = + useState(false); + const [initialValues, setInitialValues] = useState({ + enable_token_budget: false, + token_budget: "", + token_budget_time_period: "", + }); + + const fetchConfig = async () => { + const response = await fetch("/api/manage/admin/token-budget-settings"); + if (response.ok) { + const config = await response.json(); + // Assuming the config object directly matches the structure needed for initialValues + setInitialValues({ + enable_token_budget: config.enable_token_budget || false, + token_budget: config.token_budget || "", + token_budget_time_period: config.token_budget_time_period || "", + }); + setTokenBudgetGloballyEnabled(true); + } else { + // Handle error or provide fallback values + setPopup({ + message: "Failed to load current LLM options.", + type: "error", + }); + } + }; + + // Fetch current config when the component mounts + useEffect(() => { + fetchConfig(); + }, []); + + if (!tokenBudgetGloballyEnabled) { + return null; + } + + return ( + <> + {popup && } + { + const response = await fetch( + "/api/manage/admin/token-budget-settings", + { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify(values), + } + ); + if (response.ok) { + setPopup({ + message: "Updated LLM Options", + type: "success", + }); + await fetchConfig(); + } else { + const body = await response.json(); + if (body.detail) { + setPopup({ message: body.detail, type: "error" 
}); + } else { + setPopup({ + message: "Unable to update LLM options.", + type: "error", + }); + } + setTimeout(() => { + setPopup(null); + }, 4000); + } + }} + > + {({ isSubmitting, values, setFieldValue }) => { + return ( +
+ + <> + Token Budget + + Set a maximum token use per time period. If the token budget + is exceeded, Danswer will not be able to respond to queries + until the next time period. + +
+ { + setFieldValue("enable_token_budget", e.target.checked); + }} + /> + {values.enable_token_budget && ( + <> + + How many tokens (in thousands) can be used per time + period? If unspecified, no limit will be set. +
+ } + onChange={(e) => { + const value = e.target.value; + // Allow only integer values + if (value === "" || /^[0-9]+$/.test(value)) { + setFieldValue("token_budget", value); + } + }} + /> + + Specify the length of the time period, in hours, over + which the token budget will be applied. +
+ } + onChange={(e) => { + const value = e.target.value; + // Allow only integer values + if (value === "" || /^[0-9]+$/.test(value)) { + setFieldValue("token_budget_time_period", value); + } + }} + /> + + )} + +
+ +
+ + ); + }} + + + ); +}; + +const Page = () => { + return ( +
+ } + /> + + + + +
+ ); +}; + +export default Page; diff --git a/web/src/app/admin/personas/PersonaEditor.tsx b/web/src/app/admin/personas/PersonaEditor.tsx deleted file mode 100644 index 42106108c..000000000 --- a/web/src/app/admin/personas/PersonaEditor.tsx +++ /dev/null @@ -1,458 +0,0 @@ -"use client"; - -import { DocumentSet } from "@/lib/types"; -import { Button, Divider, Text } from "@tremor/react"; -import { ArrayHelpers, FieldArray, Form, Formik } from "formik"; - -import * as Yup from "yup"; -import { buildFinalPrompt, createPersona, updatePersona } from "./lib"; -import { useRouter } from "next/navigation"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { Persona } from "./interfaces"; -import Link from "next/link"; -import { useEffect, useState } from "react"; -import { - BooleanFormField, - SelectorFormField, - TextFormField, -} from "@/components/admin/connectors/Field"; - -function SectionHeader({ children }: { children: string | JSX.Element }) { - return
{children}
; -} - -function Label({ children }: { children: string | JSX.Element }) { - return ( -
{children}
- ); -} - -function SubLabel({ children }: { children: string | JSX.Element }) { - return
{children}
; -} - -export function PersonaEditor({ - existingPersona, - documentSets, - llmOverrideOptions, - defaultLLM, -}: { - existingPersona?: Persona | null; - documentSets: DocumentSet[]; - llmOverrideOptions: string[]; - defaultLLM: string; -}) { - const router = useRouter(); - const { popup, setPopup } = usePopup(); - - const [finalPrompt, setFinalPrompt] = useState(""); - const [finalPromptError, setFinalPromptError] = useState(""); - - const triggerFinalPromptUpdate = async ( - systemPrompt: string, - taskPrompt: string, - retrievalDisabled: boolean - ) => { - const response = await buildFinalPrompt( - systemPrompt, - taskPrompt, - retrievalDisabled - ); - if (response.ok) { - setFinalPrompt((await response.json()).final_prompt_template); - } - }; - - const isUpdate = existingPersona !== undefined && existingPersona !== null; - const existingPrompt = existingPersona?.prompts[0] ?? null; - - useEffect(() => { - if (isUpdate && existingPrompt) { - triggerFinalPromptUpdate( - existingPrompt.system_prompt, - existingPrompt.task_prompt, - existingPersona.num_chunks === 0 - ); - } - }, []); - - return ( -
- {popup} - documentSet.id - ) ?? ([] as number[]), - num_chunks: existingPersona?.num_chunks ?? null, - include_citations: - existingPersona?.prompts[0]?.include_citations ?? true, - llm_relevance_filter: existingPersona?.llm_relevance_filter ?? false, - llm_model_version_override: - existingPersona?.llm_model_version_override ?? null, - }} - validationSchema={Yup.object() - .shape({ - name: Yup.string().required("Must give the Persona a name!"), - description: Yup.string().required( - "Must give the Persona a description!" - ), - system_prompt: Yup.string(), - task_prompt: Yup.string(), - disable_retrieval: Yup.boolean().required(), - document_set_ids: Yup.array().of(Yup.number()), - num_chunks: Yup.number().max(20).nullable(), - include_citations: Yup.boolean().required(), - llm_relevance_filter: Yup.boolean().required(), - llm_model_version_override: Yup.string().nullable(), - }) - .test( - "system-prompt-or-task-prompt", - "Must provide at least one of System Prompt or Task Prompt", - (values) => { - const systemPromptSpecified = values.system_prompt - ? values.system_prompt.length > 0 - : false; - const taskPromptSpecified = values.task_prompt - ? values.task_prompt.length > 0 - : false; - if (systemPromptSpecified || taskPromptSpecified) { - setFinalPromptError(""); - return true; - } // Return true if at least one field has a value - - setFinalPromptError( - "Must provide at least one of System Prompt or Task Prompt" - ); - } - )} - onSubmit={async (values, formikHelpers) => { - if (finalPromptError) { - setPopup({ - type: "error", - message: "Cannot submit while there are errors in the form!", - }); - return; - } - - formikHelpers.setSubmitting(true); - - // if disable_retrieval is set, set num_chunks to 0 - // to tell the backend to not fetch any documents - const numChunks = values.disable_retrieval - ? 
0 - : values.num_chunks || 10; - - let promptResponse; - let personaResponse; - if (isUpdate) { - [promptResponse, personaResponse] = await updatePersona({ - id: existingPersona.id, - existingPromptId: existingPrompt?.id, - ...values, - num_chunks: numChunks, - }); - } else { - [promptResponse, personaResponse] = await createPersona({ - ...values, - num_chunks: numChunks, - }); - } - - let error = null; - if (!promptResponse.ok) { - error = await promptResponse.text(); - } - if (personaResponse && !personaResponse.ok) { - error = await personaResponse.text(); - } - - if (error) { - setPopup({ - type: "error", - message: `Failed to create Persona - ${error}`, - }); - formikHelpers.setSubmitting(false); - } else { - router.push(`/admin/personas?u=${Date.now()}`); - } - }} - > - {({ isSubmitting, values, setFieldValue }) => ( -
-
- Who am I? - - - - - - - - Customize my response style - - { - setFieldValue("system_prompt", e.target.value); - triggerFinalPromptUpdate( - e.target.value, - values.task_prompt, - values.disable_retrieval - ); - }} - error={finalPromptError} - /> - - { - setFieldValue("task_prompt", e.target.value); - triggerFinalPromptUpdate( - values.system_prompt, - e.target.value, - values.disable_retrieval - ); - }} - error={finalPromptError} - /> - - {!values.disable_retrieval && ( - - )} - - { - setFieldValue("disable_retrieval", e.target.checked); - triggerFinalPromptUpdate( - values.system_prompt, - values.task_prompt, - e.target.checked - ); - }} - /> - - - - {finalPrompt ? ( -
-                  {finalPrompt}
-                
- ) : ( - "-" - )} - - - - {!values.disable_retrieval && ( - <> - - What data should I have access to? - - - ( -
-
- - <> - Select which{" "} - - Document Sets - {" "} - that this Persona should search through. If none - are specified, the Persona will search through all - available documents in order to try and response - to queries. - - -
-
- {documentSets.map((documentSet) => { - const ind = values.document_set_ids.indexOf( - documentSet.id - ); - let isSelected = ind !== -1; - return ( -
{ - if (isSelected) { - arrayHelpers.remove(ind); - } else { - arrayHelpers.push(documentSet.id); - } - }} - > -
- {documentSet.name} -
-
- ); - })} -
-
- )} - /> - - - - )} - - {llmOverrideOptions.length > 0 && defaultLLM && ( - <> - [Advanced] Model Selection - - - Pick which LLM to use for this Persona. If left as Default, - will use {defaultLLM}. -
-
- For more information on the different LLMs, checkout the{" "} - - OpenAI docs - - . -
- -
- { - return { - name: llmOption, - value: llmOption, - }; - })} - includeDefault={true} - /> -
- - )} - - - - {!values.disable_retrieval && ( - <> - - [Advanced] Retrieval Customization - - - - How many chunks should we feed into the LLM when - generating the final response? Each chunk is ~400 words - long. If you are using gpt-3.5-turbo or other similar - models, setting this to a value greater than 5 will - result in errors at query time due to the model's - input length limit. -
-
- If unspecified, will use 10 chunks. -
- } - onChange={(e) => { - const value = e.target.value; - // Allow only integer values - if (value === "" || /^[0-9]+$/.test(value)) { - setFieldValue("num_chunks", value); - } - }} - /> - - - - - - )} - -
- -
-
- - )} - -
- ); -} diff --git a/web/src/app/admin/personas/[personaId]/page.tsx b/web/src/app/admin/personas/[personaId]/page.tsx deleted file mode 100644 index 0b5213192..000000000 --- a/web/src/app/admin/personas/[personaId]/page.tsx +++ /dev/null @@ -1,95 +0,0 @@ -import { ErrorCallout } from "@/components/ErrorCallout"; -import { fetchSS } from "@/lib/utilsSS"; -import { Persona } from "../interfaces"; -import { PersonaEditor } from "../PersonaEditor"; -import { DocumentSet } from "@/lib/types"; -import { BackButton } from "@/components/BackButton"; -import { Card, Title } from "@tremor/react"; -import { DeletePersonaButton } from "./DeletePersonaButton"; -import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; - -export default async function Page({ - params, -}: { - params: { personaId: string }; -}) { - const [ - personaResponse, - documentSetsResponse, - llmOverridesResponse, - defaultLLMResponse, - ] = await Promise.all([ - fetchSS(`/persona/${params.personaId}`), - fetchSS("/manage/document-set"), - fetchSS("/admin/persona/utils/list-available-models"), - fetchSS("/admin/persona/utils/default-model"), - ]); - - if (!personaResponse.ok) { - return ( - - ); - } - - if (!documentSetsResponse.ok) { - return ( - - ); - } - - if (!llmOverridesResponse.ok) { - return ( - - ); - } - - if (!defaultLLMResponse.ok) { - return ( - - ); - } - - const documentSets = (await documentSetsResponse.json()) as DocumentSet[]; - const persona = (await personaResponse.json()) as Persona; - const llmOverrideOptions = (await llmOverridesResponse.json()) as string[]; - const defaultLLM = (await defaultLLMResponse.json()) as string; - - return ( -
- - - -
-

Edit Persona

-
- - - - - -
- Delete Persona -
- -
-
-
- ); -} diff --git a/web/src/app/admin/personas/new/page.tsx b/web/src/app/admin/personas/new/page.tsx deleted file mode 100644 index 6fd675ae6..000000000 --- a/web/src/app/admin/personas/new/page.tsx +++ /dev/null @@ -1,66 +0,0 @@ -import { PersonaEditor } from "../PersonaEditor"; -import { fetchSS } from "@/lib/utilsSS"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { DocumentSet } from "@/lib/types"; -import { RobotIcon } from "@/components/icons/icons"; -import { BackButton } from "@/components/BackButton"; -import { Card } from "@tremor/react"; -import { AdminPageTitle } from "@/components/admin/Title"; - -export default async function Page() { - const [documentSetsResponse, llmOverridesResponse, defaultLLMResponse] = - await Promise.all([ - fetchSS("/manage/document-set"), - fetchSS("/admin/persona/utils/list-available-models"), - fetchSS("/admin/persona/utils/default-model"), - ]); - - if (!documentSetsResponse.ok) { - return ( - - ); - } - const documentSets = (await documentSetsResponse.json()) as DocumentSet[]; - - if (!llmOverridesResponse.ok) { - return ( - - ); - } - const llmOverrideOptions = (await llmOverridesResponse.json()) as string[]; - - if (!defaultLLMResponse.ok) { - return ( - - ); - } - const defaultLLM = (await defaultLLMResponse.json()) as string; - - return ( -
- - - } - /> - - - - -
- ); -} diff --git a/web/src/app/admin/settings/SettingsForm.tsx b/web/src/app/admin/settings/SettingsForm.tsx new file mode 100644 index 000000000..761eef060 --- /dev/null +++ b/web/src/app/admin/settings/SettingsForm.tsx @@ -0,0 +1,149 @@ +"use client"; + +import { Label, SubLabel } from "@/components/admin/connectors/Field"; +import { Title } from "@tremor/react"; +import { Settings } from "./interfaces"; +import { useRouter } from "next/navigation"; +import { DefaultDropdown, Option } from "@/components/Dropdown"; + +function Checkbox({ + label, + sublabel, + checked, + onChange, +}: { + label: string; + sublabel: string; + checked: boolean; + onChange: (e: React.ChangeEvent) => void; +}) { + return ( + + ); +} + +function Selector({ + label, + subtext, + options, + selected, + onSelect, +}: { + label: string; + subtext: string; + options: Option[]; + selected: string; + onSelect: (value: string | number | null) => void; +}) { + return ( +
+ {label && } + {subtext && {subtext}} + +
+ +
+
+ ); +} + +export function SettingsForm({ settings }: { settings: Settings }) { + const router = useRouter(); + + async function updateSettingField( + updateRequests: { fieldName: keyof Settings; newValue: any }[] + ) { + const newValues: any = {}; + updateRequests.forEach(({ fieldName, newValue }) => { + newValues[fieldName] = newValue; + }); + + const response = await fetch("/api/admin/settings", { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ + ...settings, + ...newValues, + }), + }); + if (response.ok) { + router.refresh(); + } else { + const errorMsg = (await response.json()).detail; + alert(`Failed to update settings. ${errorMsg}`); + } + } + + return ( +
+ Page Visibility + + { + const updates: any[] = [ + { fieldName: "search_page_enabled", newValue: e.target.checked }, + ]; + if (!e.target.checked && settings.default_page === "search") { + updates.push({ fieldName: "default_page", newValue: "chat" }); + } + updateSettingField(updates); + }} + /> + + { + const updates: any[] = [ + { fieldName: "chat_page_enabled", newValue: e.target.checked }, + ]; + if (!e.target.checked && settings.default_page === "chat") { + updates.push({ fieldName: "default_page", newValue: "search" }); + } + updateSettingField(updates); + }} + /> + + { + value && + updateSettingField([ + { fieldName: "default_page", newValue: value }, + ]); + }} + /> +
+ ); +} diff --git a/web/src/app/admin/settings/interfaces.ts b/web/src/app/admin/settings/interfaces.ts new file mode 100644 index 000000000..3e997a562 --- /dev/null +++ b/web/src/app/admin/settings/interfaces.ts @@ -0,0 +1,21 @@ +export interface Settings { + chat_page_enabled: boolean; + search_page_enabled: boolean; + default_page: "search" | "chat"; +} + +export interface EnterpriseSettings { + application_name: string | null; + use_custom_logo: boolean; + + // custom Chat components + custom_header_content: string | null; + custom_popup_header: string | null; + custom_popup_content: string | null; +} + +export interface CombinedSettings { + settings: Settings; + enterpriseSettings: EnterpriseSettings | null; + customAnalyticsScript: string | null; +} diff --git a/web/src/app/admin/settings/page.tsx b/web/src/app/admin/settings/page.tsx new file mode 100644 index 000000000..1a30495b5 --- /dev/null +++ b/web/src/app/admin/settings/page.tsx @@ -0,0 +1,33 @@ +import { AdminPageTitle } from "@/components/admin/Title"; +import { FiSettings } from "react-icons/fi"; +import { Settings } from "./interfaces"; +import { fetchSS } from "@/lib/utilsSS"; +import { SettingsForm } from "./SettingsForm"; +import { Callout, Text } from "@tremor/react"; + +export default async function Page() { + const response = await fetchSS("/settings"); + + if (!response.ok) { + const errorMsg = await response.text(); + return {errorMsg}; + } + + const settings = (await response.json()) as Settings; + + return ( +
+ } + /> + + + Manage general Danswer settings applicable to all users in the + workspace. + + + +
+ ); +} diff --git a/web/src/app/admin/users/page.tsx b/web/src/app/admin/users/page.tsx index 006e9c892..fb2695ef1 100644 --- a/web/src/app/admin/users/page.tsx +++ b/web/src/app/admin/users/page.tsx @@ -45,59 +45,90 @@ const UsersTable = () => { Role
-
Promote
+
Actions
- {users.map((user) => { - return ( - - {user.email} - - {user.role === "admin" ? "Admin" : "User"} - - -
-
- -
-
-
-
- ); - })} + ); + if (!res.ok) { + const errorMsg = await res.text(); + setPopup({ + message: `Unable to promote user - ${errorMsg}`, + type: "error", + }); + } else { + mutate("/api/manage/users"); + setPopup({ + message: "User promoted to admin user!", + type: "success", + }); + } + }} + > + Promote to Admin User + + )} + {user.role === "admin" && ( + + )} +
+ + + ))} diff --git a/web/src/app/assistants/AssistantSharedStatus.tsx b/web/src/app/assistants/AssistantSharedStatus.tsx new file mode 100644 index 000000000..c5127c87e --- /dev/null +++ b/web/src/app/assistants/AssistantSharedStatus.tsx @@ -0,0 +1,62 @@ +import { User } from "@/lib/types"; +import { Persona } from "../admin/assistants/interfaces"; +import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership"; +import { FiLock, FiUnlock } from "react-icons/fi"; + +export function AssistantSharedStatusDisplay({ + assistant, + user, +}: { + assistant: Persona; + user: User | null; +}) { + const isOwnedByUser = checkUserOwnsAssistant(user, assistant); + + const assistantSharedUsersWithoutOwner = assistant.users?.filter( + (u) => u.id !== assistant.owner?.id + ); + + if (assistant.is_public) { + return ( +
+ + Public +
+ ); + } + + if (assistantSharedUsersWithoutOwner.length > 0) { + return ( +
+ + {isOwnedByUser ? ( + `Shared with: ${ + assistantSharedUsersWithoutOwner.length <= 4 + ? assistantSharedUsersWithoutOwner.map((u) => u.email).join(", ") + : `${assistantSharedUsersWithoutOwner + .slice(0, 4) + .map((u) => u.email) + .join(", ")} and ${assistant.users.length - 4} others...` + }` + ) : ( +
+ {assistant.owner ? ( +
+ Shared with you by {assistant.owner?.email} +
+ ) : ( + "Shared with you" + )} +
+ )} +
+ ); + } + + return ( +
+ + Private +
+ ); +} diff --git a/web/src/app/assistants/AssistantsPageTitle.tsx b/web/src/app/assistants/AssistantsPageTitle.tsx new file mode 100644 index 000000000..5bcdea7ea --- /dev/null +++ b/web/src/app/assistants/AssistantsPageTitle.tsx @@ -0,0 +1,18 @@ +export function AssistantsPageTitle({ + children, +}: { + children: JSX.Element | string; +}) { + return ( +

+ {children} +

+ ); +} diff --git a/web/src/app/assistants/LargeBackButton.tsx b/web/src/app/assistants/LargeBackButton.tsx new file mode 100644 index 000000000..3b52f36c5 --- /dev/null +++ b/web/src/app/assistants/LargeBackButton.tsx @@ -0,0 +1,16 @@ +"use client"; + +import { useRouter } from "next/navigation"; +import { FiChevronLeft } from "react-icons/fi"; + +export function LargeBackButton() { + const router = useRouter(); + return ( +
router.back()}> + +
+ ); +} diff --git a/web/src/app/assistants/NavigationButton.tsx b/web/src/app/assistants/NavigationButton.tsx new file mode 100644 index 000000000..223c8fbbd --- /dev/null +++ b/web/src/app/assistants/NavigationButton.tsx @@ -0,0 +1,25 @@ +export function NavigationButton({ + children, +}: { + children: JSX.Element | string; +}) { + return ( +
+ {children} +
+ ); +} diff --git a/web/src/app/assistants/ToolsDisplay.tsx b/web/src/app/assistants/ToolsDisplay.tsx new file mode 100644 index 000000000..c30025dc6 --- /dev/null +++ b/web/src/app/assistants/ToolsDisplay.tsx @@ -0,0 +1,31 @@ +import { Bubble } from "@/components/Bubble"; +import { ToolSnapshot } from "@/lib/tools/interfaces"; +import { FiImage, FiSearch } from "react-icons/fi"; + +export function ToolsDisplay({ tools }: { tools: ToolSnapshot[] }) { + return ( +
+ {tools.map((tool) => { + let toolName = tool.name; + let toolIcon = null; + + if (tool.name === "SearchTool") { + toolName = "Search"; + toolIcon = ; + } else if (tool.name === "ImageGenerationTool") { + toolName = "Image Generation"; + toolIcon = ; + } + + return ( + +
+ {toolIcon} + {toolName} +
+
+ ); + })} +
+ ); +} diff --git a/web/src/app/assistants/edit/[id]/page.tsx b/web/src/app/assistants/edit/[id]/page.tsx new file mode 100644 index 000000000..2b56bb00e --- /dev/null +++ b/web/src/app/assistants/edit/[id]/page.tsx @@ -0,0 +1,65 @@ +import { ErrorCallout } from "@/components/ErrorCallout"; +import { Card, Text, Title } from "@tremor/react"; +import { HeaderWrapper } from "@/components/header/HeaderWrapper"; +import { AssistantEditor } from "@/app/admin/assistants/AssistantEditor"; +import { SuccessfulPersonaUpdateRedirectType } from "@/app/admin/assistants/enums"; +import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; +import { DeletePersonaButton } from "@/app/admin/assistants/[id]/DeletePersonaButton"; +import { LargeBackButton } from "../../LargeBackButton"; + +export default async function Page({ params }: { params: { id: string } }) { + const [values, error] = await fetchAssistantEditorInfoSS(params.id); + + let body; + if (!values) { + body = ( +
+ +
+ ); + } else { + body = ( +
+
+
+ + + + + Delete Assistant + + Click the button below to permanently delete this assistant. + +
+ +
+
+
+
+ ); + } + + return ( +
+ +
+
+ +

+ Edit Assistant +

+
+
+
+ + {body} +
+ ); +} diff --git a/web/src/app/assistants/gallery/AssistantsGallery.tsx b/web/src/app/assistants/gallery/AssistantsGallery.tsx new file mode 100644 index 000000000..4b9eaff71 --- /dev/null +++ b/web/src/app/assistants/gallery/AssistantsGallery.tsx @@ -0,0 +1,205 @@ +"use client"; + +import { Persona } from "@/app/admin/assistants/interfaces"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { User } from "@/lib/types"; +import { Button } from "@tremor/react"; +import Link from "next/link"; +import { useState } from "react"; +import { FiMinus, FiPlus, FiX } from "react-icons/fi"; +import { NavigationButton } from "../NavigationButton"; +import { AssistantsPageTitle } from "../AssistantsPageTitle"; +import { + addAssistantToList, + removeAssistantFromList, +} from "@/lib/assistants/updateAssistantPreferences"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { useRouter } from "next/navigation"; +import { ToolsDisplay } from "../ToolsDisplay"; + +export function AssistantsGallery({ + assistants, + user, +}: { + assistants: Persona[]; + user: User | null; +}) { + function filterAssistants(assistants: Persona[], query: string): Persona[] { + return assistants.filter( + (assistant) => + assistant.name.toLowerCase().includes(query.toLowerCase()) || + assistant.description.toLowerCase().includes(query.toLowerCase()) + ); + } + + const router = useRouter(); + + const [searchQuery, setSearchQuery] = useState(""); + const { popup, setPopup } = usePopup(); + + const allAssistantIds = assistants.map((assistant) => assistant.id); + const filteredAssistants = filterAssistants(assistants, searchQuery); + + return ( + <> + {popup} +
+ Assistant Gallery +
+ + View Your Assistants + +
+ +

+ Discover and create custom assistants that combine instructions, extra + knowledge, and any combination of tools. +

+ +
+ setSearchQuery(e.target.value)} + className=" + w-full + p-2 + border + border-gray-300 + rounded + focus:outline-none + focus:ring-2 + focus:ring-blue-500 + " + /> +
+
+ {filteredAssistants.map((assistant) => ( +
+
+ +

+ {assistant.name} +

+ {user && ( +
+ {!user.preferences?.chosen_assistants || + user.preferences?.chosen_assistants?.includes( + assistant.id + ) ? ( + + ) : ( + + )} +
+ )} +
+ {assistant.tools.length > 0 && ( + + )} +

{assistant.description}

+

+ Author: {assistant.owner?.email || "Danswer"} +

+
+ ))} +
+
+ + ); +} diff --git a/web/src/app/assistants/gallery/page.tsx b/web/src/app/assistants/gallery/page.tsx new file mode 100644 index 000000000..c4b9f46f6 --- /dev/null +++ b/web/src/app/assistants/gallery/page.tsx @@ -0,0 +1,81 @@ +import { ChatSidebar } from "@/app/chat/sessionSidebar/ChatSidebar"; +import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; +import { UserDropdown } from "@/components/UserDropdown"; +import { ChatProvider } from "@/components/context/ChatContext"; +import { WelcomeModal } from "@/components/initialSetup/welcome/WelcomeModalWrapper"; +import { fetchChatData } from "@/lib/chat/fetchChatData"; +import { unstable_noStore as noStore } from "next/cache"; +import { redirect } from "next/navigation"; +import { AssistantsGallery } from "./AssistantsGallery"; + +export default async function GalleryPage({ + searchParams, +}: { + searchParams: { [key: string]: string }; +}) { + noStore(); + + const data = await fetchChatData(searchParams); + + if ("redirect" in data) { + redirect(data.redirect); + } + + const { + user, + chatSessions, + availableSources, + documentSets, + personas, + tags, + llmProviders, + folders, + openedFolders, + shouldShowWelcomeModal, + } = data; + + return ( + <> + + + {shouldShowWelcomeModal && } + + +
+ + +
+
+
+ +
+
+ +
+ +
+
+
+
+ + ); +} diff --git a/web/src/app/assistants/mine/AssistantSharingModal.tsx b/web/src/app/assistants/mine/AssistantSharingModal.tsx new file mode 100644 index 000000000..0fa55fea4 --- /dev/null +++ b/web/src/app/assistants/mine/AssistantSharingModal.tsx @@ -0,0 +1,227 @@ +import { useState } from "react"; +import { Modal } from "@/components/Modal"; +import { MinimalUserSnapshot, User } from "@/lib/types"; +import { Button, Divider, Text } from "@tremor/react"; +import { FiPlus, FiX } from "react-icons/fi"; +import { Persona } from "@/app/admin/assistants/interfaces"; +import { SearchMultiSelectDropdown } from "@/components/Dropdown"; +import { UsersIcon } from "@/components/icons/icons"; +import { AssistantSharedStatusDisplay } from "../AssistantSharedStatus"; +import { + addUsersToAssistantSharedList, + removeUsersFromAssistantSharedList, +} from "@/lib/assistants/shareAssistant"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { Bubble } from "@/components/Bubble"; +import { useRouter } from "next/navigation"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { Spinner } from "@/components/Spinner"; + +interface AssistantSharingModalProps { + assistant: Persona; + user: User | null; + allUsers: MinimalUserSnapshot[]; + show: boolean; + onClose: () => void; +} + +export function AssistantSharingModal({ + assistant, + user, + allUsers, + show, + onClose, +}: AssistantSharingModalProps) { + const router = useRouter(); + const { popup, setPopup } = usePopup(); + const [isUpdating, setIsUpdating] = useState(false); + const [selectedUsers, setSelectedUsers] = useState([]); + + const assistantName = assistant.name; + const sharedUsersWithoutOwner = assistant.users.filter( + (u) => u.id !== assistant.owner?.id + ); + + if (!show) { + return null; + } + + const handleShare = async () => { + setIsUpdating(true); + const startTime = Date.now(); + + const error = await addUsersToAssistantSharedList( + assistant, + 
selectedUsers.map((user) => user.id) + ); + router.refresh(); + + const elapsedTime = Date.now() - startTime; + const remainingTime = Math.max(0, 1000 - elapsedTime); + + setTimeout(() => { + setIsUpdating(false); + if (error) { + setPopup({ + message: `Failed to share assistant - ${error}`, + type: "error", + }); + } + }, remainingTime); + }; + + let sharedStatus = null; + if (assistant.is_public || !sharedUsersWithoutOwner.length) { + sharedStatus = ( + + ); + } else { + sharedStatus = ( +
+ Shared with:{" "} +
+ {sharedUsersWithoutOwner.map((u) => ( + { + setIsUpdating(true); + const startTime = Date.now(); + + const error = await removeUsersFromAssistantSharedList( + assistant, + [u.id] + ); + router.refresh(); + + const elapsedTime = Date.now() - startTime; + const remainingTime = Math.max(0, 1000 - elapsedTime); + + setTimeout(() => { + setIsUpdating(false); + if (error) { + setPopup({ + message: `Failed to remove assistant - ${error}`, + type: "error", + }); + } + }, remainingTime); + }} + > +
+ {u.email} +
+
+ ))} +
+
+ ); + } + + return ( + <> + {popup} + + {" "} +
{assistantName}
+ + } + onOutsideClick={onClose} + > +
+ {isUpdating && } + + Control which other users should have access to this assistant. + + +
+

Current status:

+ {sharedStatus} +
+ +

Share Assistant:

+
+ + !selectedUsers.map((u2) => u2.id).includes(u1.id) && + !sharedUsersWithoutOwner + .map((u2) => u2.id) + .includes(u1.id) && + u1.id !== user?.id + ) + .map((user) => { + return { + name: user.email, + value: user.id, + }; + })} + onSelect={(option) => { + setSelectedUsers([ + ...Array.from( + new Set([ + ...selectedUsers, + { id: option.value as string, email: option.name }, + ]) + ), + ]); + }} + itemComponent={({ option }) => ( +
+ + {option.name} +
+ +
+
+ )} + /> +
+ {selectedUsers.length > 0 && + selectedUsers.map((selectedUser) => ( +
{ + setSelectedUsers( + selectedUsers.filter( + (user) => user.id !== selectedUser.id + ) + ); + }} + className={` + flex + rounded-lg + px-2 + py-1 + border + border-border + hover:bg-hover-light + cursor-pointer`} + > + {selectedUser.email} +
+ ))} +
+ + {selectedUsers.length > 0 && ( + + )} +
+
+
+ + ); +} diff --git a/web/src/app/assistants/mine/AssistantsList.tsx b/web/src/app/assistants/mine/AssistantsList.tsx new file mode 100644 index 000000000..34e792c1e --- /dev/null +++ b/web/src/app/assistants/mine/AssistantsList.tsx @@ -0,0 +1,367 @@ +"use client"; + +import { useState } from "react"; +import { MinimalUserSnapshot, User } from "@/lib/types"; +import { Persona } from "@/app/admin/assistants/interfaces"; +import { Divider, Text } from "@tremor/react"; +import { + FiArrowDown, + FiArrowUp, + FiEdit2, + FiMoreHorizontal, + FiPlus, + FiSearch, + FiX, + FiShare2, +} from "react-icons/fi"; +import Link from "next/link"; +import { orderAssistantsForUser } from "@/lib/assistants/orderAssistants"; +import { + addAssistantToList, + moveAssistantDown, + moveAssistantUp, + removeAssistantFromList, +} from "@/lib/assistants/updateAssistantPreferences"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { DefaultPopover } from "@/components/popover/DefaultPopover"; +import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; +import { useRouter } from "next/navigation"; +import { NavigationButton } from "../NavigationButton"; +import { AssistantsPageTitle } from "../AssistantsPageTitle"; +import { checkUserOwnsAssistant } from "@/lib/assistants/checkOwnership"; +import { AssistantSharingModal } from "./AssistantSharingModal"; +import { AssistantSharedStatusDisplay } from "../AssistantSharedStatus"; +import useSWR from "swr"; +import { errorHandlingFetcher } from "@/lib/fetcher"; +import { ToolsDisplay } from "../ToolsDisplay"; + +function AssistantListItem({ + assistant, + user, + allAssistantIds, + allUsers, + isFirst, + isLast, + isVisible, + setPopup, +}: { + assistant: Persona; + user: User | null; + allUsers: MinimalUserSnapshot[]; + allAssistantIds: number[]; + isFirst: boolean; + isLast: boolean; + isVisible: boolean; + setPopup: (popupSpec: PopupSpec | null) => void; +}) { + const router = useRouter(); + const 
[showSharingModal, setShowSharingModal] = useState(false); + + const currentChosenAssistants = user?.preferences?.chosen_assistants; + const isOwnedByUser = checkUserOwnsAssistant(user, assistant); + + return ( + <> + { + setShowSharingModal(false); + router.refresh(); + }} + show={showSharingModal} + /> +
+
+
+ +

+ {assistant.name} +

+
+ {assistant.tools.length > 0 && ( + + )} +
{assistant.description}
+
+ +
+
+ {isOwnedByUser && ( +
+ {!assistant.is_public && ( +
setShowSharingModal(true)} + > + +
+ )} + + + +
+ )} + + +
+ } + side="bottom" + align="start" + sideOffset={5} + > + {[ + ...(!isFirst + ? [ +
{ + const success = await moveAssistantUp( + assistant.id, + currentChosenAssistants || allAssistantIds + ); + if (success) { + setPopup({ + message: `"${assistant.name}" has been moved up.`, + type: "success", + }); + router.refresh(); + } else { + setPopup({ + message: `"${assistant.name}" could not be moved up.`, + type: "error", + }); + } + }} + > + Move Up +
, + ] + : []), + ...(!isLast + ? [ +
{ + const success = await moveAssistantDown( + assistant.id, + currentChosenAssistants || allAssistantIds + ); + if (success) { + setPopup({ + message: `"${assistant.name}" has been moved down.`, + type: "success", + }); + router.refresh(); + } else { + setPopup({ + message: `"${assistant.name}" could not be moved down.`, + type: "error", + }); + } + }} + > + Move Down +
, + ] + : []), + isVisible ? ( +
{ + if ( + currentChosenAssistants && + currentChosenAssistants.length === 1 + ) { + setPopup({ + message: `Cannot remove "${assistant.name}" - you must have at least one assistant.`, + type: "error", + }); + return; + } + + const success = await removeAssistantFromList( + assistant.id, + currentChosenAssistants || allAssistantIds + ); + if (success) { + setPopup({ + message: `"${assistant.name}" has been removed from your list.`, + type: "success", + }); + router.refresh(); + } else { + setPopup({ + message: `"${assistant.name}" could not be removed from your list.`, + type: "error", + }); + } + }} + > + {isOwnedByUser ? "Hide" : "Remove"} +
+ ) : ( +
{ + const success = await addAssistantToList( + assistant.id, + currentChosenAssistants || allAssistantIds + ); + if (success) { + setPopup({ + message: `"${assistant.name}" has been added to your list.`, + type: "success", + }); + router.refresh(); + } else { + setPopup({ + message: `"${assistant.name}" could not be added to your list.`, + type: "error", + }); + } + }} + > + Add +
+ ), + ]} + + + + ); +} + +interface AssistantsListProps { + user: User | null; + assistants: Persona[]; +} + +export function AssistantsList({ user, assistants }: AssistantsListProps) { + const filteredAssistants = orderAssistantsForUser(assistants, user); + const ownedButHiddenAssistants = assistants.filter( + (assistant) => + checkUserOwnsAssistant(user, assistant) && + user?.preferences?.chosen_assistants && + !user?.preferences?.chosen_assistants?.includes(assistant.id) + ); + const allAssistantIds = assistants.map((assistant) => assistant.id); + + const { popup, setPopup } = usePopup(); + + const { data: users } = useSWR( + "/api/users", + errorHandlingFetcher + ); + + return ( + <> + {popup} +
+ My Assistants + +
+ + +
+ + Create New Assistant +
+
+ + + + +
+ + View Public and Shared Assistants +
+
+ +
+ +

+ Assistants allow you to customize your experience for a specific + purpose. Specifically, they combine instructions, extra knowledge, and + any combination of tools. +

+ + + +

Active Assistants

+ + + The order the assistants appear below will be the order they appear in + the Assistants dropdown. The first assistant listed will be your + default assistant when you start a new chat. + + +
+ {filteredAssistants.map((assistant, index) => ( + + ))} +
+ + {ownedButHiddenAssistants.length > 0 && ( + <> + + +

Your Hidden Assistants

+ + + Assistants you've created that aren't currently visible + in the Assistants selector. + + +
+ {ownedButHiddenAssistants.map((assistant, index) => ( + + ))} +
+ + )} +
+ + ); +} diff --git a/web/src/app/assistants/mine/page.tsx b/web/src/app/assistants/mine/page.tsx new file mode 100644 index 000000000..3bbf83e17 --- /dev/null +++ b/web/src/app/assistants/mine/page.tsx @@ -0,0 +1,82 @@ +import { ChatSidebar } from "@/app/chat/sessionSidebar/ChatSidebar"; +import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; +import { UserDropdown } from "@/components/UserDropdown"; +import { ChatProvider } from "@/components/context/ChatContext"; +import { WelcomeModal } from "@/components/initialSetup/welcome/WelcomeModalWrapper"; +import { ApiKeyModal } from "@/components/llm/ApiKeyModal"; +import { fetchChatData } from "@/lib/chat/fetchChatData"; +import { unstable_noStore as noStore } from "next/cache"; +import { redirect } from "next/navigation"; +import { AssistantsList } from "./AssistantsList"; + +export default async function GalleryPage({ + searchParams, +}: { + searchParams: { [key: string]: string }; +}) { + noStore(); + + const data = await fetchChatData(searchParams); + + if ("redirect" in data) { + redirect(data.redirect); + } + + const { + user, + chatSessions, + availableSources, + documentSets, + personas, + tags, + llmProviders, + folders, + openedFolders, + shouldShowWelcomeModal, + } = data; + + return ( + <> + + + {shouldShowWelcomeModal && } + + +
+ + +
+
+
+ +
+
+ +
+ +
+
+
+
+ + ); +} diff --git a/web/src/app/assistants/new/page.tsx b/web/src/app/assistants/new/page.tsx new file mode 100644 index 000000000..19e0abcaf --- /dev/null +++ b/web/src/app/assistants/new/page.tsx @@ -0,0 +1,54 @@ +import { Card } from "@tremor/react"; +import { HeaderWrapper } from "@/components/header/HeaderWrapper"; +import { AssistantEditor } from "@/app/admin/assistants/AssistantEditor"; +import { SuccessfulPersonaUpdateRedirectType } from "@/app/admin/assistants/enums"; +import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; +import { ErrorCallout } from "@/components/ErrorCallout"; +import { LargeBackButton } from "../LargeBackButton"; + +export default async function Page() { + const [values, error] = await fetchAssistantEditorInfoSS(); + + let body; + if (!values) { + body = ( +
+ +
+ ); + } else { + body = ( +
+
+
+ + + +
+
+
+ ); + } + + return ( +
+ +
+
+ +

+ New Assistant +

+
+
+
+ + {body} +
+ ); +} diff --git a/web/src/app/chat/Chat.tsx b/web/src/app/chat/Chat.tsx deleted file mode 100644 index f2b5cda7a..000000000 --- a/web/src/app/chat/Chat.tsx +++ /dev/null @@ -1,813 +0,0 @@ -"use client"; - -import { useEffect, useRef, useState } from "react"; -import { FiRefreshCcw, FiSend, FiStopCircle } from "react-icons/fi"; -import { AIMessage, HumanMessage } from "./message/Messages"; -import { AnswerPiecePacket, DanswerDocument } from "@/lib/search/interfaces"; -import { - BackendChatSession, - BackendMessage, - DocumentsResponse, - Message, - RetrievalType, - StreamingError, -} from "./interfaces"; -import { useRouter } from "next/navigation"; -import { FeedbackType } from "./types"; -import { - createChatSession, - getCitedDocumentsFromMessage, - getHumanAndAIMessageFromMessageNumber, - getLastSuccessfulMessageId, - handleAutoScroll, - handleChatFeedback, - nameChatSession, - processRawChatHistory, - sendMessage, -} from "./lib"; -import { ThreeDots } from "react-loader-spinner"; -import { FeedbackModal } from "./modal/FeedbackModal"; -import { DocumentSidebar } from "./documentSidebar/DocumentSidebar"; -import { Persona } from "../admin/personas/interfaces"; -import { ChatPersonaSelector } from "./ChatPersonaSelector"; -import { useFilters } from "@/lib/hooks"; -import { DocumentSet, Tag, ValidSources } from "@/lib/types"; -import { ChatFilters } from "./modifiers/ChatFilters"; -import { buildFilters } from "@/lib/search/utils"; -import { SelectedDocuments } from "./modifiers/SelectedDocuments"; -import { usePopup } from "@/components/admin/connectors/Popup"; -import { ResizableSection } from "@/components/resizable/ResizableSection"; -import { DanswerInitializingLoader } from "@/components/DanswerInitializingLoader"; -import { ChatIntro } from "./ChatIntro"; -import { HEADER_PADDING } from "@/lib/constants"; -import { computeAvailableFilters } from "@/lib/filters"; -import { useDocumentSelection } from "./useDocumentSelection"; - -const MAX_INPUT_HEIGHT 
= 200; - -export const Chat = ({ - existingChatSessionId, - existingChatSessionPersonaId, - availableSources, - availableDocumentSets, - availablePersonas, - availableTags, - defaultSelectedPersonaId, - documentSidebarInitialWidth, - shouldhideBeforeScroll, -}: { - existingChatSessionId: number | null; - existingChatSessionPersonaId: number | undefined; - availableSources: ValidSources[]; - availableDocumentSets: DocumentSet[]; - availablePersonas: Persona[]; - availableTags: Tag[]; - defaultSelectedPersonaId?: number; // what persona to default to - documentSidebarInitialWidth?: number; - shouldhideBeforeScroll?: boolean; -}) => { - const router = useRouter(); - const { popup, setPopup } = usePopup(); - - // fetch messages for the chat session - const [isFetchingChatMessages, setIsFetchingChatMessages] = useState( - existingChatSessionId !== null - ); - - // needed so closures (e.g. onSubmit) can access the current value - const urlChatSessionId = useRef(); - // this is triggered every time the user switches which chat - // session they are using - useEffect(() => { - urlChatSessionId.current = existingChatSessionId; - - textareaRef.current?.focus(); - - // only clear things if we're going from one chat session to another - if (chatSessionId !== null && existingChatSessionId !== chatSessionId) { - // de-select documents - clearSelectedDocuments(); - // reset all filters - filterManager.setSelectedDocumentSets([]); - filterManager.setSelectedSources([]); - filterManager.setSelectedTags([]); - filterManager.setTimeRange(null); - if (isStreaming) { - setIsCancelled(true); - } - } - - setChatSessionId(existingChatSessionId); - - async function initialSessionFetch() { - if (existingChatSessionId === null) { - setIsFetchingChatMessages(false); - if (defaultSelectedPersonaId !== undefined) { - setSelectedPersona( - availablePersonas.find( - (persona) => persona.id === defaultSelectedPersonaId - ) - ); - } else { - setSelectedPersona(undefined); - } - 
setMessageHistory([]); - return; - } - - setIsFetchingChatMessages(true); - const response = await fetch( - `/api/chat/get-chat-session/${existingChatSessionId}` - ); - const chatSession = (await response.json()) as BackendChatSession; - setSelectedPersona( - availablePersonas.find( - (persona) => persona.id === chatSession.persona_id - ) - ); - const newMessageHistory = processRawChatHistory(chatSession.messages); - setMessageHistory(newMessageHistory); - - const latestMessageId = - newMessageHistory[newMessageHistory.length - 1]?.messageId; - setSelectedMessageForDocDisplay( - latestMessageId !== undefined ? latestMessageId : null - ); - - setIsFetchingChatMessages(false); - } - - initialSessionFetch(); - }, [existingChatSessionId]); - - const [chatSessionId, setChatSessionId] = useState( - existingChatSessionId - ); - const [message, setMessage] = useState(""); - const [messageHistory, setMessageHistory] = useState([]); - const [isStreaming, setIsStreaming] = useState(false); - - // for document display - // NOTE: -1 is a special designation that means the latest AI message - const [selectedMessageForDocDisplay, setSelectedMessageForDocDisplay] = - useState(null); - const { aiMessage } = selectedMessageForDocDisplay - ? getHumanAndAIMessageFromMessageNumber( - messageHistory, - selectedMessageForDocDisplay - ) - : { aiMessage: null }; - - const [selectedPersona, setSelectedPersona] = useState( - existingChatSessionPersonaId !== undefined - ? availablePersonas.find( - (persona) => persona.id === existingChatSessionPersonaId - ) - : defaultSelectedPersonaId !== undefined - ? 
availablePersonas.find( - (persona) => persona.id === defaultSelectedPersonaId - ) - : undefined - ); - const livePersona = selectedPersona || availablePersonas[0]; - - useEffect(() => { - if (messageHistory.length === 0) { - setSelectedPersona( - availablePersonas.find( - (persona) => persona.id === defaultSelectedPersonaId - ) - ); - } - }, [defaultSelectedPersonaId]); - - const [ - selectedDocuments, - toggleDocumentSelection, - clearSelectedDocuments, - selectedDocumentTokens, - ] = useDocumentSelection(); - // just choose a conservative default, this will be updated in the - // background on initial load / on persona change - const [maxTokens, setMaxTokens] = useState(4096); - // fetch # of allowed document tokens for the selected Persona - useEffect(() => { - async function fetchMaxTokens() { - const response = await fetch( - `/api/chat/max-selected-document-tokens?persona_id=${livePersona.id}` - ); - if (response.ok) { - const maxTokens = (await response.json()).max_tokens as number; - setMaxTokens(maxTokens); - } - } - - fetchMaxTokens(); - }, [livePersona]); - - const filterManager = useFilters(); - const [finalAvailableSources, finalAvailableDocumentSets] = - computeAvailableFilters({ - selectedPersona, - availableSources, - availableDocumentSets, - }); - - // state for cancelling streaming - const [isCancelled, setIsCancelled] = useState(false); - const isCancelledRef = useRef(isCancelled); - useEffect(() => { - isCancelledRef.current = isCancelled; - }, [isCancelled]); - - const [currentFeedback, setCurrentFeedback] = useState< - [FeedbackType, number] | null - >(null); - - // auto scroll as message comes out - const scrollableDivRef = useRef(null); - const endDivRef = useRef(null); - useEffect(() => { - if (isStreaming || !message) { - handleAutoScroll(endDivRef, scrollableDivRef); - } - }); - - // scroll to bottom initially - const [hasPerformedInitialScroll, setHasPerformedInitialScroll] = useState( - shouldhideBeforeScroll !== true - ); - 
useEffect(() => { - endDivRef.current?.scrollIntoView(); - setHasPerformedInitialScroll(true); - }, [isFetchingChatMessages]); - - // handle re-sizing of the text area - const textareaRef = useRef(null); - useEffect(() => { - const textarea = textareaRef.current; - if (textarea) { - textarea.style.height = "0px"; - textarea.style.height = `${Math.min( - textarea.scrollHeight, - MAX_INPUT_HEIGHT - )}px`; - } - }, [message]); - - // used for resizing of the document sidebar - const masterFlexboxRef = useRef(null); - const [maxDocumentSidebarWidth, setMaxDocumentSidebarWidth] = useState< - number | null - >(null); - const adjustDocumentSidebarWidth = () => { - if (masterFlexboxRef.current && document.documentElement.clientWidth) { - // numbers below are based on the actual width the center section for different - // screen sizes. `1700` corresponds to the custom "3xl" tailwind breakpoint - // NOTE: some buffer is needed to account for scroll bars - if (document.documentElement.clientWidth > 1700) { - setMaxDocumentSidebarWidth(masterFlexboxRef.current.clientWidth - 950); - } else if (document.documentElement.clientWidth > 1420) { - setMaxDocumentSidebarWidth(masterFlexboxRef.current.clientWidth - 760); - } else { - setMaxDocumentSidebarWidth(masterFlexboxRef.current.clientWidth - 660); - } - } - }; - useEffect(() => { - adjustDocumentSidebarWidth(); // Adjust the width on initial render - window.addEventListener("resize", adjustDocumentSidebarWidth); // Add resize event listener - - return () => { - window.removeEventListener("resize", adjustDocumentSidebarWidth); // Cleanup the event listener - }; - }, []); - - if (!documentSidebarInitialWidth && maxDocumentSidebarWidth) { - documentSidebarInitialWidth = Math.min(700, maxDocumentSidebarWidth); - } - - const onSubmit = async ({ - messageIdToResend, - queryOverride, - forceSearch, - }: { - messageIdToResend?: number; - queryOverride?: string; - forceSearch?: boolean; - } = {}) => { - let currChatSessionId: number; - 
let isNewSession = chatSessionId === null; - if (isNewSession) { - currChatSessionId = await createChatSession(livePersona?.id || 0); - } else { - currChatSessionId = chatSessionId as number; - } - setChatSessionId(currChatSessionId); - - const messageToResend = messageHistory.find( - (message) => message.messageId === messageIdToResend - ); - const messageToResendIndex = messageToResend - ? messageHistory.indexOf(messageToResend) - : null; - if (!messageToResend && messageIdToResend !== undefined) { - setPopup({ - message: - "Failed to re-send message - please refresh the page and try again.", - type: "error", - }); - return; - } - - const currMessage = messageToResend ? messageToResend.message : message; - const currMessageHistory = - messageToResendIndex !== null - ? messageHistory.slice(0, messageToResendIndex) - : messageHistory; - setMessageHistory([ - ...currMessageHistory, - { - messageId: 0, - message: currMessage, - type: "user", - }, - ]); - setMessage(""); - - setIsStreaming(true); - let answer = ""; - let query: string | null = null; - let retrievalType: RetrievalType = - selectedDocuments.length > 0 - ? 
RetrievalType.SelectedDocs - : RetrievalType.None; - let documents: DanswerDocument[] = selectedDocuments; - let error: string | null = null; - let finalMessage: BackendMessage | null = null; - try { - const lastSuccessfulMessageId = - getLastSuccessfulMessageId(currMessageHistory); - for await (const packetBunch of sendMessage({ - message: currMessage, - parentMessageId: lastSuccessfulMessageId, - chatSessionId: currChatSessionId, - promptId: selectedPersona?.prompts[0]?.id || 0, - filters: buildFilters( - filterManager.selectedSources, - filterManager.selectedDocumentSets, - filterManager.timeRange, - filterManager.selectedTags - ), - selectedDocumentIds: selectedDocuments - .filter( - (document) => - document.db_doc_id !== undefined && document.db_doc_id !== null - ) - .map((document) => document.db_doc_id as number), - queryOverride, - forceSearch, - })) { - for (const packet of packetBunch) { - if (Object.hasOwn(packet, "answer_piece")) { - answer += (packet as AnswerPiecePacket).answer_piece; - } else if (Object.hasOwn(packet, "top_documents")) { - documents = (packet as DocumentsResponse).top_documents; - query = (packet as DocumentsResponse).rephrased_query; - retrievalType = RetrievalType.Search; - if (documents && documents.length > 0) { - // point to the latest message (we don't know the messageId yet, which is why - // we have to use -1) - setSelectedMessageForDocDisplay(-1); - } - } else if (Object.hasOwn(packet, "error")) { - error = (packet as StreamingError).error; - } else if (Object.hasOwn(packet, "message_id")) { - finalMessage = packet as BackendMessage; - } - } - setMessageHistory([ - ...currMessageHistory, - { - messageId: finalMessage?.parent_message || null, - message: currMessage, - type: "user", - }, - { - messageId: finalMessage?.message_id || null, - message: error || answer, - type: error ? 
"error" : "assistant", - retrievalType, - query: finalMessage?.rephrased_query || query, - documents: finalMessage?.context_docs?.top_documents || documents, - citations: finalMessage?.citations || {}, - }, - ]); - if (isCancelledRef.current) { - setIsCancelled(false); - break; - } - } - } catch (e: any) { - const errorMsg = e.message; - setMessageHistory([ - ...currMessageHistory, - { - messageId: null, - message: currMessage, - type: "user", - }, - { - messageId: null, - message: errorMsg, - type: "error", - }, - ]); - } - setIsStreaming(false); - if (isNewSession) { - if (finalMessage) { - setSelectedMessageForDocDisplay(finalMessage.message_id); - } - await nameChatSession(currChatSessionId, currMessage); - - // NOTE: don't switch pages if the user has navigated away from the chat - if ( - currChatSessionId === urlChatSessionId.current || - urlChatSessionId.current === null - ) { - router.push(`/chat?chatId=${currChatSessionId}`, { - scroll: false, - }); - } - } - if ( - finalMessage?.context_docs && - finalMessage.context_docs.top_documents.length > 0 && - retrievalType === RetrievalType.Search - ) { - setSelectedMessageForDocDisplay(finalMessage.message_id); - } - }; - - const onFeedback = async ( - messageId: number, - feedbackType: FeedbackType, - feedbackDetails: string - ) => { - if (chatSessionId === null) { - return; - } - - const response = await handleChatFeedback( - messageId, - feedbackType, - feedbackDetails - ); - - if (response.ok) { - setPopup({ - message: "Thanks for your feedback!", - type: "success", - }); - } else { - const responseJson = await response.json(); - const errorMsg = responseJson.detail || responseJson.message; - setPopup({ - message: `Failed to submit feedback - ${errorMsg}`, - type: "error", - }); - } - }; - - return ( -
- {popup} - {currentFeedback && ( - setCurrentFeedback(null)} - onSubmit={(feedbackDetails) => { - onFeedback(currentFeedback[1], currentFeedback[0], feedbackDetails); - setCurrentFeedback(null); - }} - /> - )} - - {documentSidebarInitialWidth !== undefined ? ( - <> -
-
- {livePersona && ( -
-
- { - if (persona) { - setSelectedPersona(persona); - router.push(`/chat?personaId=${persona.id}`); - } - }} - /> -
-
- )} - - {messageHistory.length === 0 && - !isFetchingChatMessages && - !isStreaming && ( - { - setSelectedPersona(persona); - router.push(`/chat?personaId=${persona.id}`); - }} - /> - )} - -
- {messageHistory.map((message, i) => { - if (message.type === "user") { - return ( -
- -
- ); - } else if (message.type === "assistant") { - const isShowingRetrieved = - (selectedMessageForDocDisplay !== null && - selectedMessageForDocDisplay === message.messageId) || - (selectedMessageForDocDisplay === -1 && - i === messageHistory.length - 1); - const previousMessage = - i !== 0 ? messageHistory[i - 1] : null; - return ( -
- 0) === true - } - handleFeedback={ - i === messageHistory.length - 1 && isStreaming - ? undefined - : (feedbackType) => - setCurrentFeedback([ - feedbackType, - message.messageId as number, - ]) - } - handleSearchQueryEdit={ - i === messageHistory.length - 1 && !isStreaming - ? (newQuery) => { - if (!previousMessage) { - setPopup({ - type: "error", - message: - "Cannot edit query of first message - please refresh the page and try again.", - }); - return; - } - - if (previousMessage.messageId === null) { - setPopup({ - type: "error", - message: - "Cannot edit query of a pending message - please wait a few seconds and try again.", - }); - return; - } - onSubmit({ - messageIdToResend: - previousMessage.messageId, - queryOverride: newQuery, - }); - } - : undefined - } - isCurrentlyShowingRetrieved={isShowingRetrieved} - handleShowRetrieved={(messageNumber) => { - if (isShowingRetrieved) { - setSelectedMessageForDocDisplay(null); - } else { - if (messageNumber !== null) { - setSelectedMessageForDocDisplay(messageNumber); - } else { - setSelectedMessageForDocDisplay(-1); - } - } - }} - handleForceSearch={() => { - if (previousMessage && previousMessage.messageId) { - onSubmit({ - messageIdToResend: previousMessage.messageId, - forceSearch: true, - }); - } else { - setPopup({ - type: "error", - message: - "Failed to force search - please refresh the page and try again.", - }); - } - }} - /> -
- ); - } else { - return ( -
- - {message.message} -

- } - /> -
- ); - } - })} - - {isStreaming && - messageHistory.length && - messageHistory[messageHistory.length - 1].type === "user" && ( -
- - -
- } - /> -
- )} - - {/* Some padding at the bottom so the search bar has space at the bottom to not cover the last message*/} -
- -
-
-
- -
-
-
-
- {selectedDocuments.length > 0 ? ( - - ) : ( - - )} -
-
- -
-
-