CONTRIBUTING updates (#2354)

This commit is contained in:
Yuhong Sun 2024-09-07 14:05:36 -07:00 committed by GitHub
parent 491f3254a5
commit 6cec31088d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 171 additions and 110 deletions

2
.gitignore vendored
View File

@ -4,6 +4,6 @@
.mypy_cache
.idea
/deployment/data/nginx/app.conf
.vscode/launch.json
.vscode/
*.sw?
/backend/tests/regression/answer_quality/search_test_config.yaml

View File

@ -1,5 +1,5 @@
# Copy this file to .env at the base of the repo and fill in the <REPLACE THIS> values
# This will help with development iteration speed and reduce repeat tasks for dev
# Copy this file to .env in the .vscode folder
# Fill in the <REPLACE THIS> values as needed, it is recommended to set the GEN_AI_API_KEY value to avoid having to set up an LLM in the UI
# Also check out danswer/backend/scripts/restart_containers.sh for a script to restart the containers which Danswer relies on outside of VSCode/Cursor processes
# For local dev, often user Authentication is not needed
@ -15,7 +15,7 @@ LOG_LEVEL=debug
# This passes top N results to LLM an additional time for reranking prior to answer generation
# This step is quite heavy on token usage so we disable it for dev generally
DISABLE_LLM_DOC_RELEVANCE=True
DISABLE_LLM_DOC_RELEVANCE=False
# Useful if you want to toggle auth on/off (google_oauth/OIDC specifically)
@ -27,9 +27,9 @@ REQUIRE_EMAIL_VERIFICATION=False
# Set these so if you wipe the DB, you don't end up having to go through the UI every time
GEN_AI_API_KEY=<REPLACE THIS>
# If answer quality isn't important for dev, use 3.5 turbo due to it being cheaper
GEN_AI_MODEL_VERSION=gpt-3.5-turbo
FAST_GEN_AI_MODEL_VERSION=gpt-3.5-turbo
# Defaults to gpt-4o; if answer quality isn't important for dev, switch both values to gpt-4o-mini since it's cheaper
GEN_AI_MODEL_VERSION=gpt-4o
FAST_GEN_AI_MODEL_VERSION=gpt-4o
# For Danswer Slack Bot, overrides the UI values so no need to set this up via UI every time
# Only needed if using DanswerBot
@ -38,7 +38,7 @@ FAST_GEN_AI_MODEL_VERSION=gpt-3.5-turbo
# Python stuff
PYTHONPATH=./backend
PYTHONPATH=../backend
PYTHONUNBUFFERED=1
@ -49,4 +49,3 @@ BING_API_KEY=<REPLACE THIS>
# Enable the full set of Danswer Enterprise Edition features
# NOTE: DO NOT ENABLE THIS UNLESS YOU HAVE A PAID ENTERPRISE LICENSE (or if you are using this for local testing/development)
ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=False

View File

@ -1,15 +1,23 @@
/*
Copy this file into '.vscode/launch.json' or merge its
contents into your existing configurations.
*/
/* Copy this file into '.vscode/launch.json' or merge its contents into your existing configurations. */
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"compounds": [
{
"name": "Run All Danswer Services",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Indexing",
"Background Jobs",
"Slack Bot"
]
}
],
"configurations": [
{
"name": "Web Server",
@ -17,7 +25,7 @@
"request": "launch",
"cwd": "${workspaceRoot}/web",
"runtimeExecutable": "npm",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"runtimeArgs": [
"run", "dev"
],
@ -25,11 +33,12 @@
},
{
"name": "Model Server",
"type": "python",
"consoleName": "Model Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1"
@ -39,16 +48,16 @@
"--reload",
"--port",
"9000"
],
"consoleTitle": "Model Server"
]
},
{
"name": "API Server",
"type": "python",
"consoleName": "API Server",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
@ -59,32 +68,32 @@
"--reload",
"--port",
"8080"
],
"consoleTitle": "API Server"
]
},
{
"name": "Indexing",
"type": "python",
"consoleName": "Indexing",
"type": "debugpy",
"request": "launch",
"program": "danswer/background/update.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"ENABLE_MULTIPASS_INDEXING": "false",
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
"PYTHONPATH": "."
},
"consoleTitle": "Indexing"
}
},
// Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev
{
"name": "Background Jobs",
"type": "python",
"consoleName": "Background Jobs",
"type": "debugpy",
"request": "launch",
"program": "scripts/dev_run_background_jobs.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_DANSWER_MODEL_INTERACTIONS": "True",
"LOG_LEVEL": "DEBUG",
@ -93,18 +102,18 @@
},
"args": [
"--no-indexing"
],
"consoleTitle": "Background Jobs"
]
},
// For the listener to access the Slack API,
// DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in the .env file located in the .vscode folder
{
"name": "Slack Bot",
"type": "python",
"consoleName": "Slack Bot",
"type": "debugpy",
"request": "launch",
"program": "danswer/danswerbot/slack/listener.py",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
@ -113,11 +122,12 @@
},
{
"name": "Pytest",
"type": "python",
"consoleName": "Pytest",
"type": "debugpy",
"request": "launch",
"module": "pytest",
"cwd": "${workspaceFolder}/backend",
"envFile": "${workspaceFolder}/.env",
"envFile": "${workspaceFolder}/.vscode/.env",
"env": {
"LOG_LEVEL": "DEBUG",
"PYTHONUNBUFFERED": "1",
@ -128,18 +138,16 @@
// Specify a specific module/test to run or provide nothing to run all tests
//"tests/unit/danswer/llm/answering/test_prune_and_merge.py"
]
}
],
"compounds": [
},
{
"name": "Run Danswer",
"configurations": [
"Web Server",
"Model Server",
"API Server",
"Indexing",
"Background Jobs",
]
"name": "Clear and Restart External Volumes and Containers",
"type": "node",
"request": "launch",
"runtimeExecutable": "bash",
"runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"],
"cwd": "${workspaceFolder}",
"console": "integratedTerminal",
"stopOnEntry": true
}
]
}

View File

@ -48,34 +48,25 @@ We would love to see you there!
## Get Started 🚀
Danswer being a fully functional app, relies on some external pieces of software, specifically:
Danswer, being a fully functional app, relies on some external software, specifically:
- [Postgres](https://www.postgresql.org/) (Relational DB)
- [Vespa](https://vespa.ai/) (Vector DB/Search Engine)
- [Redis](https://redis.io/) (Cache)
- [Nginx](https://nginx.org/) (Not needed for development flows generally)
This guide provides instructions to set up the Danswer specific services outside of Docker because it's easier for
development purposes but also feel free to just use the containers and update with local changes by providing the
`--build` flag.
> **Note:**
> This guide provides instructions to set up the Danswer specific services outside of Docker because it's easier for
> development purposes. However, you can also use the containers and update with local changes by providing the
> `--build` flag.
### Local Set Up
It is recommended to use Python version 3.11.
Be sure to use Python version 3.11.
If using a lower version, modifications will have to be made to the code.
If using a higher version, the version of Tensorflow we use may not be available for your platform.
If using a higher version, some libraries may not be available (e.g. we had problems with Tensorflow in the past with higher versions of Python).
On macOS, ensure Homebrew is already set up. (https://brew.sh/)
Then install python 3.11.
```bash
brew install python@3.11
```
Add python 3.11 to your path: add the following line to ~/.zshrc
```
export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
```
You will need to open a new terminal for the path change above to take effect.
#### Installing Requirements
Currently, we use pip and recommend creating a virtual environment.
@ -86,8 +77,9 @@ python -m venv .venv
source .venv/bin/activate
```
--> Note that this virtual environment MUST NOT be set up WITHIN the danswer
directory
> **Note:**
> This virtual environment MUST NOT be set up WITHIN the danswer directory if you plan on using mypy within certain IDEs.
> For simplicity, we recommend setting up the virtual environment outside of the danswer directory.
_For Windows, activate the virtual environment using Command Prompt:_
```bash
@ -102,13 +94,10 @@ Install the required python dependencies:
```bash
pip install -r danswer/backend/requirements/default.txt
pip install -r danswer/backend/requirements/dev.txt
pip install -r danswer/backend/requirements/ee.txt
pip install -r danswer/backend/requirements/model_server.txt
```
If developing Enterprise Edition features, also install those requirements.
```bash
pip install -r danswer/backend/requirements/ee.txt
```
Install [Node.js and npm](https://docs.npmjs.com/downloading-and-installing-node-js-and-npm) for the frontend.
Once the above is done, navigate to `danswer/web` and run:
@ -116,10 +105,11 @@ Once the above is done, navigate to `danswer/web` run:
npm i
```
Install Playwright (required by the Web Connector)
Install Playwright (headless browser required by the Web Connector)
> Note: If you have just done the pip install, open a new terminal and source the python virtual-env again.
This will update the path to include playwright
> **Note:**
> If you have just run the pip install, open a new terminal and source the python virtual-env again.
> This will pull the updated PATH to include playwright
Then install Playwright by running:
```bash
@ -130,15 +120,13 @@ playwright install
#### Dependent Docker Containers
You will need Docker installed to run these containers.
On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and
ensure it is running before continuing with the following docker commands.
First navigate to `danswer/deployment/docker_compose`, then start up Vespa and Postgres with:
First navigate to `danswer/deployment/docker_compose`, then start up Postgres/Vespa/Redis with:
```bash
docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db cache
```
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
#### Running Danswer
To start the frontend, navigate to `danswer/web` and run:
```bash
@ -150,11 +138,10 @@ Navigate to `danswer/backend` and run:
```bash
uvicorn model_server.main:app --reload --port 9000
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
uvicorn model_server.main:app --reload --port 9000
"
powershell -Command "uvicorn model_server.main:app --reload --port 9000"
```
The first time running Danswer, you will need to run the DB migrations for Postgres.
@ -177,6 +164,7 @@ To run the backend API server, navigate back to `danswer/backend` and run:
```bash
AUTH_TYPE=disabled uvicorn danswer.main:app --reload --port 8080
```
_For Windows (for compatibility with both PowerShell and Command Prompt):_
```bash
powershell -Command "
@ -185,7 +173,9 @@ powershell -Command "
"
```
Note: if you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
> **Note:**
> If you need finer logging, add the additional environment variable `LOG_LEVEL=DEBUG` to the relevant services.
### Formatting and Linting
#### Backend
@ -193,7 +183,7 @@ For the backend, you'll need to setup pre-commit hooks (black / reorder-python-i
First, install pre-commit (if you don't have it already) following the instructions
[here](https://pre-commit.com/#installation).
On macOS, from the danswer directory you can simply install pre-commit with the following command.
With the virtual environment active, install the pre-commit library with:
```bash
pip install pre-commit
```
@ -203,13 +193,8 @@ Then, from the `danswer/backend` directory, run:
pre-commit install
```
macOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly.
```
sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit
```
Additionally, we use `mypy` for static type checking.
Danswer is fully type-annotated, and we would like to keep it that way!
Danswer is fully type-annotated, and we want to keep it that way!
To run the mypy checks manually, run `python -m mypy .` from the `danswer/backend` directory.
@ -220,6 +205,7 @@ Please double check that prettier passes before creating a pull request.
### Release Process
Danswer follows the semver versioning standard.
Danswer loosely follows the SemVer versioning standard.
Major changes are released with a "minor" version bump. Currently we use patch release versions to indicate small feature changes.
A set of Docker containers will be pushed automatically to DockerHub with every tag.
You can see the containers [here](https://hub.docker.com/search?q=danswer%2F).

31
CONTRIBUTING_MACOS.md Normal file
View File

@ -0,0 +1,31 @@
## Some additional notes for Mac Users
The base instructions to set up the development environment are located in [CONTRIBUTING.md](https://github.com/danswer-ai/danswer/blob/main/CONTRIBUTING.md).
### Setting up Python
Ensure [Homebrew](https://brew.sh/) is already set up.
Then install python 3.11.
```bash
brew install python@3.11
```
Add python 3.11 to your path: add the following line to ~/.zshrc
```
export PATH="$(brew --prefix)/opt/python@3.11/libexec/bin:$PATH"
```
> **Note:**
> You will need to open a new terminal for the path change above to take effect.
### Setting up Docker
On macOS, you will need to install [Docker Desktop](https://www.docker.com/products/docker-desktop/) and
ensure it is running before continuing with the docker commands.
### Formatting and Linting
macOS will likely require you to remove some quarantine attributes on some of the hooks for them to execute properly.
After installing pre-commit, run the following command:
```bash
sudo xattr -r -d com.apple.quarantine ~/.cache/pre-commit
```

View File

@ -51,8 +51,10 @@ CROSS_ENCODER_RANGE_MIN = 0
# Generative AI Model Configs
#####
# NOTE: settings like `GEN_AI_MODEL_PROVIDER`, `GEN_AI_MODEL_VERSION`, etc. which
# used to be here are now solely configured via the UI and stored in Postgres.
# NOTE: the 3 below should only be used for dev.
GEN_AI_API_KEY = os.environ.get("GEN_AI_API_KEY")
GEN_AI_MODEL_VERSION = os.environ.get("GEN_AI_MODEL_VERSION")
FAST_GEN_AI_MODEL_VERSION = os.environ.get("FAST_GEN_AI_MODEL_VERSION")
# Override the auto-detection of LLM max context length
GEN_AI_MAX_TOKENS = int(os.environ.get("GEN_AI_MAX_TOKENS") or 0) or None

View File

@ -60,7 +60,7 @@ def upsert_cloud_embedding_provider(
def upsert_llm_provider(
db_session: Session, llm_provider: LLMProviderUpsertRequest
llm_provider: LLMProviderUpsertRequest, db_session: Session
) -> FullLLMProvider:
existing_llm_provider = db_session.scalar(
select(LLMProviderModel).where(LLMProviderModel.name == llm_provider.name)
@ -187,7 +187,7 @@ def remove_llm_provider(db_session: Session, provider_id: int) -> None:
db_session.commit()
def update_default_provider(db_session: Session, provider_id: int) -> None:
def update_default_provider(provider_id: int, db_session: Session) -> None:
new_default = db_session.scalar(
select(LLMProviderModel).where(LLMProviderModel.id == provider_id)
)

View File

@ -38,6 +38,9 @@ from danswer.configs.constants import AuthType
from danswer.configs.constants import KV_REINDEX_KEY
from danswer.configs.constants import KV_SEARCH_SETTINGS
from danswer.configs.constants import POSTGRES_WEB_APP_NAME
from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION
from danswer.configs.model_configs import GEN_AI_API_KEY
from danswer.configs.model_configs import GEN_AI_MODEL_VERSION
from danswer.db.connector import check_connectors_exist
from danswer.db.connector import create_initial_default_connector
from danswer.db.connector_credential_pair import associate_default_cc_pair
@ -50,6 +53,9 @@ from danswer.db.engine import init_sqlalchemy_engine
from danswer.db.engine import warm_up_connections
from danswer.db.index_attempt import cancel_indexing_attempts_past_model
from danswer.db.index_attempt import expire_index_attempts
from danswer.db.llm import fetch_default_provider
from danswer.db.llm import update_default_provider
from danswer.db.llm import upsert_llm_provider
from danswer.db.persona import delete_old_default_personas
from danswer.db.search_settings import get_current_search_settings
from danswer.db.search_settings import get_secondary_search_settings
@ -92,6 +98,7 @@ from danswer.server.manage.embedding.api import basic_router as embedding_router
from danswer.server.manage.get_state import router as state_router
from danswer.server.manage.llm.api import admin_router as llm_admin_router
from danswer.server.manage.llm.api import basic_router as llm_router
from danswer.server.manage.llm.models import LLMProviderUpsertRequest
from danswer.server.manage.search_settings import router as search_settings_router
from danswer.server.manage.slack_bot import router as slack_bot_management_router
from danswer.server.manage.standard_answer import router as standard_answer_router
@ -191,6 +198,30 @@ def setup_postgres(db_session: Session) -> None:
refresh_built_in_tools_cache(db_session)
auto_add_search_tool_to_personas(db_session)
if GEN_AI_API_KEY and fetch_default_provider(db_session) is None:
# Only for dev flows
logger.notice("Setting up default OpenAI LLM for dev.")
llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini"
fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini"
model_req = LLMProviderUpsertRequest(
name="DevEnvPresetOpenAI",
provider="openai",
api_key=GEN_AI_API_KEY,
api_base=None,
api_version=None,
custom_config=None,
default_model_name=llm_model,
fast_default_model_name=fast_model,
is_public=True,
groups=[],
display_model_names=[llm_model, fast_model],
model_names=[llm_model, fast_model],
)
new_llm_provider = upsert_llm_provider(
llm_provider=model_req, db_session=db_session
)
update_default_provider(provider_id=new_llm_provider.id, db_session=db_session)
def update_default_multipass_indexing(db_session: Session) -> None:
docs_exist = check_docs_exist(db_session)
@ -202,7 +233,7 @@ def update_default_multipass_indexing(db_session: Session) -> None:
"No existing docs or connectors found. Checking GPU availability for multipass indexing."
)
gpu_available = gpu_status_request()
logger.info(f"GPU availability: {gpu_available}")
logger.info(f"GPU available: {gpu_available}")
current_settings = get_current_search_settings(db_session)

View File

@ -121,7 +121,7 @@ def put_llm_provider(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> FullLLMProvider:
return upsert_llm_provider(db_session, llm_provider)
return upsert_llm_provider(llm_provider=llm_provider, db_session=db_session)
@admin_router.delete("/provider/{provider_id}")
@ -139,7 +139,7 @@ def set_provider_as_default(
_: User | None = Depends(current_admin_user),
db_session: Session = Depends(get_session),
) -> None:
update_default_provider(db_session, provider_id)
update_default_provider(provider_id=provider_id, db_session=db_session)
"""Endpoints for all"""

View File

@ -51,10 +51,12 @@ def _seed_llms(
if llm_upsert_requests:
logger.notice("Seeding LLMs")
seeded_providers = [
upsert_llm_provider(db_session, llm_upsert_request)
upsert_llm_provider(llm_upsert_request, db_session)
for llm_upsert_request in llm_upsert_requests
]
update_default_provider(db_session, seeded_providers[0].id)
update_default_provider(
provider_id=seeded_providers[0].id, db_session=db_session
)
def _seed_personas(db_session: Session, personas: list[CreatePersonaRequest]) -> None:

View File

@ -1,15 +1,16 @@
#!/bin/bash
# Usage of the script with optional volume arguments
# ./restart_containers.sh [vespa_volume] [postgres_volume]
# ./restart_containers.sh [vespa_volume] [postgres_volume] [redis_volume]
VESPA_VOLUME=${1:-""} # Default is empty if not provided
POSTGRES_VOLUME=${2:-""} # Default is empty if not provided
REDIS_VOLUME=${3:-""} # Default is empty if not provided
# Stop and remove the existing containers
echo "Stopping and removing existing containers..."
docker stop danswer_postgres danswer_vespa
docker rm danswer_postgres danswer_vespa
docker stop danswer_postgres danswer_vespa danswer_redis
docker rm danswer_postgres danswer_vespa danswer_redis
# Start the PostgreSQL container with optional volume
echo "Starting PostgreSQL container..."
@ -27,6 +28,14 @@ else
docker run --detach --name danswer_vespa --hostname vespa-container --publish 8081:8081 --publish 19071:19071 vespaengine/vespa:8
fi
# Start the Redis container with optional volume
echo "Starting Redis container..."
if [[ -n "$REDIS_VOLUME" ]]; then
docker run --detach --name danswer_redis --publish 6379:6379 -v $REDIS_VOLUME:/data redis
else
docker run --detach --name danswer_redis --publish 6379:6379 redis
fi
# Ensure alembic runs in the correct directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PARENT_DIR="$(dirname "$SCRIPT_DIR")"

View File

@ -31,8 +31,3 @@ DISABLE_LLM_DOC_RELEVANCE="True"
# Since reranking is turned off and multilingual retrieval is generally harder
# it is advised to turn this one on
ENABLE_MULTIPASS_INDEXING="True"
# Using a stronger LLM will help with multilingual tasks
# Since documents may be in multiple languages, and there are additional instructions to respond
# in the user query's language, it is advised to use the best model possible
GEN_AI_MODEL_VERSION="gpt-4"

View File

@ -368,7 +368,6 @@ auth:
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
gen_ai_api_key: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
@ -382,7 +381,6 @@ auth:
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
gen_ai_api_key: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"