Obfuscate API keys

This commit is contained in:
pablodanswer
2024-09-13 16:39:54 -07:00
parent 648c2531f9
commit 001bbb89cc
8 changed files with 34 additions and 556 deletions

View File

@@ -181,12 +181,15 @@ def update_current_search_settings(
logger.warning("No current search settings found to update") logger.warning("No current search settings found to update")
return return
print("current settings", current_settings.__dict__)
print("search settings", search_settings.__dict__)
# Whenever we update the current search settings, we should ensure that the local reranking model is warmed up. # Whenever we update the current search settings, we should ensure that the local reranking model is warmed up.
if ( if (
current_settings.provider_type is None search_settings.rerank_provider_type is None
and search_settings.rerank_model_name is not None and search_settings.rerank_model_name is not None
and current_settings.rerank_model_name != search_settings.rerank_model_name and current_settings.rerank_model_name != search_settings.rerank_model_name
): ):
print("WARMIGN THIS STUFF UP!")
warm_up_cross_encoder(search_settings.rerank_model_name) warm_up_cross_encoder(search_settings.rerank_model_name)
update_search_settings(current_settings, search_settings, preserved_fields) update_search_settings(current_settings, search_settings, preserved_fields)

View File

@@ -16,7 +16,7 @@ from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType from danswer.search.enums import SearchType
from shared_configs.enums import RerankerProvider from shared_configs.enums import RerankerProvider
from shared_configs.utils import obfuscate_api_key
MAX_METRICS_CONTENT = ( MAX_METRICS_CONTENT = (
200 # Just need enough characters to identify where in the doc the chunk is 200 # Just need enough characters to identify where in the doc the chunk is
@@ -87,6 +87,22 @@ class SavedSearchSettings(InferenceSettings, IndexingSetting):
) )
class SearchSettingsSnapshot(SavedSearchSettings):
    """API-facing view of search settings with provider API keys obfuscated.

    Use this (rather than ``SavedSearchSettings``) in endpoint responses so
    raw API keys are never serialized back to clients.
    """

    @classmethod
    def from_saved_settings(
        cls, settings: SavedSearchSettings
    ) -> "SearchSettingsSnapshot":
        """Build a snapshot from saved settings, masking both API keys.

        Exclude both secrets up front so the raw values never enter the
        intermediate dict (previously only ``rerank_api_key`` was excluded,
        leaving the raw ``api_key`` briefly present before being overwritten).
        """
        data = settings.dict(exclude={"rerank_api_key", "api_key"})
        data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
        data["api_key"] = obfuscate_api_key(settings.api_key)
        return cls(**data)

    @classmethod
    def from_db_model(cls, settings: SearchSettings) -> "SearchSettingsSnapshot":
        """Convert a ``SearchSettings`` DB row into an obfuscated snapshot."""
        return cls.from_saved_settings(SavedSearchSettings.from_db_model(settings))
class Tag(BaseModel): class Tag(BaseModel):
tag_key: str tag_key: str
tag_value: str tag_value: str

View File

@@ -4,7 +4,7 @@ from pydantic import BaseModel
from pydantic import Field from pydantic import Field
from danswer.llm.llm_provider_options import fetch_models_for_provider from danswer.llm.llm_provider_options import fetch_models_for_provider
from shared_configs.utils import obfuscate_api_key
if TYPE_CHECKING: if TYPE_CHECKING:
from danswer.db.models import LLMProvider as LLMProviderModel from danswer.db.models import LLMProvider as LLMProviderModel
@@ -82,10 +82,10 @@ class FullLLMProvider(LLMProvider):
@classmethod @classmethod
def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider": def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider":
return cls( return cls(
api_key=obfuscate_api_key(llm_provider_model.api_key),
id=llm_provider_model.id, id=llm_provider_model.id,
name=llm_provider_model.name, name=llm_provider_model.name,
provider=llm_provider_model.provider, provider=llm_provider_model.provider,
api_key=llm_provider_model.api_key,
api_base=llm_provider_model.api_base, api_base=llm_provider_model.api_base,
api_version=llm_provider_model.api_version, api_version=llm_provider_model.api_version,
custom_config=llm_provider_model.custom_config, custom_config=llm_provider_model.custom_config,

View File

@@ -24,6 +24,7 @@ from danswer.document_index.factory import get_default_document_index
from danswer.natural_language_processing.search_nlp_models import clean_model_name from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.search.models import SavedSearchSettings from danswer.search.models import SavedSearchSettings
from danswer.search.models import SearchSettingsCreationRequest from danswer.search.models import SearchSettingsCreationRequest
from danswer.search.models import SearchSettingsSnapshot
from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest
from danswer.server.manage.models import FullModelVersionResponse from danswer.server.manage.models import FullModelVersionResponse
from danswer.server.models import IdReturn from danswer.server.models import IdReturn
@@ -154,21 +155,21 @@ def delete_search_settings_endpoint(
def get_current_search_settings_endpoint( def get_current_search_settings_endpoint(
_: User | None = Depends(current_user), _: User | None = Depends(current_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> SavedSearchSettings: ) -> SearchSettingsSnapshot:
current_search_settings = get_current_search_settings(db_session) current_search_settings = get_current_search_settings(db_session)
return SavedSearchSettings.from_db_model(current_search_settings) return SearchSettingsSnapshot.from_db_model(current_search_settings)
@router.get("/get-secondary-search-settings") @router.get("/get-secondary-search-settings")
def get_secondary_search_settings_endpoint( def get_secondary_search_settings_endpoint(
_: User | None = Depends(current_user), _: User | None = Depends(current_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> SavedSearchSettings | None: ) -> SearchSettingsSnapshot | None:
secondary_search_settings = get_secondary_search_settings(db_session) secondary_search_settings = get_secondary_search_settings(db_session)
if not secondary_search_settings: if not secondary_search_settings:
return None return None
return SavedSearchSettings.from_db_model(secondary_search_settings) return SearchSettingsSnapshot.from_db_model(secondary_search_settings)
@router.get("/get-all-search-settings") @router.get("/get-all-search-settings")

View File

@@ -9,3 +9,9 @@ def batch_list(
batch_size: int, batch_size: int,
) -> list[list[T]]: ) -> list[list[T]]:
return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]
def obfuscate_api_key(api_key: str | None) -> str | None:
if api_key is None:
return None
return "*" * len(api_key)

View File

@@ -1,59 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "danswer-stack.fullname" . }}-api-deployment
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
spec:
{{- if not .Values.api.autoscaling.enabled }}
replicas: {{ .Values.api.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "danswer-stack.selectorLabels" . | nindent 6 }}
{{- if .Values.api.deploymentLabels }}
{{- toYaml .Values.api.deploymentLabels | nindent 6 }}
{{- end }}
template:
metadata:
{{- with .Values.api.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "danswer-stack.labels" . | nindent 8 }}
{{- with .Values.api.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.api.podSecurityContext | nindent 8 }}
containers:
- name: api-server
securityContext:
{{- toYaml .Values.api.securityContext | nindent 12 }}
image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.api.image.pullPolicy }}
command:
- "/bin/sh"
- "-c"
- |
alembic upgrade head &&
echo "Starting Danswer Api Server" &&
uvicorn danswer.main:app --host 0.0.0.0 --port 8080
ports:
- name: api-server-port
containerPort: {{ .Values.api.service.port }}
protocol: TCP
resources:
{{- toYaml .Values.api.resources | nindent 12 }}
envFrom:
- configMapRef:
name: {{ .Values.config.envConfigMapName }}
env:
{{- include "danswer-stack.envSecrets" . | nindent 12}}

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.envConfigMapName }}
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
data:
INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: "document-index-service"
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}

View File

@@ -1,473 +0,0 @@
# Default values for danswer-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
name: inference-model-server-service
type: ClusterIP
port: 9000
pvc:
name: inference-model-pvc
accessModes:
- ReadWriteOnce
storage: 3Gi
deployment:
name: inference-model-server-deployment
replicas: 1
labels:
- key: app
value: inference-model-server
image:
repository: danswer/danswer-model-server
tag: latest
pullPolicy: IfNotPresent
command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
port: 9000
volumeMounts:
- name: inference-model-storage
mountPath: /root/.cache
volumes:
- name: inference-model-storage
persistentVolumeClaim:
claimName: inference-model-pvc
podLabels:
- key: app
value: inference-model-server
indexCapability:
service:
type: ClusterIP
port: 9000
name: indexing-model-server-port
deploymentLabels:
app: indexing-model-server
podLabels:
app: indexing-model-server
indexingOnly: "True"
podAnnotations: {}
volumeMounts:
- name: indexing-model-storage
mountPath: /root/.cache
volumes:
- name: indexing-model-storage
persistentVolumeClaim:
claimName: indexing-model-storage
indexingModelPVC:
name: indexing-model-storage
accessMode: "ReadWriteOnce"
storage: "3Gi"
config:
envConfigMapName: env-configmap
serviceAccount:
# Specifies whether a service account should be created
create: false
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
postgresql:
primary:
persistence:
size: 5Gi
enabled: true
auth:
existingSecret: danswer-secrets
secretKeys:
adminPasswordKey: postgres_password # overwriting as postgres typically expects 'postgres-password'
nginx:
containerPorts:
http: 1024
extraEnvVars:
- name: DOMAIN
value: localhost
service:
ports:
http: 80
danswer: 3000
targetPort:
http: http
danswer: http
existingServerBlockConfigmap: danswer-nginx-conf
webserver:
replicaCount: 1
image:
repository: danswer/danswer-web-server
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: web-server
podAnnotations: {}
podLabels:
app: web-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 3000
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}
api:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: api-server
podAnnotations: {}
podLabels:
scope: danswer-backend
app: api-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 8080
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
background:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: latest
podAnnotations: {}
podLabels:
scope: danswer-backend
app: background
deploymentLabels:
app: background
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
vespa:
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: danswer
app.kubernetes.io/name: vespa
podSecurityContext: {}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:
enabled: true
architecture: standalone
commonConfiguration: |-
# Enable AOF https://redis.io/topics/persistence#append-only-file
appendonly no
# Disable RDB persistence, AOF persistence already enabled.
save ""
master:
replicaCount: 1
image:
registry: docker.io
repository: bitnami/redis
tag: "7.4.0"
pullPolicy: IfNotPresent
persistence:
enabled: false
service:
type: ClusterIP
port: 6379
auth:
existingSecret: danswer-secrets
existingSecretPasswordKey: redis_password
# ingress:
# enabled: false
# className: ""
# annotations: {}
# # kubernetes.io/ingress.class: nginx
# # kubernetes.io/tls-acme: "true"
# hosts:
# - host: chart-example.local
# paths:
# - path: /
# pathType: ImplementationSpecific
# tls: []
# # - secretName: chart-example-tls
# # hosts:
# # - chart-example.local
persistence:
vespa:
enabled: true
existingClaim: ""
storageClassName: ""
accessModes:
- ReadWriteOnce
size: 5Gi
auth:
# for storing smtp, oauth, slack, and other secrets
# keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
existingSecret: "" # danswer-secrets
# optionally override the secret keys to reference in the secret
# this is used to populate the env vars in individual deployments
# the values here reference the keys in secrets below
secretKeys:
postgres_password: "postgres_password"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
# will be overridden by the existingSecret if set
secretName: "danswer-secrets"
# set values as strings, they will be base64 encoded
# this is used to populate the secrets yaml
secrets:
postgres_password: "postgres"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"
configMap:
AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default
VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_USER: "" # 'your-email@company.com'
# SMTP_PASS: "" # 'your-gmail-password'
EMAIL_FROM: "" # 'your-email@company.com'; if unset, SMTP_USER is used instead
# Gen AI Settings
GEN_AI_MAX_TOKENS: ""
QA_TIMEOUT: "60"
MAX_CHUNKS_FED_TO_CHAT: ""
DISABLE_LLM_DOC_RELEVANCE: ""
DISABLE_LLM_CHOOSE_SEARCH: ""
DISABLE_LLM_QUERY_REPHRASE: ""
# Query Options
DOC_TIME_DECAY: ""
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
ASYM_PASSAGE_PREFIX: ""
DISABLE_RERANK_FOR_STREAMING: ""
MODEL_SERVER_PORT: ""
MIN_THREADS_ML_MODELS: ""
# Indexing Configs
VESPA_SEARCHER_THREADS: ""
NUM_INDEXING_WORKERS: ""
DISABLE_INDEX_UPDATE_ON_SWAP: ""
DASK_JOB_CLIENT_ENABLED: ""
CONTINUE_ON_CONNECTOR_FAILURE: ""
EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
JIRA_API_VERSION: ""
GONG_CONNECTOR_START_TIME: ""
NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
# DanswerBot SlackBot Configs
# DANSWER_BOT_SLACK_APP_TOKEN: ""
# DANSWER_BOT_SLACK_BOT_TOKEN: ""
DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
DANSWER_BOT_DISABLE_COT: "" # Currently unused
NOTIFY_SLACKBOT_NO_ANSWER: ""
# Logging
# Optional telemetry — please keep it on; nothing sensitive is collected. <3
# https://docs.danswer.dev/more/telemetry
DISABLE_TELEMETRY: ""
LOG_LEVEL: ""
LOG_ALL_MODEL_INTERACTIONS: ""
LOG_DANSWER_MODEL_INTERACTIONS: ""
LOG_VESPA_TIMING_INFORMATION: ""
# Shared or Non-backend Related
WEB_DOMAIN: "http://localhost:3000" # for web server and api server
DOMAIN: "localhost" # for nginx
# Chat Configs
HARD_DELETE_CHATS: ""