obfuscate api keys

This commit is contained in:
pablodanswer
2024-09-13 16:39:54 -07:00
parent 648c2531f9
commit 001bbb89cc
8 changed files with 34 additions and 556 deletions

View File

@@ -181,12 +181,15 @@ def update_current_search_settings(
logger.warning("No current search settings found to update")
return
print("current settings", current_settings.__dict__)
print("search settings", search_settings.__dict__)
# Whenever we update the current search settings, we should ensure that the local reranking model is warmed up.
if (
current_settings.provider_type is None
search_settings.rerank_provider_type is None
and search_settings.rerank_model_name is not None
and current_settings.rerank_model_name != search_settings.rerank_model_name
):
print("WARMIGN THIS STUFF UP!")
warm_up_cross_encoder(search_settings.rerank_model_name)
update_search_settings(current_settings, search_settings, preserved_fields)

View File

@@ -16,7 +16,7 @@ from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType
from shared_configs.enums import RerankerProvider
from shared_configs.utils import obfuscate_api_key
MAX_METRICS_CONTENT = (
200 # Just need enough characters to identify where in the doc the chunk is
@@ -87,6 +87,22 @@ class SavedSearchSettings(InferenceSettings, IndexingSetting):
)
class SearchSettingsSnapshot(SavedSearchSettings):
@classmethod
def from_saved_settings(
cls, settings: SavedSearchSettings
) -> "SearchSettingsSnapshot":
data = settings.dict(exclude={"rerank_api_key"})
data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
data["api_key"] = obfuscate_api_key(settings.api_key)
return cls(**data)
@classmethod
def from_db_model(cls, settings: SearchSettings) -> "SearchSettingsSnapshot":
return cls.from_saved_settings(SavedSearchSettings.from_db_model(settings))
class Tag(BaseModel):
tag_key: str
tag_value: str
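The dump-then-overwrite pattern above can be exercised in isolation. A minimal sketch, assuming pydantic v1-style .dict() as used in the diff; _Settings and _Snapshot below are stand-ins for SavedSearchSettings and SearchSettingsSnapshot, not the real classes:

from pydantic import BaseModel

def obfuscate_api_key(api_key: str | None) -> str | None:
    # mirrors shared_configs.utils.obfuscate_api_key from this commit
    return None if api_key is None else "*" * len(api_key)

class _Settings(BaseModel):  # stand-in for SavedSearchSettings
    rerank_model_name: str | None = None
    rerank_api_key: str | None = None

class _Snapshot(_Settings):  # stand-in for SearchSettingsSnapshot
    @classmethod
    def from_saved(cls, settings: _Settings) -> "_Snapshot":
        # drop the raw secret from the dump, then re-add its masked form
        data = settings.dict(exclude={"rerank_api_key"})
        data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
        return cls(**data)

snap = _Snapshot.from_saved(_Settings(rerank_model_name="rerank-1", rerank_api_key="sk-123"))
assert snap.rerank_api_key == "******"  # six characters in, six asterisks out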

View File

@@ -4,7 +4,7 @@ from pydantic import BaseModel
from pydantic import Field
from danswer.llm.llm_provider_options import fetch_models_for_provider
from shared_configs.utils import obfuscate_api_key
if TYPE_CHECKING:
from danswer.db.models import LLMProvider as LLMProviderModel
@@ -82,10 +82,10 @@ class FullLLMProvider(LLMProvider):
@classmethod
def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider":
return cls(
api_key=obfuscate_api_key(llm_provider_model.api_key),
id=llm_provider_model.id,
name=llm_provider_model.name,
provider=llm_provider_model.provider,
api_base=llm_provider_model.api_base,
api_version=llm_provider_model.api_version,
custom_config=llm_provider_model.custom_config,

View File

@@ -24,6 +24,7 @@ from danswer.document_index.factory import get_default_document_index
from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.search.models import SavedSearchSettings
from danswer.search.models import SearchSettingsCreationRequest
from danswer.search.models import SearchSettingsSnapshot
from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest
from danswer.server.manage.models import FullModelVersionResponse
from danswer.server.models import IdReturn
@@ -154,21 +155,21 @@ def delete_search_settings_endpoint(
def get_current_search_settings_endpoint(
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchSettingsSnapshot:
current_search_settings = get_current_search_settings(db_session)
return SearchSettingsSnapshot.from_db_model(current_search_settings)
@router.get("/get-secondary-search-settings")
def get_secondary_search_settings_endpoint(
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchSettingsSnapshot | None:
secondary_search_settings = get_secondary_search_settings(db_session)
if not secondary_search_settings:
return None
return SearchSettingsSnapshot.from_db_model(secondary_search_settings)
@router.get("/get-all-search-settings")

View File

@@ -9,3 +9,9 @@ def batch_list(
batch_size: int,
) -> list[list[T]]:
return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]
def obfuscate_api_key(api_key: str | None) -> str | None:
if api_key is None:
return None
return "*" * len(api_key)

View File

@@ -1,59 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "danswer-stack.fullname" . }}-api-deployment
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
spec:
{{- if not .Values.api.autoscaling.enabled }}
replicas: {{ .Values.api.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "danswer-stack.selectorLabels" . | nindent 6 }}
{{- if .Values.api.deploymentLabels }}
{{- toYaml .Values.api.deploymentLabels | nindent 6 }}
{{- end }}
template:
metadata:
{{- with .Values.api.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "danswer-stack.labels" . | nindent 8 }}
{{- with .Values.api.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.api.podSecurityContext | nindent 8 }}
containers:
- name: api-server
securityContext:
{{- toYaml .Values.api.securityContext | nindent 12 }}
image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.api.image.pullPolicy }}
command:
- "/bin/sh"
- "-c"
- |
alembic upgrade head &&
echo "Starting Danswer Api Server" &&
uvicorn danswer.main:app --host 0.0.0.0 --port 8080
ports:
- name: api-server-port
containerPort: {{ .Values.api.service.port }}
protocol: TCP
resources:
{{- toYaml .Values.api.resources | nindent 12 }}
envFrom:
- configMapRef:
name: {{ .Values.config.envConfigMapName }}
env:
{{- include "danswer-stack.envSecrets" . | nindent 12}}

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.envConfigMapName }}
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
data:
INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: "document-index-service"
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}

View File

@@ -1,473 +0,0 @@
# Default values for danswer-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
name: inference-model-server-service
type: ClusterIP
port: 9000
pvc:
name: inference-model-pvc
accessModes:
- ReadWriteOnce
storage: 3Gi
deployment:
name: inference-model-server-deployment
replicas: 1
labels:
- key: app
value: inference-model-server
image:
repository: danswer/danswer-model-server
tag: latest
pullPolicy: IfNotPresent
command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
port: 9000
volumeMounts:
- name: inference-model-storage
mountPath: /root/.cache
volumes:
- name: inference-model-storage
persistentVolumeClaim:
claimName: inference-model-pvc
podLabels:
- key: app
value: inference-model-server
indexCapability:
service:
type: ClusterIP
port: 9000
name: indexing-model-server-port
deploymentLabels:
app: indexing-model-server
podLabels:
app: indexing-model-server
indexingOnly: "True"
podAnnotations: {}
volumeMounts:
- name: indexing-model-storage
mountPath: /root/.cache
volumes:
- name: indexing-model-storage
persistentVolumeClaim:
claimName: indexing-model-storage
indexingModelPVC:
name: indexing-model-storage
accessMode: "ReadWriteOnce"
storage: "3Gi"
config:
envConfigMapName: env-configmap
serviceAccount:
# Specifies whether a service account should be created
create: false
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
postgresql:
primary:
persistence:
size: 5Gi
enabled: true
auth:
existingSecret: danswer-secrets
secretKeys:
adminPasswordKey: postgres_password # overwriting as postgres typically expects 'postgres-password'
nginx:
containerPorts:
http: 1024
extraEnvVars:
- name: DOMAIN
value: localhost
service:
ports:
http: 80
danswer: 3000
targetPort:
http: http
danswer: http
existingServerBlockConfigmap: danswer-nginx-conf
webserver:
replicaCount: 1
image:
repository: danswer/danswer-web-server
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: web-server
podAnnotations: {}
podLabels:
app: web-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 3000
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}
api:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: api-server
podAnnotations: {}
podLabels:
scope: danswer-backend
app: api-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 8080
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
background:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: latest
podAnnotations: {}
podLabels:
scope: danswer-backend
app: background
deploymentLabels:
app: background
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
vespa:
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: danswer
app.kubernetes.io/name: vespa
podSecurityContext: {}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:
enabled: true
architecture: standalone
commonConfiguration: |-
# Enable AOF https://redis.io/topics/persistence#append-only-file
appendonly no
# Disable RDB persistence, AOF persistence already enabled.
save ""
master:
replicaCount: 1
image:
registry: docker.io
repository: bitnami/redis
tag: "7.4.0"
pullPolicy: IfNotPresent
persistence:
enabled: false
service:
type: ClusterIP
port: 6379
auth:
existingSecret: danswer-secrets
existingSecretPasswordKey: redis_password
# ingress:
# enabled: false
# className: ""
# annotations: {}
# # kubernetes.io/ingress.class: nginx
# # kubernetes.io/tls-acme: "true"
# hosts:
# - host: chart-example.local
# paths:
# - path: /
# pathType: ImplementationSpecific
# tls: []
# # - secretName: chart-example-tls
# # hosts:
# # - chart-example.local
persistence:
vespa:
enabled: true
existingClaim: ""
storageClassName: ""
accessModes:
- ReadWriteOnce
size: 5Gi
auth:
# for storing smtp, oauth, slack, and other secrets
# keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
existingSecret: "" # danswer-secrets
# optionally override the secret keys to reference in the secret
# this is used to populate the env vars in individual deployments
# the values here reference the keys in secrets below
secretKeys:
postgres_password: "postgres_password"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
# will be overridden by the existingSecret if set
secretName: "danswer-secrets"
# set values as strings, they will be base64 encoded
# this is used to populate the secrets yaml
secrets:
postgres_password: "postgres"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"
configMap:
AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default
VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_USER: "" # 'your-email@company.com'
# SMTP_PASS: "" # 'your-gmail-password'
EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead
# Gen AI Settings
GEN_AI_MAX_TOKENS: ""
QA_TIMEOUT: "60"
MAX_CHUNKS_FED_TO_CHAT: ""
DISABLE_LLM_DOC_RELEVANCE: ""
DISABLE_LLM_CHOOSE_SEARCH: ""
DISABLE_LLM_QUERY_REPHRASE: ""
# Query Options
DOC_TIME_DECAY: ""
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
ASYM_PASSAGE_PREFIX: ""
DISABLE_RERANK_FOR_STREAMING: ""
MODEL_SERVER_PORT: ""
MIN_THREADS_ML_MODELS: ""
# Indexing Configs
VESPA_SEARCHER_THREADS: ""
NUM_INDEXING_WORKERS: ""
DISABLE_INDEX_UPDATE_ON_SWAP: ""
DASK_JOB_CLIENT_ENABLED: ""
CONTINUE_ON_CONNECTOR_FAILURE: ""
EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
JIRA_API_VERSION: ""
GONG_CONNECTOR_START_TIME: ""
NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
# DanswerBot SlackBot Configs
# DANSWER_BOT_SLACK_APP_TOKEN: ""
# DANSWER_BOT_SLACK_BOT_TOKEN: ""
DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
DANSWER_BOT_DISABLE_COT: "" # Currently unused
NOTIFY_SLACKBOT_NO_ANSWER: ""
# Logging
# Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
# https://docs.danswer.dev/more/telemetry
DISABLE_TELEMETRY: ""
LOG_LEVEL: ""
LOG_ALL_MODEL_INTERACTIONS: ""
LOG_DANSWER_MODEL_INTERACTIONS: ""
LOG_VESPA_TIMING_INFORMATION: ""
# Shared or Non-backend Related
WEB_DOMAIN: "http://localhost:3000" # for web server and api server
DOMAIN: "localhost" # for nginx
# Chat Configs
HARD_DELETE_CHATS: ""