obfuscate api keys

This commit is contained in:
pablodanswer
2024-09-13 16:39:54 -07:00
parent 648c2531f9
commit 001bbb89cc
8 changed files with 34 additions and 556 deletions

View File

@@ -181,12 +181,15 @@ def update_current_search_settings(
logger.warning("No current search settings found to update")
return
print("current settings", current_settings.__dict__)
print("search settings", search_settings.__dict__)
# Whenever we update the current search settings, we should ensure that the local reranking model is warmed up.
if (
current_settings.provider_type is None
search_settings.rerank_provider_type is None
and search_settings.rerank_model_name is not None
and current_settings.rerank_model_name != search_settings.rerank_model_name
):
print("WARMIGN THIS STUFF UP!")
warm_up_cross_encoder(search_settings.rerank_model_name)
update_search_settings(current_settings, search_settings, preserved_fields)

View File

@@ -16,7 +16,7 @@ from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType
from shared_configs.enums import RerankerProvider
from shared_configs.utils import obfuscate_api_key
MAX_METRICS_CONTENT = (
200 # Just need enough characters to identify where in the doc the chunk is
@@ -87,6 +87,22 @@ class SavedSearchSettings(InferenceSettings, IndexingSetting):
)
class SearchSettingsSnapshot(SavedSearchSettings):
@classmethod
def from_saved_settings(
cls, settings: SavedSearchSettings
) -> "SearchSettingsSnapshot":
data = settings.dict(exclude={"rerank_api_key"})
data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
data["api_key"] = obfuscate_api_key(settings.api_key)
return cls(**data)
@classmethod
def from_db_model(cls, settings: SearchSettings) -> "SearchSettingsSnapshot":
return cls.from_saved_settings(SavedSearchSettings.from_db_model(settings))
class Tag(BaseModel):
tag_key: str
tag_value: str
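The dump-then-overwrite pattern above can be exercised in isolation. A minimal sketch, assuming pydantic v1-style .dict() as used in the diff; _Settings and _Snapshot below are stand-ins for SavedSearchSettings and SearchSettingsSnapshot, not the real classes:

from pydantic import BaseModel

def obfuscate_api_key(api_key: str | None) -> str | None:
    # mirrors shared_configs.utils.obfuscate_api_key from this commit
    return None if api_key is None else "*" * len(api_key)

class _Settings(BaseModel):  # stand-in for SavedSearchSettings
    rerank_model_name: str | None = None
    rerank_api_key: str | None = None

class _Snapshot(_Settings):  # stand-in for SearchSettingsSnapshot
    @classmethod
    def from_saved(cls, settings: _Settings) -> "_Snapshot":
        # drop the raw secret from the dump, then re-add its masked form
        data = settings.dict(exclude={"rerank_api_key"})
        data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
        return cls(**data)

snap = _Snapshot.from_saved(_Settings(rerank_model_name="rerank-1", rerank_api_key="sk-123"))
assert snap.rerank_api_key == "******"  # six characters in, six asterisks out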

View File

@@ -4,7 +4,7 @@ from pydantic import BaseModel
from pydantic import Field
from danswer.llm.llm_provider_options import fetch_models_for_provider
from shared_configs.utils import obfuscate_api_key
if TYPE_CHECKING:
from danswer.db.models import LLMProvider as LLMProviderModel
@@ -82,10 +82,10 @@ class FullLLMProvider(LLMProvider):
@classmethod
def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider":
return cls(
api_key=obfuscate_api_key(llm_provider_model.api_key),
id=llm_provider_model.id,
name=llm_provider_model.name,
provider=llm_provider_model.provider,
api_base=llm_provider_model.api_base,
api_version=llm_provider_model.api_version,
custom_config=llm_provider_model.custom_config,

View File

@@ -24,6 +24,7 @@ from danswer.document_index.factory import get_default_document_index
from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.search.models import SavedSearchSettings
from danswer.search.models import SearchSettingsCreationRequest
from danswer.search.models import SearchSettingsSnapshot
from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest
from danswer.server.manage.models import FullModelVersionResponse
from danswer.server.models import IdReturn
@@ -154,21 +155,21 @@ def delete_search_settings_endpoint(
def get_current_search_settings_endpoint(
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchSettingsSnapshot:
current_search_settings = get_current_search_settings(db_session)
return SearchSettingsSnapshot.from_db_model(current_search_settings)
@router.get("/get-secondary-search-settings")
def get_secondary_search_settings_endpoint(
_: User | None = Depends(current_user),
db_session: Session = Depends(get_session),
) -> SearchSettingsSnapshot | None:
secondary_search_settings = get_secondary_search_settings(db_session)
if not secondary_search_settings:
return None
return SearchSettingsSnapshot.from_db_model(secondary_search_settings)
@router.get("/get-all-search-settings")

View File

@@ -9,3 +9,9 @@ def batch_list(
batch_size: int,
) -> list[list[T]]:
return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]
def obfuscate_api_key(api_key: str | None) -> str | None:
if api_key is None:
return None
return "*" * len(api_key)

View File

@@ -1,59 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "danswer-stack.fullname" . }}-api-deployment
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
spec:
{{- if not .Values.api.autoscaling.enabled }}
replicas: {{ .Values.api.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "danswer-stack.selectorLabels" . | nindent 6 }}
{{- if .Values.api.deploymentLabels }}
{{- toYaml .Values.api.deploymentLabels | nindent 6 }}
{{- end }}
template:
metadata:
{{- with .Values.api.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "danswer-stack.labels" . | nindent 8 }}
{{- with .Values.api.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.api.podSecurityContext | nindent 8 }}
containers:
- name: api-server
securityContext:
{{- toYaml .Values.api.securityContext | nindent 12 }}
image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.api.image.pullPolicy }}
command:
- "/bin/sh"
- "-c"
- |
alembic upgrade head &&
echo "Starting Danswer Api Server" &&
uvicorn danswer.main:app --host 0.0.0.0 --port 8080
ports:
- name: api-server-port
containerPort: {{ .Values.api.service.port }}
protocol: TCP
resources:
{{- toYaml .Values.api.resources | nindent 12 }}
envFrom:
- configMapRef:
name: {{ .Values.config.envConfigMapName }}
env:
{{- include "danswer-stack.envSecrets" . | nindent 12}}

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.envConfigMapName }}
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
data:
INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: "document-index-service"
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}

View File

@@ -1,473 +0,0 @@
# Default values for danswer-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
name: inference-model-server-service
type: ClusterIP
port: 9000
pvc:
name: inference-model-pvc
accessModes:
- ReadWriteOnce
storage: 3Gi
deployment:
name: inference-model-server-deployment
replicas: 1
labels:
- key: app
value: inference-model-server
image:
repository: danswer/danswer-model-server
tag: latest
pullPolicy: IfNotPresent
command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
port: 9000
volumeMounts:
- name: inference-model-storage
mountPath: /root/.cache
volumes:
- name: inference-model-storage
persistentVolumeClaim:
claimName: inference-model-pvc
podLabels:
- key: app
value: inference-model-server
indexCapability:
service:
type: ClusterIP
port: 9000
name: indexing-model-server-port
deploymentLabels:
app: indexing-model-server
podLabels:
app: indexing-model-server
indexingOnly: "True"
podAnnotations: {}
volumeMounts:
- name: indexing-model-storage
mountPath: /root/.cache
volumes:
- name: indexing-model-storage
persistentVolumeClaim:
claimName: indexing-model-storage
indexingModelPVC:
name: indexing-model-storage
accessMode: "ReadWriteOnce"
storage: "3Gi"
config:
envConfigMapName: env-configmap
serviceAccount:
# Specifies whether a service account should be created
create: false
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
postgresql:
primary:
persistence:
size: 5Gi
enabled: true
auth:
existingSecret: danswer-secrets
secretKeys:
adminPasswordKey: postgres_password # overwriting as postgres typically expects 'postgres-password'
nginx:
containerPorts:
http: 1024
extraEnvVars:
- name: DOMAIN
value: localhost
service:
ports:
http: 80
danswer: 3000
targetPort:
http: http
danswer: http
existingServerBlockConfigmap: danswer-nginx-conf
webserver:
replicaCount: 1
image:
repository: danswer/danswer-web-server
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: web-server
podAnnotations: {}
podLabels:
app: web-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 3000
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}
api:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: api-server
podAnnotations: {}
podLabels:
scope: danswer-backend
app: api-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 8080
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
background:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: latest
podAnnotations: {}
podLabels:
scope: danswer-backend
app: background
deploymentLabels:
app: background
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
vespa:
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: danswer
app.kubernetes.io/name: vespa
podSecurityContext: {}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:
enabled: true
architecture: standalone
commonConfiguration: |-
# Enable AOF https://redis.io/topics/persistence#append-only-file
appendonly no
# Disable RDB persistence, AOF persistence already enabled.
save ""
master:
replicaCount: 1
image:
registry: docker.io
repository: bitnami/redis
tag: "7.4.0"
pullPolicy: IfNotPresent
persistence:
enabled: false
service:
type: ClusterIP
port: 6379
auth:
existingSecret: danswer-secrets
existingSecretPasswordKey: redis_password
# ingress:
# enabled: false
# className: ""
# annotations: {}
# # kubernetes.io/ingress.class: nginx
# # kubernetes.io/tls-acme: "true"
# hosts:
# - host: chart-example.local
# paths:
# - path: /
# pathType: ImplementationSpecific
# tls: []
# # - secretName: chart-example-tls
# # hosts:
# # - chart-example.local
persistence:
vespa:
enabled: true
existingClaim: ""
storageClassName: ""
accessModes:
- ReadWriteOnce
size: 5Gi
auth:
# for storing smtp, oauth, slack, and other secrets
# keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
existingSecret: "" # danswer-secrets
# optionally override the secret keys to reference in the secret
# this is used to populate the env vars in individual deployments
# the values here reference the keys in secrets below
secretKeys:
postgres_password: "postgres_password"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
# will be overridden by the existingSecret if set
secretName: "danswer-secrets"
# set values as strings, they will be base64 encoded
# this is used to populate the secrets yaml
secrets:
postgres_password: "postgres"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"
configMap:
AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default
VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_USER: "" # 'your-email@company.com'
# SMTP_PASS: "" # 'your-gmail-password'
EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead
# Gen AI Settings
GEN_AI_MAX_TOKENS: ""
QA_TIMEOUT: "60"
MAX_CHUNKS_FED_TO_CHAT: ""
DISABLE_LLM_DOC_RELEVANCE: ""
DISABLE_LLM_CHOOSE_SEARCH: ""
DISABLE_LLM_QUERY_REPHRASE: ""
# Query Options
DOC_TIME_DECAY: ""
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
ASYM_PASSAGE_PREFIX: ""
DISABLE_RERANK_FOR_STREAMING: ""
MODEL_SERVER_PORT: ""
MIN_THREADS_ML_MODELS: ""
# Indexing Configs
VESPA_SEARCHER_THREADS: ""
NUM_INDEXING_WORKERS: ""
DISABLE_INDEX_UPDATE_ON_SWAP: ""
DASK_JOB_CLIENT_ENABLED: ""
CONTINUE_ON_CONNECTOR_FAILURE: ""
EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
JIRA_API_VERSION: ""
GONG_CONNECTOR_START_TIME: ""
NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
# DanswerBot SlackBot Configs
# DANSWER_BOT_SLACK_APP_TOKEN: ""
# DANSWER_BOT_SLACK_BOT_TOKEN: ""
DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
DANSWER_BOT_DISABLE_COT: "" # Currently unused
NOTIFY_SLACKBOT_NO_ANSWER: ""
# Logging
# Optional Telemetry, please keep it on (nothing sensitive is collected)? <3
# https://docs.danswer.dev/more/telemetry
DISABLE_TELEMETRY: ""
LOG_LEVEL: ""
LOG_ALL_MODEL_INTERACTIONS: ""
LOG_DANSWER_MODEL_INTERACTIONS: ""
LOG_VESPA_TIMING_INFORMATION: ""
# Shared or Non-backend Related
WEB_DOMAIN: "http://localhost:3000" # for web server and api server
DOMAIN: "localhost" # for nginx
# Chat Configs
HARD_DELETE_CHATS: ""