Obfuscate API keys

This commit is contained in:
pablodanswer
2024-09-13 16:39:54 -07:00
parent 648c2531f9
commit 001bbb89cc
8 changed files with 34 additions and 556 deletions

View File

@@ -181,12 +181,15 @@ def update_current_search_settings(
logger.warning("No current search settings found to update") logger.warning("No current search settings found to update")
return return
print("current settings", current_settings.__dict__)
print("search settings", search_settings.__dict__)
# Whenever we update the current search settings, we should ensure that the local reranking model is warmed up. # Whenever we update the current search settings, we should ensure that the local reranking model is warmed up.
if ( if (
current_settings.provider_type is None search_settings.rerank_provider_type is None
and search_settings.rerank_model_name is not None and search_settings.rerank_model_name is not None
and current_settings.rerank_model_name != search_settings.rerank_model_name and current_settings.rerank_model_name != search_settings.rerank_model_name
): ):
print("WARMIGN THIS STUFF UP!")
warm_up_cross_encoder(search_settings.rerank_model_name) warm_up_cross_encoder(search_settings.rerank_model_name)
update_search_settings(current_settings, search_settings, preserved_fields) update_search_settings(current_settings, search_settings, preserved_fields)

View File

@@ -16,7 +16,7 @@ from danswer.search.enums import LLMEvaluationType
from danswer.search.enums import OptionalSearchSetting from danswer.search.enums import OptionalSearchSetting
from danswer.search.enums import SearchType from danswer.search.enums import SearchType
from shared_configs.enums import RerankerProvider from shared_configs.enums import RerankerProvider
from shared_configs.utils import obfuscate_api_key
MAX_METRICS_CONTENT = ( MAX_METRICS_CONTENT = (
200 # Just need enough characters to identify where in the doc the chunk is 200 # Just need enough characters to identify where in the doc the chunk is
@@ -87,6 +87,22 @@ class SavedSearchSettings(InferenceSettings, IndexingSetting):
) )
class SearchSettingsSnapshot(SavedSearchSettings):
    """API-facing view of search settings with provider API keys obfuscated.

    Use this (rather than ``SavedSearchSettings``) in endpoint responses so
    raw API keys are never serialized back to clients.
    """

    @classmethod
    def from_saved_settings(
        cls, settings: SavedSearchSettings
    ) -> "SearchSettingsSnapshot":
        """Build a snapshot from saved settings, masking both API keys.

        Exclude both secrets up front so the raw values never enter the
        intermediate dict (previously only ``rerank_api_key`` was excluded,
        leaving the raw ``api_key`` briefly present before being overwritten).
        """
        data = settings.dict(exclude={"rerank_api_key", "api_key"})
        data["rerank_api_key"] = obfuscate_api_key(settings.rerank_api_key)
        data["api_key"] = obfuscate_api_key(settings.api_key)
        return cls(**data)

    @classmethod
    def from_db_model(cls, settings: SearchSettings) -> "SearchSettingsSnapshot":
        """Convert a ``SearchSettings`` DB row into an obfuscated snapshot."""
        return cls.from_saved_settings(SavedSearchSettings.from_db_model(settings))
class Tag(BaseModel): class Tag(BaseModel):
tag_key: str tag_key: str
tag_value: str tag_value: str

View File

@@ -4,7 +4,7 @@ from pydantic import BaseModel
from pydantic import Field from pydantic import Field
from danswer.llm.llm_provider_options import fetch_models_for_provider from danswer.llm.llm_provider_options import fetch_models_for_provider
from shared_configs.utils import obfuscate_api_key
if TYPE_CHECKING: if TYPE_CHECKING:
from danswer.db.models import LLMProvider as LLMProviderModel from danswer.db.models import LLMProvider as LLMProviderModel
@@ -82,10 +82,10 @@ class FullLLMProvider(LLMProvider):
@classmethod @classmethod
def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider": def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider":
return cls( return cls(
api_key=obfuscate_api_key(llm_provider_model.api_key),
id=llm_provider_model.id, id=llm_provider_model.id,
name=llm_provider_model.name, name=llm_provider_model.name,
provider=llm_provider_model.provider, provider=llm_provider_model.provider,
api_key=llm_provider_model.api_key,
api_base=llm_provider_model.api_base, api_base=llm_provider_model.api_base,
api_version=llm_provider_model.api_version, api_version=llm_provider_model.api_version,
custom_config=llm_provider_model.custom_config, custom_config=llm_provider_model.custom_config,

View File

@@ -24,6 +24,7 @@ from danswer.document_index.factory import get_default_document_index
from danswer.natural_language_processing.search_nlp_models import clean_model_name from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.search.models import SavedSearchSettings from danswer.search.models import SavedSearchSettings
from danswer.search.models import SearchSettingsCreationRequest from danswer.search.models import SearchSettingsCreationRequest
from danswer.search.models import SearchSettingsSnapshot
from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest from danswer.server.manage.embedding.models import SearchSettingsDeleteRequest
from danswer.server.manage.models import FullModelVersionResponse from danswer.server.manage.models import FullModelVersionResponse
from danswer.server.models import IdReturn from danswer.server.models import IdReturn
@@ -154,21 +155,21 @@ def delete_search_settings_endpoint(
def get_current_search_settings_endpoint( def get_current_search_settings_endpoint(
_: User | None = Depends(current_user), _: User | None = Depends(current_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> SavedSearchSettings: ) -> SearchSettingsSnapshot:
current_search_settings = get_current_search_settings(db_session) current_search_settings = get_current_search_settings(db_session)
return SavedSearchSettings.from_db_model(current_search_settings) return SearchSettingsSnapshot.from_db_model(current_search_settings)
@router.get("/get-secondary-search-settings") @router.get("/get-secondary-search-settings")
def get_secondary_search_settings_endpoint( def get_secondary_search_settings_endpoint(
_: User | None = Depends(current_user), _: User | None = Depends(current_user),
db_session: Session = Depends(get_session), db_session: Session = Depends(get_session),
) -> SavedSearchSettings | None: ) -> SearchSettingsSnapshot | None:
secondary_search_settings = get_secondary_search_settings(db_session) secondary_search_settings = get_secondary_search_settings(db_session)
if not secondary_search_settings: if not secondary_search_settings:
return None return None
return SavedSearchSettings.from_db_model(secondary_search_settings) return SearchSettingsSnapshot.from_db_model(secondary_search_settings)
@router.get("/get-all-search-settings") @router.get("/get-all-search-settings")

View File

@@ -9,3 +9,9 @@ def batch_list(
batch_size: int, batch_size: int,
) -> list[list[T]]: ) -> list[list[T]]:
return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)] return [lst[i : i + batch_size] for i in range(0, len(lst), batch_size)]
def obfuscate_api_key(api_key: str | None) -> str | None:
if api_key is None:
return None
return "*" * len(api_key)

View File

@@ -1,59 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "danswer-stack.fullname" . }}-api-deployment
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
spec:
{{- if not .Values.api.autoscaling.enabled }}
replicas: {{ .Values.api.replicaCount }}
{{- end }}
selector:
matchLabels:
{{- include "danswer-stack.selectorLabels" . | nindent 6 }}
{{- if .Values.api.deploymentLabels }}
{{- toYaml .Values.api.deploymentLabels | nindent 6 }}
{{- end }}
template:
metadata:
{{- with .Values.api.podAnnotations }}
annotations:
{{- toYaml . | nindent 8 }}
{{- end }}
labels:
{{- include "danswer-stack.labels" . | nindent 8 }}
{{- with .Values.api.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}
serviceAccountName: {{ include "danswer-stack.serviceAccountName" . }}
securityContext:
{{- toYaml .Values.api.podSecurityContext | nindent 8 }}
containers:
- name: api-server
securityContext:
{{- toYaml .Values.api.securityContext | nindent 12 }}
image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag | default .Chart.AppVersion }}"
imagePullPolicy: {{ .Values.api.image.pullPolicy }}
command:
- "/bin/sh"
- "-c"
- |
alembic upgrade head &&
echo "Starting Danswer Api Server" &&
uvicorn danswer.main:app --host 0.0.0.0 --port 8080
ports:
- name: api-server-port
containerPort: {{ .Values.api.service.port }}
protocol: TCP
resources:
{{- toYaml .Values.api.resources | nindent 12 }}
envFrom:
- configMapRef:
name: {{ .Values.config.envConfigMapName }}
env:
{{- include "danswer-stack.envSecrets" . | nindent 12}}

View File

@@ -1,16 +0,0 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ .Values.config.envConfigMapName }}
labels:
{{- include "danswer-stack.labels" . | nindent 4 }}
data:
INTERNAL_URL: "http://{{ include "danswer-stack.fullname" . }}-api-service:{{ .Values.api.service.port | default 8080 }}"
POSTGRES_HOST: {{ .Release.Name }}-postgresql
VESPA_HOST: "document-index-service"
REDIS_HOST: {{ .Release.Name }}-redis-master
MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-inference-model-service"
INDEXING_MODEL_SERVER_HOST: "{{ include "danswer-stack.fullname" . }}-indexing-model-service"
{{- range $key, $value := .Values.configMap }}
{{ $key }}: "{{ $value }}"
{{- end }}

View File

@@ -1,473 +0,0 @@
# Default values for danswer-stack.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
imagePullSecrets: []
nameOverride: ""
fullnameOverride: ""
inferenceCapability:
service:
name: inference-model-server-service
type: ClusterIP
port: 9000
pvc:
name: inference-model-pvc
accessModes:
- ReadWriteOnce
storage: 3Gi
deployment:
name: inference-model-server-deployment
replicas: 1
labels:
- key: app
value: inference-model-server
image:
repository: danswer/danswer-model-server
tag: latest
pullPolicy: IfNotPresent
command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"]
port: 9000
volumeMounts:
- name: inference-model-storage
mountPath: /root/.cache
volumes:
- name: inference-model-storage
persistentVolumeClaim:
claimName: inference-model-pvc
podLabels:
- key: app
value: inference-model-server
indexCapability:
service:
type: ClusterIP
port: 9000
name: indexing-model-server-port
deploymentLabels:
app: indexing-model-server
podLabels:
app: indexing-model-server
indexingOnly: "True"
podAnnotations: {}
volumeMounts:
- name: indexing-model-storage
mountPath: /root/.cache
volumes:
- name: indexing-model-storage
persistentVolumeClaim:
claimName: indexing-model-storage
indexingModelPVC:
name: indexing-model-storage
accessMode: "ReadWriteOnce"
storage: "3Gi"
config:
envConfigMapName: env-configmap
serviceAccount:
# Specifies whether a service account should be created
create: false
# Automatically mount a ServiceAccount's API credentials?
automount: true
# Annotations to add to the service account
annotations: {}
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name: ""
postgresql:
primary:
persistence:
size: 5Gi
enabled: true
auth:
existingSecret: danswer-secrets
secretKeys:
adminPasswordKey: postgres_password # overwriting as postgres typically expects 'postgres-password'
nginx:
containerPorts:
http: 1024
extraEnvVars:
- name: DOMAIN
value: localhost
service:
ports:
http: 80
danswer: 3000
targetPort:
http: http
danswer: http
existingServerBlockConfigmap: danswer-nginx-conf
webserver:
replicaCount: 1
image:
repository: danswer/danswer-web-server
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: web-server
podAnnotations: {}
podLabels:
app: web-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 3000
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
affinity: {}
api:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: ""
deploymentLabels:
app: api-server
podAnnotations: {}
podLabels:
scope: danswer-backend
app: api-server
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
service:
type: ClusterIP
port: 8080
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
background:
replicaCount: 1
image:
repository: danswer/danswer-backend
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
tag: latest
podAnnotations: {}
podLabels:
scope: danswer-backend
app: background
deploymentLabels:
app: background
podSecurityContext: {}
# fsGroup: 2000
securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
enableMiniChunk: "true"
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# requests:
# cpu: 1000m # Requests 1 CPU core
# memory: 1Gi # Requests 1 GiB of memory
# limits:
# cpu: 2000m # Limits to 2 CPU cores
# memory: 2Gi # Limits to 2 GiB of memory
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 100
targetCPUUtilizationPercentage: 80
# targetMemoryUtilizationPercentage: 80
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
# secret:
# secretName: mysecret
# optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
# mountPath: "/etc/foo"
# readOnly: true
nodeSelector: {}
tolerations: []
vespa:
enabled: true
replicaCount: 1
image:
repository: vespa
pullPolicy: IfNotPresent
tag: "8.277.17"
podAnnotations: {}
podLabels:
app: vespa
app.kubernetes.io/instance: danswer
app.kubernetes.io/name: vespa
podSecurityContext: {}
# fsGroup: 2000
securityContext:
privileged: true
runAsUser: 0
resources:
# The Vespa Helm chart specifies default resources, which are quite modest. We override
# them here to increase chances of the chart running successfully.
requests:
cpu: 1500m
memory: 4000Mi
limits:
cpu: 1500m
memory: 4000Mi
nodeSelector: {}
tolerations: []
affinity: {}
redis:
enabled: true
architecture: standalone
commonConfiguration: |-
# Enable AOF https://redis.io/topics/persistence#append-only-file
appendonly no
# Disable RDB persistence, AOF persistence already enabled.
save ""
master:
replicaCount: 1
image:
registry: docker.io
repository: bitnami/redis
tag: "7.4.0"
pullPolicy: IfNotPresent
persistence:
enabled: false
service:
type: ClusterIP
port: 6379
auth:
existingSecret: danswer-secrets
existingSecretPasswordKey: redis_password
# ingress:
# enabled: false
# className: ""
# annotations: {}
# # kubernetes.io/ingress.class: nginx
# # kubernetes.io/tls-acme: "true"
# hosts:
# - host: chart-example.local
# paths:
# - path: /
# pathType: ImplementationSpecific
# tls: []
# # - secretName: chart-example-tls
# # hosts:
# # - chart-example.local
persistence:
vespa:
enabled: true
existingClaim: ""
storageClassName: ""
accessModes:
- ReadWriteOnce
size: 5Gi
auth:
# for storing smtp, oauth, slack, and other secrets
# keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
existingSecret: "" # danswer-secrets
# optionally override the secret keys to reference in the secret
# this is used to populate the env vars in individual deployments
# the values here reference the keys in secrets below
secretKeys:
postgres_password: "postgres_password"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
# will be overridden by the existingSecret if set
secretName: "danswer-secrets"
# set values as strings, they will be base64 encoded
# this is used to populate the secrets yaml
secrets:
postgres_password: "postgres"
smtp_pass: ""
oauth_client_id: ""
oauth_client_secret: ""
oauth_cookie_secret: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"
configMap:
AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default
VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_USER: "" # 'your-email@company.com'
# SMTP_PASS: "" # 'your-gmail-password'
EMAIL_FROM: "" # 'your-email@company.com'; if unset, SMTP_USER is used instead
# Gen AI Settings
GEN_AI_MAX_TOKENS: ""
QA_TIMEOUT: "60"
MAX_CHUNKS_FED_TO_CHAT: ""
DISABLE_LLM_DOC_RELEVANCE: ""
DISABLE_LLM_CHOOSE_SEARCH: ""
DISABLE_LLM_QUERY_REPHRASE: ""
# Query Options
DOC_TIME_DECAY: ""
HYBRID_ALPHA: ""
EDIT_KEYWORD_QUERY: ""
MULTILINGUAL_QUERY_EXPANSION: ""
LANGUAGE_HINT: ""
LANGUAGE_CHAT_NAMING_HINT: ""
QA_PROMPT_OVERRIDE: ""
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing
EMBEDDING_BATCH_SIZE: ""
DOCUMENT_ENCODER_MODEL: ""
NORMALIZE_EMBEDDINGS: ""
ASYM_QUERY_PREFIX: ""
ASYM_PASSAGE_PREFIX: ""
DISABLE_RERANK_FOR_STREAMING: ""
MODEL_SERVER_PORT: ""
MIN_THREADS_ML_MODELS: ""
# Indexing Configs
VESPA_SEARCHER_THREADS: ""
NUM_INDEXING_WORKERS: ""
DISABLE_INDEX_UPDATE_ON_SWAP: ""
DASK_JOB_CLIENT_ENABLED: ""
CONTINUE_ON_CONNECTOR_FAILURE: ""
EXPERIMENTAL_CHECKPOINTING_ENABLED: ""
CONFLUENCE_CONNECTOR_LABELS_TO_SKIP: ""
JIRA_API_VERSION: ""
GONG_CONNECTOR_START_TIME: ""
NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP: ""
# DanswerBot SlackBot Configs
# DANSWER_BOT_SLACK_APP_TOKEN: ""
# DANSWER_BOT_SLACK_BOT_TOKEN: ""
DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER: ""
DANSWER_BOT_DISPLAY_ERROR_MSGS: ""
DANSWER_BOT_RESPOND_EVERY_CHANNEL: ""
DANSWER_BOT_DISABLE_COT: "" # Currently unused
NOTIFY_SLACKBOT_NO_ANSWER: ""
# Logging
# Optional telemetry — please keep it on; nothing sensitive is collected. <3
# https://docs.danswer.dev/more/telemetry
DISABLE_TELEMETRY: ""
LOG_LEVEL: ""
LOG_ALL_MODEL_INTERACTIONS: ""
LOG_DANSWER_MODEL_INTERACTIONS: ""
LOG_VESPA_TIMING_INFORMATION: ""
# Shared or Non-backend Related
WEB_DOMAIN: "http://localhost:3000" # for web server and api server
DOMAIN: "localhost" # for nginx
# Chat Configs
HARD_DELETE_CHATS: ""