mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-09 12:47:13 +02:00
Always Use Model Server (#1306)
This commit is contained in:
@@ -43,9 +43,9 @@ data:
|
||||
ASYM_PASSAGE_PREFIX: ""
|
||||
ENABLE_RERANKING_REAL_TIME_FLOW: ""
|
||||
ENABLE_RERANKING_ASYNC_FLOW: ""
|
||||
MODEL_SERVER_HOST: ""
|
||||
MODEL_SERVER_HOST: "inference-model-server-service"
|
||||
MODEL_SERVER_PORT: ""
|
||||
INDEXING_MODEL_SERVER_HOST: ""
|
||||
INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service"
|
||||
MIN_THREADS_ML_MODELS: ""
|
||||
# Indexing Configs
|
||||
NUM_INDEXING_WORKERS: ""
|
||||
|
@@ -0,0 +1,59 @@
|
||||
# Cluster-internal Service for the indexing model server.
# Forwards TCP 9000 to port 9000 on pods labelled app=indexing-model-server.
# The Service name is the same string the ConfigMap change in this commit
# assigns to INDEXING_MODEL_SERVER_HOST.
apiVersion: v1
kind: Service
metadata:
  name: indexing-model-server-service
spec:
  type: ClusterIP
  selector:
    app: indexing-model-server
  ports:
    - name: indexing-model-server-port
      protocol: TCP
      port: 9000
      targetPort: 9000
|
||||
---
|
||||
# Single-replica Deployment that runs the model server image with uvicorn on
# port 9000, flagged INDEXING_ONLY, with a PVC-backed volume at /root/.cache.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: indexing-model-server-deployment
spec:
  # NOTE(review): one replica plus a ReadWriteOnce claim; with the default
  # rollout strategy the replacement pod may need the volume while the old
  # pod still holds it - confirm this rolls cleanly on multi-node clusters.
  replicas: 1
  selector:
    matchLabels:
      app: indexing-model-server
  template:
    metadata:
      labels:
        app: indexing-model-server
    spec:
      volumes:
        - name: indexing-model-storage
          persistentVolumeClaim:
            claimName: indexing-model-pvc
      containers:
        - name: indexing-model-server
          # NOTE(review): a mutable ":latest" tag with IfNotPresent means a
          # node that already has the image cached will not pull a newer one.
          image: danswer/danswer-model-server:latest
          imagePullPolicy: IfNotPresent
          command:
            - "uvicorn"
            - "model_server.main:app"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "9000"
          ports:
            - containerPort: 9000
          # Shared settings are loaded from env-configmap; the explicit env
          # entry below is specific to this Deployment.
          envFrom:
            - configMapRef:
                name: env-configmap
          env:
            # Presumably switches the server into indexing-only mode - TODO
            # confirm against the model_server code. Quoted so it stays a string.
            - name: INDEXING_ONLY
              value: "True"
          volumeMounts:
            # Presumably the model download cache - TODO confirm.
            - name: indexing-model-storage
              mountPath: /root/.cache
|
||||
---
|
||||
# Claim for 3Gi of ReadWriteOnce storage; mounted by the indexing model
# server Deployment as its "indexing-model-storage" volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: indexing-model-pvc
spec:
  resources:
    requests:
      storage: 3Gi
  accessModes:
    - ReadWriteOnce
|
@@ -0,0 +1,56 @@
|
||||
# Cluster-internal Service for the inference model server.
# Forwards TCP 9000 to port 9000 on pods labelled app=inference-model-server.
# The Service name is the same string the ConfigMap change in this commit
# assigns to MODEL_SERVER_HOST.
apiVersion: v1
kind: Service
metadata:
  name: inference-model-server-service
spec:
  type: ClusterIP
  selector:
    app: inference-model-server
  ports:
    - name: inference-model-server-port
      protocol: TCP
      port: 9000
      targetPort: 9000
|
||||
---
|
||||
# Single-replica Deployment that runs the model server image with uvicorn on
# port 9000, with a PVC-backed volume at /root/.cache.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-model-server-deployment
spec:
  # NOTE(review): one replica plus a ReadWriteOnce claim; with the default
  # rollout strategy the replacement pod may need the volume while the old
  # pod still holds it - confirm this rolls cleanly on multi-node clusters.
  replicas: 1
  selector:
    matchLabels:
      app: inference-model-server
  template:
    metadata:
      labels:
        app: inference-model-server
    spec:
      volumes:
        - name: inference-model-storage
          persistentVolumeClaim:
            claimName: inference-model-pvc
      containers:
        - name: inference-model-server
          # NOTE(review): a mutable ":latest" tag with IfNotPresent means a
          # node that already has the image cached will not pull a newer one.
          image: danswer/danswer-model-server:latest
          imagePullPolicy: IfNotPresent
          command:
            - "uvicorn"
            - "model_server.main:app"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "9000"
          ports:
            - containerPort: 9000
          # All environment settings are loaded from env-configmap.
          envFrom:
            - configMapRef:
                name: env-configmap
          volumeMounts:
            # Presumably the model download cache - TODO confirm.
            - name: inference-model-storage
              mountPath: /root/.cache
|
||||
---
|
||||
# Claim for 3Gi of ReadWriteOnce storage; mounted by the inference model
# server Deployment as its "inference-model-storage" volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: inference-model-pvc
spec:
  resources:
    requests:
      storage: 3Gi
  accessModes:
    - ReadWriteOnce
|
Reference in New Issue
Block a user