Always Use Model Server (#1306)

This commit is contained in:
Yuhong Sun
2024-04-07 21:25:06 -07:00
committed by GitHub
parent 795243283d
commit 2db906b7a2
35 changed files with 724 additions and 550 deletions

View File

@@ -43,9 +43,9 @@ data:
ASYM_PASSAGE_PREFIX: ""
ENABLE_RERANKING_REAL_TIME_FLOW: ""
ENABLE_RERANKING_ASYNC_FLOW: ""
MODEL_SERVER_HOST: ""
MODEL_SERVER_HOST: "inference-model-server-service"
MODEL_SERVER_PORT: ""
INDEXING_MODEL_SERVER_HOST: ""
INDEXING_MODEL_SERVER_HOST: "indexing-model-server-service"
MIN_THREADS_ML_MODELS: ""
# Indexing Configs
NUM_INDEXING_WORKERS: ""

View File

@@ -0,0 +1,59 @@
# In-cluster Service for the indexing model server.
# Other workloads reach it as indexing-model-server-service:9000.
apiVersion: v1
kind: Service
metadata:
  name: indexing-model-server-service
spec:
  # ClusterIP: only reachable from inside the cluster.
  type: ClusterIP
  # Traffic is routed to pods carrying this label.
  selector:
    app: indexing-model-server
  ports:
    - name: indexing-model-server-port
      protocol: TCP
      port: 9000
      # Same port the container's uvicorn process is started on.
      targetPort: 9000
---
# Deployment that runs a single replica of the indexing model server.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: indexing-model-server-deployment
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce claim (indexing-model-pvc). With the default
  # RollingUpdate strategy the replacement pod is created before the old one is
  # stopped; if it is scheduled onto a different node it cannot attach the
  # volume and the rollout stalls. Recreate terminates the old pod first.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: indexing-model-server
  template:
    metadata:
      labels:
        app: indexing-model-server
    spec:
      containers:
        - name: indexing-model-server
          # NOTE(review): with IfNotPresent, a node that already has a cached
          # `latest` image will not pull a newer one. Consider pinning a
          # version tag if reproducible rollouts matter.
          image: danswer/danswer-model-server:latest
          imagePullPolicy: IfNotPresent
          # Serve the model server app with uvicorn on all interfaces, port 9000.
          command:
            - "uvicorn"
            - "model_server.main:app"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "9000"
          ports:
            - containerPort: 9000
          # Every key of env-configmap is exposed as an environment variable.
          envFrom:
            - configMapRef:
                name: env-configmap
          env:
            # Explicit entries in `env` take precedence over envFrom values.
            # Kept as a quoted string because env values must be strings.
            # NOTE(review): presumably switches the server into indexing-only
            # mode; confirm against the application code.
            - name: INDEXING_ONLY
              value: "True"
          volumeMounts:
            # NOTE(review): presumably the model download cache, persisted so
            # restarts do not re-download; confirm.
            - name: indexing-model-storage
              mountPath: /root/.cache
      volumes:
        - name: indexing-model-storage
          persistentVolumeClaim:
            claimName: indexing-model-pvc
---
# Persistent storage claim used by the indexing model server deployment
# (mounted there at /root/.cache).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: indexing-model-pvc
spec:
  resources:
    requests:
      storage: 3Gi
  # ReadWriteOnce: the volume can be mounted read-write by a single node.
  accessModes:
    - ReadWriteOnce

View File

@@ -0,0 +1,56 @@
# In-cluster Service for the inference model server.
# Other workloads reach it as inference-model-server-service:9000.
apiVersion: v1
kind: Service
metadata:
  name: inference-model-server-service
spec:
  # ClusterIP: only reachable from inside the cluster.
  type: ClusterIP
  # Traffic is routed to pods carrying this label.
  selector:
    app: inference-model-server
  ports:
    - name: inference-model-server-port
      protocol: TCP
      port: 9000
      # Same port the container's uvicorn process is started on.
      targetPort: 9000
---
# Deployment that runs a single replica of the inference model server.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inference-model-server-deployment
spec:
  replicas: 1
  # The pod mounts a ReadWriteOnce claim (inference-model-pvc). With the default
  # RollingUpdate strategy the replacement pod is created before the old one is
  # stopped; if it is scheduled onto a different node it cannot attach the
  # volume and the rollout stalls. Recreate terminates the old pod first.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: inference-model-server
  template:
    metadata:
      labels:
        app: inference-model-server
    spec:
      containers:
        - name: inference-model-server
          # NOTE(review): with IfNotPresent, a node that already has a cached
          # `latest` image will not pull a newer one. Consider pinning a
          # version tag if reproducible rollouts matter.
          image: danswer/danswer-model-server:latest
          imagePullPolicy: IfNotPresent
          # Serve the model server app with uvicorn on all interfaces, port 9000.
          command:
            - "uvicorn"
            - "model_server.main:app"
            - "--host"
            - "0.0.0.0"
            - "--port"
            - "9000"
          ports:
            - containerPort: 9000
          # Every key of env-configmap is exposed as an environment variable.
          envFrom:
            - configMapRef:
                name: env-configmap
          volumeMounts:
            # NOTE(review): presumably the model download cache, persisted so
            # restarts do not re-download; confirm.
            - name: inference-model-storage
              mountPath: /root/.cache
      volumes:
        - name: inference-model-storage
          persistentVolumeClaim:
            claimName: inference-model-pvc
---
# Persistent storage claim used by the inference model server deployment
# (mounted there at /root/.cache).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: inference-model-pvc
spec:
  resources:
    requests:
      storage: 3Gi
  # ReadWriteOnce: the volume can be mounted read-write by a single node.
  accessModes:
    - ReadWriteOnce