Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-10-10 21:26:01 +02:00.
Model Server (#695)
Provides the ability to pull out the NLP models into a separate model server which can then be hosted on a GPU instance if desired.
This commit is contained in:
40
backend/model_server/custom_models.py
Normal file
40
backend/model_server/custom_models.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import numpy as np
|
||||
import tensorflow as tf # type:ignore
|
||||
from fastapi import APIRouter
|
||||
|
||||
from danswer.search.search_nlp_models import get_intent_model_tokenizer
|
||||
from danswer.search.search_nlp_models import get_local_intent_model
|
||||
from danswer.utils.timing import log_function_time
|
||||
from shared_models.model_server_models import IntentRequest
|
||||
from shared_models.model_server_models import IntentResponse
|
||||
|
||||
# Router for the custom (non-standard) NLP model endpoints, mounted under /custom.
router = APIRouter(prefix="/custom")
|
||||
|
||||
|
||||
@log_function_time()
def classify_intent(query: str) -> list[float]:
    """Score `query` against the local intent classifier.

    Returns the per-class scores for the single input sequence as
    percentages in [0, 100], rounded to two decimal places.
    """
    tok = get_intent_model_tokenizer()
    model = get_local_intent_model()

    # Truncation/padding lets queries of any length go through the model.
    encoded = tok(query, return_tensors="tf", truncation=True, padding=True)

    # Forward pass produces raw logits; softmax converts them to probabilities.
    logits = model(encoded)[0]
    probs = tf.nn.softmax(logits, axis=-1)

    # Scale to percentages and round; [0] unwraps the batch dimension.
    as_percent = np.round(probs.numpy() * 100, 2)
    return list(as_percent.tolist()[0])
|
||||
|
||||
|
||||
@router.post("/intent-model")
|
||||
def process_intent_request(
|
||||
intent_request: IntentRequest,
|
||||
) -> IntentResponse:
|
||||
class_percentages = classify_intent(intent_request.query)
|
||||
return IntentResponse(class_probs=class_percentages)
|
||||
|
||||
|
||||
def warm_up_intent_model() -> None:
    """Prime the intent tokenizer and model with one dummy forward pass.

    Forces lazy model/tokenizer loading up front so the first real request
    does not pay the initialization cost.
    """
    tok = get_intent_model_tokenizer()
    dummy_input = tok("danswer", return_tensors="tf", truncation=True, padding=True)
    get_local_intent_model()(dummy_input)
|
Reference in New Issue
Block a user