From 710850e442f98d5308d36cb8cd3a5556a41a8a46 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Sat, 20 Apr 2024 15:15:59 -0500
Subject: [PATCH] refac: audio

---
 backend/apps/audio/main.py                    | 79 ++++++++++++++++++-
 backend/apps/images/main.py                   |  8 +-
 backend/apps/rag/main.py                      |  6 +-
 backend/apps/rag/utils.py                     |  4 +-
 backend/config.py                             | 14 ++++
 src/lib/apis/audio/index.ts                   | 33 +++++++-
 .../chat/Messages/ResponseMessage.svelte      |  2 +-
 7 files changed, 137 insertions(+), 9 deletions(-)

diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index f93b50f6e..94c1c3595 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -10,9 +10,18 @@ from fastapi import (
     File,
     Form,
 )
+
+from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
+
 from fastapi.middleware.cors import CORSMiddleware
 from faster_whisper import WhisperModel
 
+import requests
+import hashlib
+from pathlib import Path
+import json
+
+
 from constants import ERROR_MESSAGES
 from utils.utils import (
     decode_token,
@@ -30,6 +39,8 @@ from config import (
     WHISPER_MODEL_DIR,
     WHISPER_MODEL_AUTO_UPDATE,
     DEVICE_TYPE,
+    OPENAI_API_BASE_URL,
+    OPENAI_API_KEY,
 )
 
 log = logging.getLogger(__name__)
@@ -44,12 +55,78 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+
+app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL
+app.state.OPENAI_API_KEY = OPENAI_API_KEY
+
 # setting device type for whisper model
 whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
 log.info(f"whisper_device_type: {whisper_device_type}")
 
+SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
+SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
 
-@app.post("/transcribe")
+
+@app.post("/speech")
+async def speech(request: Request, user=Depends(get_verified_user)):
+    idx = None
+    try:
+        body = await request.body()
+        name = hashlib.sha256(body).hexdigest()
+
+        file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
+        file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
+
+        # Check if the file already exists in the cache
+        if file_path.is_file():
+            return FileResponse(file_path)
+
+        headers = {}
+        headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
+        headers["Content-Type"] = "application/json"
+
+        r = None
+        try:
+            r = requests.post(
+                url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
+                data=body,
+                headers=headers,
+                stream=True,
+            )
+
+            r.raise_for_status()
+
+            # Save the streaming content to a file
+            with open(file_path, "wb") as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+
+            with open(file_body_path, "w") as f:
+                json.dump(json.loads(body.decode("utf-8")), f)
+
+            # Return the saved file
+            return FileResponse(file_path)
+
+        except Exception as e:
+            log.exception(e)
+            error_detail = "Open WebUI: Server Connection Error"
+            if r is not None:
+                try:
+                    res = r.json()
+                    if "error" in res:
+                        error_detail = f"External: {res['error']}"
+                except Exception:
+                    error_detail = f"External: {e}"
+
+            raise HTTPException(
+                status_code=r.status_code if r is not None else 500, detail=error_detail
+            )
+
+    except ValueError:
+        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
+
+
+@app.post("/transcriptions")
 def transcribe(
     file: UploadFile = File(...),
     user=Depends(get_current_user),
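Note on the /speech route above: responses are cached on disk under CACHE_DIR/audio/speech, keyed by a SHA-256 digest of the raw request body, so a byte-identical payload is served from the cache without touching the OpenAI endpoint. A minimal standalone sketch of the key derivation (not part of the patch; the payload values are examples):

# Standalone sketch of the cache-key scheme used by the /speech route.
# Keys are derived from the raw body bytes, so key order and whitespace
# in the client's JSON must match exactly for a cache hit.
import hashlib
import json

body = json.dumps({"model": "tts-1", "input": "Hello!", "voice": "alloy"}).encode("utf-8")
name = hashlib.sha256(body).hexdigest()

print(f"{name}.mp3")   # audio file served on a cache hit
print(f"{name}.json")  # request body stored alongside it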
diff --git a/backend/apps/images/main.py b/backend/apps/images/main.py
index f39984de0..a3939d206 100644
--- a/backend/apps/images/main.py
+++ b/backend/apps/images/main.py
@@ -35,6 +35,8 @@ from config import (
     ENABLE_IMAGE_GENERATION,
     AUTOMATIC1111_BASE_URL,
     COMFYUI_BASE_URL,
+    OPENAI_API_BASE_URL,
+    OPENAI_API_KEY,
 )
 
 
@@ -56,7 +58,9 @@ app.add_middleware(
 
 app.state.ENGINE = ""
 app.state.ENABLED = ENABLE_IMAGE_GENERATION
-app.state.OPENAI_API_KEY = ""
+app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL
+app.state.OPENAI_API_KEY = OPENAI_API_KEY
+
 app.state.MODEL = ""
 
 
@@ -360,7 +364,7 @@ def generate_image(
         }
 
         r = requests.post(
-            url=f"https://api.openai.com/v1/images/generations",
+            url=f"{app.state.OPENAI_API_BASE_URL}/images/generations",
             json=data,
             headers=headers,
         )
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 5e9564f7d..47ffc0170 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -70,6 +70,8 @@ from config import (
     RAG_EMBEDDING_ENGINE,
     RAG_EMBEDDING_MODEL,
     RAG_EMBEDDING_MODEL_AUTO_UPDATE,
+    RAG_OPENAI_API_BASE_URL,
+    RAG_OPENAI_API_KEY,
     DEVICE_TYPE,
     CHROMA_CLIENT,
     CHUNK_SIZE,
@@ -94,8 +96,8 @@ app.state.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
 app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
 app.state.RAG_TEMPLATE = RAG_TEMPLATE
 
-app.state.RAG_OPENAI_API_BASE_URL = "https://api.openai.com"
-app.state.RAG_OPENAI_API_KEY = ""
+app.state.RAG_OPENAI_API_BASE_URL = RAG_OPENAI_API_BASE_URL
+app.state.RAG_OPENAI_API_KEY = RAG_OPENAI_API_KEY
 
 app.state.PDF_EXTRACT_IMAGES = False
diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py
index daea36863..f4d1246c7 100644
--- a/backend/apps/rag/utils.py
+++ b/backend/apps/rag/utils.py
@@ -324,11 +324,11 @@ def get_embedding_model_path(
 
 
 def generate_openai_embeddings(
-    model: str, text: str, key: str, url: str = "https://api.openai.com"
+    model: str, text: str, key: str, url: str = "https://api.openai.com/v1"
 ):
     try:
         r = requests.post(
-            f"{url}/v1/embeddings",
+            f"{url}/embeddings",
             headers={
                 "Content-Type": "application/json",
                 "Authorization": f"Bearer {key}",
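The rag/utils.py change above moves the "/v1" segment out of the request path and into the default base URL, matching how OPENAI_API_BASE_URLS entries are written elsewhere in the codebase. A rough standalone equivalent of the request the helper now sends (the key and model name are placeholders, not values from the patch):

# Standalone sketch of the request generate_openai_embeddings now builds.
# The base URL already carries the API version; only the resource path is
# appended. "sk-..." and the model name below are placeholders.
import requests

url = "https://api.openai.com/v1"  # default after this patch; "/v1" used to live in the path
r = requests.post(
    f"{url}/embeddings",
    headers={
        "Content-Type": "application/json",
        "Authorization": "Bearer sk-...",
    },
    json={"input": ["some text to embed"], "model": "text-embedding-ada-002"},
)
print(r.json()["data"][0]["embedding"][:5])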
Query: [query]""" +RAG_OPENAI_API_BASE_URL = os.getenv("RAG_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL) +RAG_OPENAI_API_KEY = os.getenv("RAG_OPENAI_API_KEY", OPENAI_API_KEY) + #################################### # Transcribe #################################### diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts index d28483394..1919d0ee7 100644 --- a/src/lib/apis/audio/index.ts +++ b/src/lib/apis/audio/index.ts @@ -5,7 +5,7 @@ export const transcribeAudio = async (token: string, file: File) => { data.append('file', file); let error = null; - const res = await fetch(`${AUDIO_API_BASE_URL}/transcribe`, { + const res = await fetch(`${AUDIO_API_BASE_URL}/transcriptions`, { method: 'POST', headers: { Accept: 'application/json', @@ -29,3 +29,34 @@ export const transcribeAudio = async (token: string, file: File) => { return res; }; + +export const synthesizeOpenAISpeech = async ( + token: string = '', + speaker: string = 'alloy', + text: string = '' +) => { + let error = null; + + const res = await fetch(`${AUDIO_API_BASE_URL}/speech`, { + method: 'POST', + headers: { + Authorization: `Bearer ${token}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + model: 'tts-1', + input: text, + voice: speaker + }) + }).catch((err) => { + console.log(err); + error = err; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 3789faaa9..fd2de7273 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -15,7 +15,7 @@ const dispatch = createEventDispatcher(); import { config, settings } from '$lib/stores'; - import { synthesizeOpenAISpeech } from '$lib/apis/openai'; + import { synthesizeOpenAISpeech } from '$lib/apis/audio'; import { imageGenerations } from '$lib/apis/images'; import { approximateToHumanReadable,