diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index c565bf481..167db77ba 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -10,12 +10,12 @@ from fastapi import (
     File,
     Form,
 )
-
 from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
 
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
+from typing import List
 
 import uuid
 import requests
 import hashlib
@@ -31,6 +31,7 @@ from utils.utils import (
 )
 from utils.misc import calculate_sha256
 
+
 from config import (
     SRC_LOG_LEVELS,
     CACHE_DIR,
@@ -252,15 +253,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
             )
 
     elif app.state.config.TTS_ENGINE == "elevenlabs":
-        payload = None
         try:
             payload = json.loads(body.decode("utf-8"))
         except Exception as e:
             log.exception(e)
-            pass
+            raise HTTPException(status_code=400, detail="Invalid JSON payload")
 
-        url = f"https://api.elevenlabs.io/v1/text-to-speech/{payload['voice']}"
+        voice_id = payload.get("voice", "")
+        url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
 
         headers = {
             "Accept": "audio/mpeg",
@@ -435,3 +436,69 @@ def transcribe(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.DEFAULT(e),
         )
+
+
+def get_available_models() -> List[dict]:
+    if app.state.config.TTS_ENGINE == "openai":
+        return [{"id": "tts-1"}, {"id": "tts-1-hd"}]
+    elif app.state.config.TTS_ENGINE == "elevenlabs":
+        headers = {
+            "xi-api-key": app.state.config.TTS_API_KEY,
+            "Content-Type": "application/json",
+        }
+
+        try:
+            response = requests.get(
+                "https://api.elevenlabs.io/v1/models", headers=headers
+            )
+            response.raise_for_status()
+            models = response.json()
+            return [
+                {"name": model["name"], "id": model["model_id"]} for model in models
+            ]
+        except requests.RequestException as e:
+            log.error(f"Error fetching models: {str(e)}")
+    return []
+
+
+@app.get("/models")
+async def get_models(user=Depends(get_verified_user)):
+    return {"models": get_available_models()}
+
+
+def get_available_voices() -> List[dict]:
+    if app.state.config.TTS_ENGINE == "openai":
+        return [
+            {"name": "alloy", "id": "alloy"},
+            {"name": "echo", "id": "echo"},
+            {"name": "fable", "id": "fable"},
+            {"name": "onyx", "id": "onyx"},
+            {"name": "nova", "id": "nova"},
+            {"name": "shimmer", "id": "shimmer"},
+        ]
+    elif app.state.config.TTS_ENGINE == "elevenlabs":
+        headers = {
+            "xi-api-key": app.state.config.TTS_API_KEY,
+            "Content-Type": "application/json",
+        }
+
+        try:
+            response = requests.get(
+                "https://api.elevenlabs.io/v1/voices", headers=headers
+            )
+            response.raise_for_status()
+            voices_data = response.json()
+
+            voices = []
+            for voice in voices_data.get("voices", []):
+                voices.append({"name": voice["name"], "id": voice["voice_id"]})
+            return voices
+        except requests.RequestException as e:
+            log.error(f"Error fetching voices: {str(e)}")
+
+    return []
+
+
+@app.get("/voices")
+async def get_voices(user=Depends(get_verified_user)):
+    return {"voices": get_available_voices()}
diff --git a/src/lib/apis/audio/index.ts b/src/lib/apis/audio/index.ts
index 9716c552a..af09af990 100644
--- a/src/lib/apis/audio/index.ts
+++ b/src/lib/apis/audio/index.ts
@@ -131,3 +131,59 @@ export const synthesizeOpenAISpeech = async (
 
 	return res;
 };
+
+export const getModels = async (token: string = '') => {
+	let error = null;
+
+	const res = await fetch(`${AUDIO_API_BASE_URL}/models`, {
+		method: 'GET',
+		headers: {
+			'Content-Type': 'application/json',
+			Authorization: `Bearer ${token}`
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.log(err);
+
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
+export const getVoices = async (token: string = '') => {
+	let error = null;
+
+	const res = await fetch(`${AUDIO_API_BASE_URL}/voices`, {
+		method: 'GET',
+		headers: {
+			'Content-Type': 'application/json',
+			Authorization: `Bearer ${token}`
+		}
+	})
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			error = err.detail;
+			console.log(err);
+
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte
index 50ce7418e..7c3300568 100644
--- a/src/lib/components/admin/Settings/Audio.svelte
+++ b/src/lib/components/admin/Settings/Audio.svelte
@@ -1,13 +1,19 @@
@@ -185,13 +198,15 @@
 					class=" dark:bg-gray-900 w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
 					bind:value={TTS_ENGINE}
 					placeholder="Select a mode"
-					on:change={(e) => {
+					on:change={async (e) => {
+						await updateConfigHandler();
+						await getVoices();
+						await getModels();
+
 						if (e.target.value === 'openai') {
-							getOpenAIVoices();
 							TTS_VOICE = 'alloy';
 							TTS_MODEL = 'tts-1';
 						} else {
-							getWebAPIVoices();
 							TTS_VOICE = '';
 							TTS_MODEL = '';
 						}
@@ -268,7 +283,7 @@
 									{#each voices as voice}
-
 									{/each}
@@ -279,15 +294,15 @@
-
+									{#each models as model}
-
@@ -309,7 +324,7 @@
 									{#each voices as voice}
-
 									{/each}
@@ -320,15 +335,15 @@
-
+									{#each models as model}
-
diff --git a/src/lib/components/chat/Settings/Audio.svelte b/src/lib/components/chat/Settings/Audio.svelte
index 2876321aa..75280ade3 100644
--- a/src/lib/components/chat/Settings/Audio.svelte
+++ b/src/lib/components/chat/Settings/Audio.svelte
@@ -1,7 +1,10 @@
@@ -195,7 +194,7 @@
 							{#each voices as voice}
-
 							{/each}
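
For context, a minimal sketch of how a settings component might consume the two new endpoints through the helpers added in src/lib/apis/audio/index.ts. The import path, the use of localStorage.token as the bearer token, and the aliased names are assumptions for illustration, not part of this patch.

	// Hypothetical usage: load the TTS model and voice lists when a settings panel mounts.
	// Assumes `$lib/apis/audio` resolves to the file modified above and that
	// `localStorage.token` holds the current session token.
	import { getModels as getAudioModels, getVoices as getAudioVoices } from '$lib/apis/audio';

	let models: { id: string; name?: string }[] = [];
	let voices: { id: string; name: string }[] = [];

	const loadAudioOptions = async () => {
		// The endpoints respond with { models: [...] } and { voices: [...] };
		// the helpers resolve to null on failure, so fall back to empty lists.
		const modelsRes = await getAudioModels(localStorage.token);
		const voicesRes = await getAudioVoices(localStorage.token);

		models = modelsRes?.models ?? [];
		voices = voicesRes?.voices ?? [];
	};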