From 55dc6c1b3be1ef611d96b95e2b940a2dfe7c926d Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 7 Jun 2024 20:18:48 -0700 Subject: [PATCH] refac: audio --- backend/apps/audio/main.py | 111 +++++-- backend/config.py | 66 +++- backend/main.py | 9 + src/lib/components/admin/Settings.svelte | 227 +++++++++++++ .../components/admin/Settings/Audio.svelte | 310 ++++++++++++++++++ .../components/admin/Settings/Banners.svelte | 2 +- .../components/admin/Settings/Database.svelte | 2 +- .../components/admin/Settings/General.svelte | 2 +- .../admin/Settings/Pipelines.svelte | 2 +- .../components/admin/Settings/Users.svelte | 2 +- src/lib/components/admin/SettingsModal.svelte | 176 ---------- .../chat/MessageInput/CallOverlay.svelte | 37 ++- .../chat/MessageInput/VoiceRecording.svelte | 4 +- .../chat/Messages/ResponseMessage.svelte | 12 +- src/lib/components/chat/Settings/Audio.svelte | 217 ++---------- src/routes/(app)/+layout.svelte | 1 - src/routes/(app)/+page.svelte | 2 + src/routes/(app)/admin/+layout.svelte | 18 +- src/routes/(app)/admin/+page.svelte | 28 -- src/routes/(app)/admin/settings/+page.svelte | 5 + 20 files changed, 769 insertions(+), 464 deletions(-) create mode 100644 src/lib/components/admin/Settings.svelte create mode 100644 src/lib/components/admin/Settings/Audio.svelte create mode 100644 src/routes/(app)/admin/settings/+page.svelte diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 216c6d42b..7fe9ed28b 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -41,10 +41,15 @@ from config import ( WHISPER_MODEL_DIR, WHISPER_MODEL_AUTO_UPDATE, DEVICE_TYPE, - AUDIO_OPENAI_API_BASE_URL, - AUDIO_OPENAI_API_KEY, - AUDIO_OPENAI_API_MODEL, - AUDIO_OPENAI_API_VOICE, + AUDIO_STT_OPENAI_API_BASE_URL, + AUDIO_STT_OPENAI_API_KEY, + AUDIO_TTS_OPENAI_API_BASE_URL, + AUDIO_TTS_OPENAI_API_KEY, + AUDIO_STT_ENGINE, + AUDIO_STT_MODEL, + AUDIO_TTS_ENGINE, + AUDIO_TTS_MODEL, + AUDIO_TTS_VOICE, AppConfig, ) @@ -61,10 +66,17 @@ app.add_middleware( ) app.state.config = AppConfig() -app.state.config.OPENAI_API_BASE_URL = AUDIO_OPENAI_API_BASE_URL -app.state.config.OPENAI_API_KEY = AUDIO_OPENAI_API_KEY -app.state.config.OPENAI_API_MODEL = AUDIO_OPENAI_API_MODEL -app.state.config.OPENAI_API_VOICE = AUDIO_OPENAI_API_VOICE + +app.state.config.STT_OPENAI_API_BASE_URL = AUDIO_STT_OPENAI_API_BASE_URL +app.state.config.STT_OPENAI_API_KEY = AUDIO_STT_OPENAI_API_KEY +app.state.config.STT_ENGINE = AUDIO_STT_ENGINE +app.state.config.STT_MODEL = AUDIO_STT_MODEL + +app.state.config.TTS_OPENAI_API_BASE_URL = AUDIO_TTS_OPENAI_API_BASE_URL +app.state.config.TTS_OPENAI_API_KEY = AUDIO_TTS_OPENAI_API_KEY +app.state.config.TTS_ENGINE = AUDIO_TTS_ENGINE +app.state.config.TTS_MODEL = AUDIO_TTS_MODEL +app.state.config.TTS_VOICE = AUDIO_TTS_VOICE # setting device type for whisper model whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" @@ -74,41 +86,74 @@ SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/") SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True) -class OpenAIConfigUpdateForm(BaseModel): - url: str - key: str - model: str - speaker: str +class TTSConfigForm(BaseModel): + OPENAI_API_BASE_URL: str + OPENAI_API_KEY: str + ENGINE: str + MODEL: str + VOICE: str + + +class STTConfigForm(BaseModel): + OPENAI_API_BASE_URL: str + OPENAI_API_KEY: str + ENGINE: str + MODEL: str + + +class AudioConfigUpdateForm(BaseModel): + tts: TTSConfigForm + stt: STTConfigForm @app.get("/config") -async def get_openai_config(user=Depends(get_admin_user)): +async def get_audio_config(user=Depends(get_admin_user)): return { - "OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL, - "OPENAI_API_KEY": app.state.config.OPENAI_API_KEY, - "OPENAI_API_MODEL": app.state.config.OPENAI_API_MODEL, - "OPENAI_API_VOICE": app.state.config.OPENAI_API_VOICE, + "tts": { + "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL, + "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY, + "ENGINE": app.state.config.TTS_ENGINE, + "MODEL": app.state.config.TTS_MODEL, + "VOICE": app.state.config.TTS_VOICE, + }, + "stt": { + "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL, + "OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY, + "ENGINE": app.state.config.STT_ENGINE, + "MODEL": app.state.config.STT_MODEL, + }, } @app.post("/config/update") -async def update_openai_config( - form_data: OpenAIConfigUpdateForm, user=Depends(get_admin_user) +async def update_audio_config( + form_data: AudioConfigUpdateForm, user=Depends(get_admin_user) ): - if form_data.key == "": - raise HTTPException(status_code=400, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND) + app.state.config.TTS_OPENAI_API_BASE_URL = form_data.tts.OPENAI_API_BASE_URL + app.state.config.TTS_OPENAI_API_KEY = form_data.tts.OPENAI_API_KEY + app.state.config.TTS_ENGINE = form_data.tts.ENGINE + app.state.config.TTS_MODEL = form_data.tts.MODEL + app.state.config.TTS_VOICE = form_data.tts.VOICE - app.state.config.OPENAI_API_BASE_URL = form_data.url - app.state.config.OPENAI_API_KEY = form_data.key - app.state.config.OPENAI_API_MODEL = form_data.model - app.state.config.OPENAI_API_VOICE = form_data.speaker + app.state.config.STT_OPENAI_API_BASE_URL = form_data.stt.OPENAI_API_BASE_URL + app.state.config.STT_OPENAI_API_KEY = form_data.stt.OPENAI_API_KEY + app.state.config.STT_ENGINE = form_data.stt.ENGINE + app.state.config.STT_MODEL = form_data.stt.MODEL return { - "status": True, - "OPENAI_API_BASE_URL": app.state.config.OPENAI_API_BASE_URL, - "OPENAI_API_KEY": app.state.config.OPENAI_API_KEY, - "OPENAI_API_MODEL": app.state.config.OPENAI_API_MODEL, - "OPENAI_API_VOICE": app.state.config.OPENAI_API_VOICE, + "tts": { + "OPENAI_API_BASE_URL": app.state.config.TTS_OPENAI_API_BASE_URL, + "OPENAI_API_KEY": app.state.config.TTS_OPENAI_API_KEY, + "ENGINE": app.state.config.TTS_ENGINE, + "MODEL": app.state.config.TTS_MODEL, + "VOICE": app.state.config.TTS_VOICE, + }, + "stt": { + "OPENAI_API_BASE_URL": app.state.config.STT_OPENAI_API_BASE_URL, + "OPENAI_API_KEY": app.state.config.STT_OPENAI_API_KEY, + "ENGINE": app.state.config.STT_ENGINE, + "MODEL": app.state.config.STT_MODEL, + }, } @@ -125,13 +170,13 @@ async def speech(request: Request, user=Depends(get_verified_user)): return FileResponse(file_path) headers = {} - headers["Authorization"] = f"Bearer {app.state.config.OPENAI_API_KEY}" + headers["Authorization"] = f"Bearer {app.state.config.TTS_OPENAI_API_KEY}" headers["Content-Type"] = "application/json" r = None try: r = requests.post( - url=f"{app.state.config.OPENAI_API_BASE_URL}/audio/speech", + url=f"{app.state.config.TTS_OPENAI_API_BASE_URL}/audio/speech", data=body, headers=headers, stream=True, diff --git a/backend/config.py b/backend/config.py index dd3bc9e4b..fb61a8380 100644 --- a/backend/config.py +++ b/backend/config.py @@ -933,25 +933,59 @@ IMAGE_GENERATION_MODEL = PersistentConfig( # Audio #################################### -AUDIO_OPENAI_API_BASE_URL = PersistentConfig( - "AUDIO_OPENAI_API_BASE_URL", - "audio.openai.api_base_url", - os.getenv("AUDIO_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL), +AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig( + "AUDIO_STT_OPENAI_API_BASE_URL", + "audio.stt.openai.api_base_url", + os.getenv("AUDIO_STT_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL), ) -AUDIO_OPENAI_API_KEY = PersistentConfig( - "AUDIO_OPENAI_API_KEY", - "audio.openai.api_key", - os.getenv("AUDIO_OPENAI_API_KEY", OPENAI_API_KEY), + +AUDIO_STT_OPENAI_API_KEY = PersistentConfig( + "AUDIO_STT_OPENAI_API_KEY", + "audio.stt.openai.api_key", + os.getenv("AUDIO_STT_OPENAI_API_KEY", OPENAI_API_KEY), ) -AUDIO_OPENAI_API_MODEL = PersistentConfig( - "AUDIO_OPENAI_API_MODEL", - "audio.openai.api_model", - os.getenv("AUDIO_OPENAI_API_MODEL", "tts-1"), + +AUDIO_STT_ENGINE = PersistentConfig( + "AUDIO_STT_ENGINE", + "audio.stt.engine", + os.getenv("AUDIO_STT_ENGINE", ""), ) -AUDIO_OPENAI_API_VOICE = PersistentConfig( - "AUDIO_OPENAI_API_VOICE", - "audio.openai.api_voice", - os.getenv("AUDIO_OPENAI_API_VOICE", "alloy"), + +AUDIO_STT_MODEL = PersistentConfig( + "AUDIO_STT_MODEL", + "audio.stt.model", + os.getenv("AUDIO_STT_MODEL", "whisper-1"), +) + +AUDIO_TTS_OPENAI_API_BASE_URL = PersistentConfig( + "AUDIO_TTS_OPENAI_API_BASE_URL", + "audio.tts.openai.api_base_url", + os.getenv("AUDIO_TTS_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL), +) +AUDIO_TTS_OPENAI_API_KEY = PersistentConfig( + "AUDIO_TTS_OPENAI_API_KEY", + "audio.tts.openai.api_key", + os.getenv("AUDIO_TTS_OPENAI_API_KEY", OPENAI_API_KEY), +) + + +AUDIO_TTS_ENGINE = PersistentConfig( + "AUDIO_TTS_ENGINE", + "audio.tts.engine", + os.getenv("AUDIO_TTS_ENGINE", ""), +) + + +AUDIO_TTS_MODEL = PersistentConfig( + "AUDIO_TTS_MODEL", + "audio.tts.model", + os.getenv("AUDIO_TTS_MODEL", "tts-1"), +) + +AUDIO_TTS_VOICE = PersistentConfig( + "AUDIO_TTS_VOICE", + "audio.tts.voice", + os.getenv("AUDIO_TTS_VOICE", "alloy"), ) diff --git a/backend/main.py b/backend/main.py index b0b089fb8..d7fa940ff 100644 --- a/backend/main.py +++ b/backend/main.py @@ -900,6 +900,15 @@ async def get_app_config(): "enable_community_sharing": webui_app.state.config.ENABLE_COMMUNITY_SHARING, "enable_admin_export": ENABLE_ADMIN_EXPORT, }, + "audio": { + "tts": { + "engine": audio_app.state.config.TTS_ENGINE, + "voice": audio_app.state.config.TTS_VOICE, + }, + "stt": { + "engine": audio_app.state.config.STT_ENGINE, + }, + }, } diff --git a/src/lib/components/admin/Settings.svelte b/src/lib/components/admin/Settings.svelte new file mode 100644 index 000000000..785f41c06 --- /dev/null +++ b/src/lib/components/admin/Settings.svelte @@ -0,0 +1,227 @@ + + +
+
+ + + + + + + + + + + +
+ +
+ {#if selectedTab === 'general'} + { + toast.success($i18n.t('Settings saved successfully!')); + }} + /> + {:else if selectedTab === 'users'} + { + toast.success($i18n.t('Settings saved successfully!')); + }} + /> + {:else if selectedTab === 'audio'} +
+
diff --git a/src/lib/components/admin/Settings/Audio.svelte b/src/lib/components/admin/Settings/Audio.svelte new file mode 100644 index 000000000..7d7f24a70 --- /dev/null +++ b/src/lib/components/admin/Settings/Audio.svelte @@ -0,0 +1,310 @@ + + +
{ + await updateConfigHandler(); + dispatch('save'); + }} +> +
+
+
{$i18n.t('STT Settings')}
+ +
+
{$i18n.t('Speech-to-Text Engine')}
+
+ +
+
+ + {#if STT_ENGINE === 'openai'} +
+
+ + + +
+
+ +
+ +
+
{$i18n.t('STT Model')}
+
+
+ + + + +
+
+
+ {/if} +
+ +
+ +
+
{$i18n.t('TTS Settings')}
+ +
+
{$i18n.t('Text-to-Speech Engine')}
+
+ +
+
+ + {#if TTS_ENGINE === 'openai'} +
+
+ + + +
+
+ {/if} + +
+ + {#if TTS_ENGINE === ''} +
+
{$i18n.t('TTS Voice')}
+
+
+ +
+
+
+
+ {$i18n.t('Allow non-local voices')} +
+ +
+ +
+
+
+ {:else if TTS_ENGINE === 'openai'} +
+
+
{$i18n.t('TTS Voice')}
+
+
+ + + + {#each voices as voice} + +
+
+
+
+
{$i18n.t('TTS Model')}
+
+
+ + + + {#each models as model} + +
+
+
+
+ {/if} +
+
+ +
+ +
+
diff --git a/src/lib/components/admin/Settings/Banners.svelte b/src/lib/components/admin/Settings/Banners.svelte index 418edc9a5..d7f16c31b 100644 --- a/src/lib/components/admin/Settings/Banners.svelte +++ b/src/lib/components/admin/Settings/Banners.svelte @@ -33,7 +33,7 @@ saveHandler(); }} > -
+
diff --git a/src/lib/components/admin/Settings/Database.svelte b/src/lib/components/admin/Settings/Database.svelte index 9ce8e9d8d..b92873a5a 100644 --- a/src/lib/components/admin/Settings/Database.svelte +++ b/src/lib/components/admin/Settings/Database.svelte @@ -30,7 +30,7 @@ saveHandler(); }} > -
+
{$i18n.t('Database')}
diff --git a/src/lib/components/admin/Settings/General.svelte b/src/lib/components/admin/Settings/General.svelte index a7ffdabfb..5a964eb4f 100644 --- a/src/lib/components/admin/Settings/General.svelte +++ b/src/lib/components/admin/Settings/General.svelte @@ -56,7 +56,7 @@ saveHandler(); }} > -
+
{#if adminConfig !== null}
{$i18n.t('General Settings')}
diff --git a/src/lib/components/admin/Settings/Pipelines.svelte b/src/lib/components/admin/Settings/Pipelines.svelte index be557bee1..669a07e5e 100644 --- a/src/lib/components/admin/Settings/Pipelines.svelte +++ b/src/lib/components/admin/Settings/Pipelines.svelte @@ -200,7 +200,7 @@ updateHandler(); }} > -
+
{#if PIPELINES_LIST !== null}
diff --git a/src/lib/components/admin/Settings/Users.svelte b/src/lib/components/admin/Settings/Users.svelte index 44e38f40c..0fde8b62c 100644 --- a/src/lib/components/admin/Settings/Users.svelte +++ b/src/lib/components/admin/Settings/Users.svelte @@ -48,7 +48,7 @@ await config.set(await getBackendConfig()); }} > -
+
{$i18n.t('User Permissions')}
diff --git a/src/lib/components/admin/SettingsModal.svelte b/src/lib/components/admin/SettingsModal.svelte index 78f48cdfc..0ed32e551 100644 --- a/src/lib/components/admin/SettingsModal.svelte +++ b/src/lib/components/admin/SettingsModal.svelte @@ -39,181 +39,5 @@
- -
-
- - - - - - - - - -
-
- {#if selectedTab === 'general'} - { - toast.success($i18n.t('Settings saved successfully!')); - }} - /> - {:else if selectedTab === 'users'} - { - toast.success($i18n.t('Settings saved successfully!')); - }} - /> - {:else if selectedTab === 'db'} - { - toast.success($i18n.t('Settings saved successfully!')); - }} - /> - {:else if selectedTab === 'banners'} - { - toast.success($i18n.t('Settings saved successfully!')); - }} - /> - {:else if selectedTab === 'pipelines'} - { - toast.success($i18n.t('Settings saved successfully!')); - }} - /> - {/if} -
-
diff --git a/src/lib/components/chat/MessageInput/CallOverlay.svelte b/src/lib/components/chat/MessageInput/CallOverlay.svelte index fa9c28b1f..15c649306 100644 --- a/src/lib/components/chat/MessageInput/CallOverlay.svelte +++ b/src/lib/components/chat/MessageInput/CallOverlay.svelte @@ -1,5 +1,5 @@
{ - if ($user.role === 'admin') { - await updateConfigHandler(); - } saveSettings({ audio: { - STTEngine: STTEngine !== '' ? STTEngine : undefined, - TTSEngine: TTSEngine !== '' ? TTSEngine : undefined, - speaker: - (TTSEngine === 'openai' ? OpenAISpeaker : speaker) !== '' - ? TTSEngine === 'openai' - ? OpenAISpeaker - : speaker - : undefined, - model: model !== '' ? model : undefined, - nonLocalVoices: nonLocalVoices + stt: { + engine: STTEngine !== '' ? STTEngine : undefined + }, + tts: { + voice: $config.audio.tts.engine === 'openai' ? voice : voice !== '' ? voice : undefined, + nonLocalVoices: $config.audio.tts.engine === '' ? nonLocalVoices : undefined + } } }); dispatch('save'); @@ -162,31 +90,21 @@
{$i18n.t('STT Settings')}
-
-
{$i18n.t('Speech-to-Text Engine')}
-
- + {#if $config.audio.stt.engine !== 'web'} +
+
{$i18n.t('Speech-to-Text Engine')}
+
+ +
-
+ {/if}
@@ -212,50 +130,6 @@
{$i18n.t('TTS Settings')}
-
-
{$i18n.t('Text-to-Speech Engine')}
-
- -
-
- - {#if $user.role === 'admin'} - {#if TTSEngine === 'openai'} -
- - - -
- {/if} - {/if} -
{$i18n.t('Auto-playback response')}
@@ -277,21 +151,21 @@
- {#if TTSEngine === ''} + {#if $config.audio.tts.engine === ''}
{$i18n.t('Set Voice')}
@@ -307,7 +181,7 @@
- {:else if TTSEngine === 'openai'} + {:else if $config.audio.tts.engine === 'openai'}
{$i18n.t('Set Voice')}
@@ -315,7 +189,7 @@ @@ -327,25 +201,6 @@
-
-
{$i18n.t('Set Model')}
-
-
- - - - {#each models as model} - -
-
-
{/if}
diff --git a/src/routes/(app)/+layout.svelte b/src/routes/(app)/+layout.svelte index 58da2a170..c397c54a6 100644 --- a/src/routes/(app)/+layout.svelte +++ b/src/routes/(app)/+layout.svelte @@ -183,7 +183,6 @@ }); - diff --git a/src/routes/(app)/+page.svelte b/src/routes/(app)/+page.svelte index dcc0d4547..08026e7aa 100644 --- a/src/routes/(app)/+page.svelte +++ b/src/routes/(app)/+page.svelte @@ -1,5 +1,7 @@ + diff --git a/src/routes/(app)/admin/+layout.svelte b/src/routes/(app)/admin/+layout.svelte index 5957b5054..03476f615 100644 --- a/src/routes/(app)/admin/+layout.svelte +++ b/src/routes/(app)/admin/+layout.svelte @@ -30,29 +30,29 @@
-
{$i18n.t('Workspace')}
+
{$i18n.t('Admin Panel')}
-
-
--> +

diff --git a/src/routes/(app)/admin/+page.svelte b/src/routes/(app)/admin/+page.svelte index 58857ae43..8ae25479d 100644 --- a/src/routes/(app)/admin/+page.svelte +++ b/src/routes/(app)/admin/+page.svelte @@ -11,12 +11,8 @@ import { toast } from 'svelte-sonner'; import { updateUserRole, getUsers, deleteUserById } from '$lib/apis/users'; - import { getSignUpEnabledStatus, toggleSignUpEnabledStatus } from '$lib/apis/auths'; - - import MenuLines from '$lib/components/icons/MenuLines.svelte'; import EditUserModal from '$lib/components/admin/EditUserModal.svelte'; - import SettingsModal from '$lib/components/admin/SettingsModal.svelte'; import Pagination from '$lib/components/common/Pagination.svelte'; import ChatBubbles from '$lib/components/icons/ChatBubbles.svelte'; import Tooltip from '$lib/components/common/Tooltip.svelte'; @@ -34,7 +30,6 @@ let page = 1; - let showSettingsModal = false; let showAddUserModal = false; let showUserChatsModal = false; @@ -100,7 +95,6 @@ }} /> - {#if loaded}
@@ -137,28 +131,6 @@ - - - -
diff --git a/src/routes/(app)/admin/settings/+page.svelte b/src/routes/(app)/admin/settings/+page.svelte new file mode 100644 index 000000000..a0a86f435 --- /dev/null +++ b/src/routes/(app)/admin/settings/+page.svelte @@ -0,0 +1,5 @@ + + +