From 20c4cdbdda6b85d27a323b1606a0d9a603281f71 Mon Sep 17 00:00:00 2001
From: Yuhong Sun
Date: Wed, 26 Jun 2024 10:44:22 -0700
Subject: [PATCH] Catch LLM Generation Failure (#1712)

---
 backend/danswer/llm/chat_llm.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py
index d450efa91..f5c3f34ac 100644
--- a/backend/danswer/llm/chat_llm.py
+++ b/backend/danswer/llm/chat_llm.py
@@ -5,6 +5,7 @@ from typing import Any
 from typing import cast
 
 import litellm  # type: ignore
+from httpx import RemoteProtocolError
 from langchain.schema.language_model import LanguageModelInput
 from langchain_core.messages import AIMessage
 from langchain_core.messages import AIMessageChunk
@@ -338,17 +339,23 @@ class DefaultMultiLLM(LLM):
 
         output = None
         response = self._completion(prompt, tools, tool_choice, True)
-        for part in response:
-            if len(part["choices"]) == 0:
-                continue
-            delta = part["choices"][0]["delta"]
-            message_chunk = _convert_delta_to_message_chunk(delta, output)
-            if output is None:
-                output = message_chunk
-            else:
-                output += message_chunk
+        try:
+            for part in response:
+                if len(part["choices"]) == 0:
+                    continue
+                delta = part["choices"][0]["delta"]
+                message_chunk = _convert_delta_to_message_chunk(delta, output)
+                if output is None:
+                    output = message_chunk
+                else:
+                    output += message_chunk
 
-            yield message_chunk
+                yield message_chunk
+
+        except RemoteProtocolError:
+            raise RuntimeError(
+                "The AI model failed partway through generation, please try again."
+            )
 
         if LOG_ALL_MODEL_INTERACTIONS and output:
             content = output.content or ""
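
A minimal, self-contained sketch of the pattern this patch applies: the try/except wraps consumption of the streamed litellm response, so an httpx `RemoteProtocolError` (raised when the server drops the connection partway through a streamed response) at any chunk is converted into a `RuntimeError` whose message the UI can surface. The names `fake_token_stream` and `stream_with_guard` below are hypothetical stand-ins used only for illustration; they are not part of the Danswer codebase.

```python
# Sketch of the error-translation pattern from this patch, under the
# assumption that the upstream stream raises httpx.RemoteProtocolError
# on a mid-generation disconnect. fake_token_stream and stream_with_guard
# are hypothetical names for illustration.
from collections.abc import Iterator

from httpx import RemoteProtocolError


def fake_token_stream(fail_after: int | None = None) -> Iterator[str]:
    """Yields tokens, optionally simulating a mid-stream disconnect."""
    for i, token in enumerate(["Hello", " ", "world", "!"]):
        if fail_after is not None and i == fail_after:
            # httpx raises this when the peer closes the connection
            # before the response is complete
            raise RemoteProtocolError("peer closed connection")
        yield token


def stream_with_guard(tokens: Iterator[str]) -> Iterator[str]:
    """Re-yields tokens, mapping a dropped connection to a RuntimeError."""
    try:
        for token in tokens:
            yield token
    except RemoteProtocolError:
        raise RuntimeError(
            "The AI model failed partway through generation, please try again."
        )


if __name__ == "__main__":
    # Normal case: the full stream is forwarded unchanged.
    print("".join(stream_with_guard(fake_token_stream())))

    # Failure case: the disconnect surfaces as a RuntimeError the caller
    # can catch and display.
    try:
        for chunk in stream_with_guard(fake_token_stream(fail_after=2)):
            print(chunk, end="")
    except RuntimeError as e:
        print(f"\nerror: {e}")
```

Because the `yield` sits inside the `try`, the guard catches a failure raised at any point while the consumer is pulling chunks from the generator, not just on the first read; that is why the patch moves the whole consumption loop, including the `yield`, under the `try` block.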