Fix missing JSON

* initial steps

* k

* remove logs

* k

* k
This commit is contained in:
pablodanswer 2024-11-20 13:24:43 -08:00 committed by GitHub
parent 8309f4a802
commit bf291d0c0a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 15 additions and 5 deletions
backend
danswer
llm/answering/stream_processing
utils
tests/unit/danswer/llm/answering/stream_processing

@ -231,16 +231,16 @@ class QuotesProcessor:
model_previous = self.model_output
self.model_output += token
if not self.found_answer_start:
m = answer_pattern.search(self.model_output)
if m:
self.found_answer_start = True
# Prevent heavy cases of hallucinations
if self.is_json_prompt and len(self.model_output) > 70:
logger.warning("LLM did not produce json as prompted")
if self.is_json_prompt and len(self.model_output) > 400:
self.found_answer_end = True
logger.warning("LLM did not produce json as prompted")
logger.debug("Model output thus far:", self.model_output)
return
remaining = self.model_output[m.end() :]

@ -4,6 +4,10 @@ import re
import string
from urllib.parse import quote
from danswer.utils.logger import setup_logger
logger = setup_logger(__name__)
ESCAPE_SEQUENCE_RE = re.compile(
r"""
@ -77,7 +81,8 @@ def extract_embedded_json(s: str) -> dict:
last_brace_index = s.rfind("}")
if first_brace_index == -1 or last_brace_index == -1:
raise ValueError("No valid json found")
logger.warning("No valid json found, assuming answer is entire string")
return {"answer": s, "quotes": []}
json_str = s[first_brace_index : last_brace_index + 1]
try:

@ -324,8 +324,13 @@ def test_lengthy_prefixed_json_with_quotes() -> None:
assert quotes[0] == "Document"
def test_prefixed_json_with_quotes() -> None:
def test_json_with_lengthy_prefix_and_quotes() -> None:
tokens = [
"*** Based on the provided documents, there does not appear to be any information ",
"directly relevant to answering which documents are my favorite. ",
"The documents seem to be focused on describing the Danswer product ",
"and its features/use cases. Since I do not have personal preferences ",
"for documents, I will provide a general response:\n\n",
"```",
"json",
"\n",