don't skip the start of the json answer value (#2067)

rkuo-danswer 2024-08-06 16:59:13 -07:00 committed by GitHub
parent f20984ea1d
commit fcc4c30ead
3 changed files with 215 additions and 20 deletions
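
What the fix addresses: the old streaming code detected the start of the JSON answer with a substring check on whitespace-stripped model output, then skipped to the next token. When the token that completed the '{"answer": "' prefix also carried the first characters of the answer value, those characters were consumed by the detection step and never streamed. The new code matches a compiled, whitespace-tolerant pattern against the raw buffered output and yields whatever follows the match. A minimal sketch of the before/after behavior (standalone toy functions for illustration only; the real logic lives in process_model_tokens in the diff below):

    import re

    # The pattern added by this commit: the JSON prefix up to the opening
    # quote of the answer value, tolerant of whitespace.
    answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')


    def old_detect(model_output: str) -> str | None:
        # Old approach: a substring test on whitespace-stripped output. It
        # can only report that the answer started; any answer characters
        # that arrived in the same token are lost.
        if '{"answer":"' in re.sub(r"\s", "", model_output):
            return ""
        return None


    def new_detect(model_output: str) -> str | None:
        # New approach: match against the raw output and keep the remainder.
        m = answer_pattern.match(model_output)
        return model_output[m.end() :] if m else None


    buffered = '{"answer": "Yes'
    print(old_detect(buffered))  # '' -> the leading "Yes" was skipped
    print(new_detect(buffered))  # 'Yes' -> streamed to the client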

backend/danswer/llm/answering/stream_processing/quotes_processing.py

@@ -17,7 +17,6 @@ from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUOTE_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT
 from danswer.search.models import InferenceChunk
 from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import clean_model_quote
@@ -28,6 +27,8 @@ from danswer.utils.text_processing import shared_precompare_cleanup

 logger = setup_logger()

+answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')
+

 def _extract_answer_quotes_freeform(
     answer_raw: str,
@@ -166,11 +167,8 @@ def process_answer(
     into an Answer and Quotes AND (2) after the complete streaming response
     has been received to process the model output into an Answer and Quotes."""
     answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt)
-    if answer == UNCERTAINTY_PAT or not answer:
-        if answer == UNCERTAINTY_PAT:
-            logger.debug("Answer matched UNCERTAINTY_PAT")
-        else:
-            logger.debug("No answer extracted from raw output")
+    if not answer:
+        logger.debug("No answer extracted from raw output")
         return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])

     logger.info(f"Answer: {answer}")
@@ -227,22 +225,26 @@ def process_model_tokens(
     found_answer_start = False if is_json_prompt else True
     found_answer_end = False
     hold_quote = ""

     for token in tokens:
         model_previous = model_output
         model_output += token

-        if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output):
-            # Note, if the token that completes the pattern has additional text, for example if the token is "?
-            # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the
-            # event that the model outputs the UNCERTAINTY_PAT
-            found_answer_start = True
+        if not found_answer_start:
+            m = answer_pattern.match(model_output)
+            if m:
+                found_answer_start = True

-            # Prevent heavy cases of hallucinations where model is not even providing a json until later
-            if is_json_prompt and len(model_output) > 40:
-                logger.warning("LLM did not produce json as prompted")
-                found_answer_end = True
+                # Prevent heavy cases of hallucinations where model is not even providing a json until later
+                if is_json_prompt and len(model_output) > 40:
+                    logger.warning("LLM did not produce json as prompted")
+                    found_answer_end = True
+                    continue
+
+                remaining = model_output[m.end() :]
+                if len(remaining) > 0:
+                    yield DanswerAnswerPiece(answer_piece=remaining)

             continue

         if found_answer_start and not found_answer_end:
             if is_json_prompt and _stream_json_answer_end(model_previous, token):
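
Two properties of the new detection are worth noting: re.match anchors at the start of the buffered output, so chatty preamble before the JSON object never registers as an answer start, and the \s* groups tolerate pretty-printed output such as the "{", "\n ", '"answer": "Yes' token sequence in the new test below. A quick standalone check of the pattern (same regex as in the diff above, exercised outside the module):

    import re

    answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')

    # Pretty-printed JSON start, as produced token-by-token in the test below.
    buffered = '{\n "answer": "Yes'
    m = answer_pattern.match(buffered)
    assert m is not None
    assert buffered[m.end() :] == "Yes"  # real answer text, must be streamed

    # match() is anchored at position 0, so preamble before the JSON object
    # does not trigger answer streaming.
    assert answer_pattern.match('Sure thing: {"answer": "Yes') is None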

backend/danswer/prompts/direct_qa_prompts.py

@@ -7,7 +7,6 @@ from danswer.prompts.constants import FINAL_QUERY_PAT
 from danswer.prompts.constants import GENERAL_SEP_PAT
 from danswer.prompts.constants import QUESTION_PAT
 from danswer.prompts.constants import THOUGHT_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT


 ONE_SHOT_SYSTEM_PROMPT = """
@@ -66,9 +65,6 @@ EMPTY_SAMPLE_JSON = {
 }

-ANSWER_NOT_FOUND_RESPONSE = f'{{"answer": "{UNCERTAINTY_PAT}", "quotes": []}}'
-
-
 # Default json prompt which can reference multiple docs and provide answer + quotes
 # system_like_header is similar to system message, can be user provided or defaults to QA_HEADER
 # context/history blocks are for context documents and conversation history, they can be blank
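
These deletions track the parser change above: with the UNCERTAINTY_PAT special case gone from process_answer, the canned ANSWER_NOT_FOUND_RESPONSE constant is no longer referenced here, and only an empty extraction maps to a null answer. A toy sketch of the simplified control flow (hypothetical stand-in type, not the real danswer.chat.models class):

    from dataclasses import dataclass


    @dataclass
    class ToyAnswer:  # hypothetical stand-in for DanswerAnswer
        answer: str | None


    def toy_process_answer(extracted: str | None) -> ToyAnswer:
        # New behavior: an uncertainty phrase flows through like any other
        # answer text; only a failed extraction returns the null answer.
        if not extracted:
            return ToyAnswer(answer=None)
        return ToyAnswer(answer=extracted)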

backend/tests/unit/danswer/llm/answering/stream_processing/test_quotes_processing.py (new file)

@@ -0,0 +1,197 @@
import json
from datetime import datetime

from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import LlmDoc
from danswer.configs.constants import DocumentSource
from danswer.llm.answering.stream_processing.quotes_processing import (
    process_model_tokens,
)
mock_docs = [
    LlmDoc(
        document_id=f"doc_{int(id/2)}",
        content="Document is a doc",
        blurb=f"Document #{id}",
        semantic_identifier=f"Doc {id}",
        source_type=DocumentSource.WEB,
        metadata={},
        updated_at=datetime.now(),
        link=f"https://{int(id/2)}.com" if int(id / 2) % 2 == 0 else None,
        source_links={0: "https://mintlify.com/docs/settings/broken-links"},
    )
    for id in range(10)
]
def test_process_model_tokens() -> None:
    tokens = [
        "{",
        "\n ",
        '"answer": "Yes',
        ", Danswer allows",
        " customized prompts. This",
        " feature",
        " is currently being",
        " developed and implemente",
        "d to",
        " improve",
        " the accuracy",
        " of",
        " Language",
        " Models (",
        "LL",
        "Ms) for",
        " different",
        " companies",
        ".",
        " The custom",
        "ized prompts feature",
        " woul",
        "d allow users to ad",
        "d person",
        "alized prom",
        "pts through",
        " an",
        " interface or",
        " metho",
        "d,",
        " which would then be used to",
        " train",
        " the LLM.",
        " This enhancement",
        " aims to make",
        " Danswer more",
        " adaptable to",
        " different",
        " business",
        " contexts",
        " by",
        " tail",
        "oring it",
        " to the specific language",
        " an",
        "d terminology",
        " used within",
        " a",
        " company.",
        " Additionally",
        ",",
        " Danswer already",
        " supports creating",
        " custom AI",
        " Assistants with",
        " different",
        " prom",
        "pts and backing",
        " knowledge",
        " sets",
        ",",
        " which",
        " is",
        " a form",
        " of prompt",
        " customization. However, it",
        "'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
        " aspects",
        " of prompt",
        " customization,",
        " such as for",
        " Sl",
        "ack",
        "b",
        "ots, may",
        " still",
        " be in",
        " development or have",
        ' limitations.",',
        '\n "quotes": [',
        '\n "We',
        " woul",
        "d like to ad",
        "d customized prompts for",
        " different",
        " companies to improve the accuracy of",
        " Language",
        " Model",
        " (LLM)",
        '.",\n "A',
        " new",
        " feature that",
        " allows users to add personalize",
        "d prompts.",
        " This would involve",
        " creating",
        " an interface or method for",
        " users to input",
        " their",
        " own",
        " prom",
        "pts,",
        " which would then be used to",
        ' train the LLM.",',
        '\n "Create',
        " custom AI Assistants with",
        " different prompts and backing knowledge",
        ' sets.",',
        '\n "This',
        " PR",
        " fixes",
        " https",
        "://github.com/dan",
        "swer-ai/dan",
        "swer/issues/1",
        "584",
        " by",
        " setting",
        " the system",
        " default",
        " prompt for",
        " sl",
        "ackbots const",
        "rained by",
        " ",
        "document sets",
        ".",
        " It",
        " probably",
        " isn",
        "'t ideal",
        " -",
        " it",
        " might",
        " be pref",
        "erable to be",
        " able to select",
        " a prompt for",
        " the",
        " slackbot from",
        " the",
        " admin",
        " panel",
        " -",
        " but it sol",
        "ves the immediate problem",
        " of",
        " the slack",
        " listener",
        " cr",
        "ashing when",
        " configure",
        "d this",
        ' way."\n ]',
        "\n}",
        "",
    ]
    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)

    s_json = "".join(tokens)
    j = json.loads(s_json)
    expected_answer = j["answer"]

    actual = ""
    for o in gen:
        if isinstance(o, DanswerAnswerPiece):
            if o.answer_piece:
                actual += o.answer_piece

    assert expected_answer == actual
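
Two notes on the test. First, the token stream was evidently captured from a live run: one token even contains an interleaved "Logging Details LiteLLM-Success Call: None" log fragment, which is harmless here because both the expected answer (json.loads of the joined stream) and the streamed output carry it. Second, a smaller companion case that isolates exactly the regression fixed in this commit might look as follows (hypothetical, not part of the commit; same call shape as test_process_model_tokens above, with the first answer characters sharing a token with the opening quote):

    def test_answer_start_in_same_token() -> None:
        # Joined, the tokens form: {"answer": "Yes, it works.", "quotes": []}
        tokens = ['{"answer": "Yes', ", it works.", '", "quotes": []}']
        gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)

        actual = ""
        for o in gen:
            if isinstance(o, DanswerAnswerPiece) and o.answer_piece:
                actual += o.answer_piece

        # Before this fix, the leading "Yes" was swallowed by start detection.
        assert actual == "Yes, it works."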