diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py
index 10d15b7195..b5f88804f2 100644
--- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py
+++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py
@@ -17,7 +17,6 @@ from danswer.chat.models import LlmDoc
 from danswer.configs.chat_configs import QUOTE_ALLOWED_ERROR_PERCENT
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUOTE_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT
 from danswer.search.models import InferenceChunk
 from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import clean_model_quote
@@ -28,6 +27,8 @@ from danswer.utils.text_processing import shared_precompare_cleanup
 
 logger = setup_logger()
 
+answer_pattern = re.compile(r'{\s*"answer"\s*:\s*"')
+
 
 def _extract_answer_quotes_freeform(
     answer_raw: str,
@@ -166,11 +167,8 @@ def process_answer(
     into an Answer and Quotes AND (2) after the complete streaming response
     has been received to process the model output into an Answer and Quotes."""
     answer, quote_strings = separate_answer_quotes(answer_raw, is_json_prompt)
-    if answer == UNCERTAINTY_PAT or not answer:
-        if answer == UNCERTAINTY_PAT:
-            logger.debug("Answer matched UNCERTAINTY_PAT")
-        else:
-            logger.debug("No answer extracted from raw output")
+    if not answer:
+        logger.debug("No answer extracted from raw output")
         return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])
 
     logger.info(f"Answer: {answer}")
@@ -227,22 +225,26 @@ def process_model_tokens(
     found_answer_start = False if is_json_prompt else True
     found_answer_end = False
     hold_quote = ""
+
     for token in tokens:
         model_previous = model_output
         model_output += token
 
-        if not found_answer_start and '{"answer":"' in re.sub(r"\s", "", model_output):
-            # Note, if the token that completes the pattern has additional text, for example if the token is "?
-            # Then the chars after " will not be streamed, but this is ok as it prevents streaming the ? in the
-            # event that the model outputs the UNCERTAINTY_PAT
-            found_answer_start = True
+        if not found_answer_start:
+            m = answer_pattern.match(model_output)
+            if m:
+                found_answer_start = True
 
-            # Prevent heavy cases of hallucinations where model is not even providing a json until later
-            if is_json_prompt and len(model_output) > 40:
-                logger.warning("LLM did not produce json as prompted")
-                found_answer_end = True
+                # Prevent heavy cases of hallucinations where model is not even providing a json until later
+                if is_json_prompt and len(model_output) > 40:
+                    logger.warning("LLM did not produce json as prompted")
+                    found_answer_end = True
+                    continue
 
-            continue
+                remaining = model_output[m.end() :]
+                if len(remaining) > 0:
+                    yield DanswerAnswerPiece(answer_piece=remaining)
+            continue
 
         if found_answer_start and not found_answer_end:
             if is_json_prompt and _stream_json_answer_end(model_previous, token):
diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py
index 64a704fa69..1676896393 100644
--- a/backend/danswer/prompts/direct_qa_prompts.py
+++ b/backend/danswer/prompts/direct_qa_prompts.py
@@ -7,7 +7,6 @@ from danswer.prompts.constants import FINAL_QUERY_PAT
 from danswer.prompts.constants import GENERAL_SEP_PAT
 from danswer.prompts.constants import QUESTION_PAT
 from danswer.prompts.constants import THOUGHT_PAT
-from danswer.prompts.constants import UNCERTAINTY_PAT
 
 
 ONE_SHOT_SYSTEM_PROMPT = """
@@ -66,9 +65,6 @@ EMPTY_SAMPLE_JSON = {
 }
 
 
-ANSWER_NOT_FOUND_RESPONSE = f'{{"answer": "{UNCERTAINTY_PAT}", "quotes": []}}'
-
-
 # Default json prompt which can reference multiple docs and provide answer + quotes
 # system_like_header is similar to system message, can be user provided or defaults to QA_HEADER
 # context/history blocks are for context documents and conversation history, they can be blank
diff --git a/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py
new file mode 100644
index 0000000000..89494f4bf8
--- /dev/null
+++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py
@@ -0,0 +1,197 @@
+import json
+from datetime import datetime
+
+from danswer.chat.models import DanswerAnswerPiece
+from danswer.chat.models import LlmDoc
+from danswer.configs.constants import DocumentSource
+from danswer.llm.answering.stream_processing.quotes_processing import (
+    process_model_tokens,
+)
+
+mock_docs = [
+    LlmDoc(
+        document_id=f"doc_{int(id/2)}",
+        content="Document is a doc",
+        blurb=f"Document #{id}",
+        semantic_identifier=f"Doc {id}",
+        source_type=DocumentSource.WEB,
+        metadata={},
+        updated_at=datetime.now(),
+        link=f"https://{int(id/2)}.com" if int(id / 2) % 2 == 0 else None,
+        source_links={0: "https://mintlify.com/docs/settings/broken-links"},
+    )
+    for id in range(10)
+]
+
+
+def test_process_model_tokens() -> None:
+    tokens = [
+        "{",
+        "\n ",
+        '"answer": "Yes',
+        ", Danswer allows",
+        " customized prompts. This",
+        " feature",
+        " is currently being",
+        " developed and implemente",
+        "d to",
+        " improve",
+        " the accuracy",
+        " of",
+        " Language",
+        " Models (",
+        "LL",
+        "Ms) for",
+        " different",
+        " companies",
+        ".",
+        " The custom",
+        "ized prompts feature",
+        " woul",
+        "d allow users to ad",
+        "d person",
+        "alized prom",
+        "pts through",
+        " an",
+        " interface or",
+        " metho",
+        "d,",
+        " which would then be used to",
+        " train",
+        " the LLM.",
+        " This enhancement",
+        " aims to make",
+        " Danswer more",
+        " adaptable to",
+        " different",
+        " business",
+        " contexts",
+        " by",
+        " tail",
+        "oring it",
+        " to the specific language",
+        " an",
+        "d terminology",
+        " used within",
+        " a",
+        " company.",
+        " Additionally",
+        ",",
+        " Danswer already",
+        " supports creating",
+        " custom AI",
+        " Assistants with",
+        " different",
+        " prom",
+        "pts and backing",
+        " knowledge",
+        " sets",
+        ",",
+        " which",
+        " is",
+        " a form",
+        " of prompt",
+        " customization. However, it",
+        "'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
+        " aspects",
+        " of prompt",
+        " customization,",
+        " such as for",
+        " Sl",
+        "ack",
+        "b",
+        "ots, may",
+        " still",
+        " be in",
+        " development or have",
+        ' limitations.",',
+        '\n "quotes": [',
+        '\n "We',
+        " woul",
+        "d like to ad",
+        "d customized prompts for",
+        " different",
+        " companies to improve the accuracy of",
+        " Language",
+        " Model",
+        " (LLM)",
+        '.",\n "A',
+        " new",
+        " feature that",
+        " allows users to add personalize",
+        "d prompts.",
+        " This would involve",
+        " creating",
+        " an interface or method for",
+        " users to input",
+        " their",
+        " own",
+        " prom",
+        "pts,",
+        " which would then be used to",
+        ' train the LLM.",',
+        '\n "Create',
+        " custom AI Assistants with",
+        " different prompts and backing knowledge",
+        ' sets.",',
+        '\n "This',
+        " PR",
+        " fixes",
+        " https",
+        "://github.com/dan",
+        "swer-ai/dan",
+        "swer/issues/1",
+        "584",
+        " by",
+        " setting",
+        " the system",
+        " default",
+        " prompt for",
+        " sl",
+        "ackbots const",
+        "rained by",
+        " ",
+        "document sets",
+        ".",
+        " It",
+        " probably",
+        " isn",
+        "'t ideal",
+        " -",
+        " it",
+        " might",
+        " be pref",
+        "erable to be",
+        " able to select",
+        " a prompt for",
+        " the",
+        " slackbot from",
+        " the",
+        " admin",
+        " panel",
+        " -",
+        " but it sol",
+        "ves the immediate problem",
+        " of",
+        " the slack",
+        " listener",
+        " cr",
+        "ashing when",
+        " configure",
+        "d this",
+        ' way."\n ]',
+        "\n}",
+        "",
+    ]
+    gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
+
+    s_json = "".join(tokens)
+    j = json.loads(s_json)
+    expected_answer = j["answer"]
+    actual = ""
+    for o in gen:
+        if isinstance(o, DanswerAnswerPiece):
+            if o.answer_piece:
+                actual += o.answer_piece
+
+    assert expected_answer == actual