add unit test for quotes (#2085)

* add unit test for quotes

* test answer and quotes together
This commit is contained in:
rkuo-danswer
2024-08-08 11:20:07 -07:00
committed by GitHub
parent c630fcffee
commit be9ed319d5

View File

@@ -2,6 +2,7 @@ import json
from datetime import datetime from datetime import datetime
from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerAnswerPiece
from danswer.chat.models import DanswerQuotes
from danswer.chat.models import LlmDoc from danswer.chat.models import LlmDoc
from danswer.configs.constants import DocumentSource from danswer.configs.constants import DocumentSource
from danswer.llm.answering.stream_processing.quotes_processing import ( from danswer.llm.answering.stream_processing.quotes_processing import (
@@ -24,168 +25,170 @@ mock_docs = [
] ]
def test_process_model_tokens() -> None: tokens_with_quotes = [
tokens = [ "{",
"{", "\n ",
"\n ", '"answer": "Yes',
'"answer": "Yes', ", Danswer allows",
", Danswer allows", " customized prompts. This",
" customized prompts. This", " feature",
" feature", " is currently being",
" is currently being", " developed and implemente",
" developed and implemente", "d to",
"d to", " improve",
" improve", " the accuracy",
" the accuracy", " of",
" of", " Language",
" Language", " Models (",
" Models (", "LL",
"LL", "Ms) for",
"Ms) for", " different",
" different", " companies",
" companies", ".",
".", " The custom",
" The custom", "ized prompts feature",
"ized prompts feature", " woul",
" woul", "d allow users to ad",
"d allow users to ad", "d person",
"d person", "alized prom",
"alized prom", "pts through",
"pts through", " an",
" an", " interface or",
" interface or", " metho",
" metho", "d,",
"d,", " which would then be used to",
" which would then be used to", " train",
" train", " the LLM.",
" the LLM.", " This enhancement",
" This enhancement", " aims to make",
" aims to make", " Danswer more",
" Danswer more", " adaptable to",
" adaptable to", " different",
" different", " business",
" business", " contexts",
" contexts", " by",
" by", " tail",
" tail", "oring it",
"oring it", " to the specific language",
" to the specific language", " an",
" an", "d terminology",
"d terminology", " used within",
" used within", " a",
" a", " company.",
" company.", " Additionally",
" Additionally", ",",
",", " Danswer already",
" Danswer already", " supports creating",
" supports creating", " custom AI",
" custom AI", " Assistants with",
" Assistants with", " different",
" different", " prom",
" prom", "pts and backing",
"pts and backing", " knowledge",
" knowledge", " sets",
" sets", ",",
",", " which",
" which", " is",
" is", " a form",
" a form", " of prompt",
" of prompt", " customization. However, it",
" customization. However, it", "'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
"'s important to nLogging Details LiteLLM-Success Call: Noneote that some", " aspects",
" aspects", " of prompt",
" of prompt", " customization,",
" customization,", " such as for",
" such as for", " Sl",
" Sl", "ack",
"ack", "b",
"b", "ots, may",
"ots, may", " still",
" still", " be in",
" be in", " development or have",
" development or have", ' limitations.",',
' limitations.",', '\n "quotes": [',
'\n "quotes": [', '\n "We',
'\n "We', " woul",
" woul", "d like to ad",
"d like to ad", "d customized prompts for",
"d customized prompts for", " different",
" different", " companies to improve the accuracy of",
" companies to improve the accuracy of", " Language",
" Language", " Model",
" Model", " (LLM)",
" (LLM)", '.",\n "A',
'.",\n "A', " new",
" new", " feature that",
" feature that", " allows users to add personalize",
" allows users to add personalize", "d prompts.",
"d prompts.", " This would involve",
" This would involve", " creating",
" creating", " an interface or method for",
" an interface or method for", " users to input",
" users to input", " their",
" their", " own",
" own", " prom",
" prom", "pts,",
"pts,", " which would then be used to",
" which would then be used to", ' train the LLM.",',
' train the LLM.",', '\n "Create',
'\n "Create', " custom AI Assistants with",
" custom AI Assistants with", " different prompts and backing knowledge",
" different prompts and backing knowledge", ' sets.",',
' sets.",', '\n "This',
'\n "This', " PR",
" PR", " fixes",
" fixes", " https",
" https", "://github.com/dan",
"://github.com/dan", "swer-ai/dan",
"swer-ai/dan", "swer/issues/1",
"swer/issues/1", "584",
"584", " by",
" by", " setting",
" setting", " the system",
" the system", " default",
" default", " prompt for",
" prompt for", " sl",
" sl", "ackbots const",
"ackbots const", "rained by",
"rained by", " ",
" ", "document sets",
"document sets", ".",
".", " It",
" It", " probably",
" probably", " isn",
" isn", "'t ideal",
"'t ideal", " -",
" -", " it",
" it", " might",
" might", " be pref",
" be pref", "erable to be",
"erable to be", " able to select",
" able to select", " a prompt for",
" a prompt for", " the",
" the", " slackbot from",
" slackbot from", " the",
" the", " admin",
" admin", " panel",
" panel", " -",
" -", " but it sol",
" but it sol", "ves the immediate problem",
"ves the immediate problem", " of",
" of", " the slack",
" the slack", " listener",
" listener", " cr",
" cr", "ashing when",
"ashing when", " configure",
" configure", "d this",
"d this", ' way."\n ]',
' way."\n ]', "\n}",
"\n}", "",
"", ]
]
gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
s_json = "".join(tokens)
def test_process_model_tokens_answer() -> None:
gen = process_model_tokens(tokens=iter(tokens_with_quotes), context_docs=mock_docs)
s_json = "".join(tokens_with_quotes)
j = json.loads(s_json) j = json.loads(s_json)
expected_answer = j["answer"] expected_answer = j["answer"]
actual = "" actual = ""
@@ -278,3 +281,37 @@ def test_json_answer_split_tokens() -> None:
) )
assert expected_answer == actual assert expected_answer == actual
def test_prefixed_json_with_quotes() -> None:
    """Check answer and quote extraction from a Markdown-fenced JSON stream.

    The token stream wraps the JSON payload in a ```json ... ``` fence. The
    test feeds it to ``process_model_tokens`` and verifies that the streamed
    answer pieces concatenate to the expected answer and that exactly one
    quote, with the text "Document", is emitted.
    """
    streamed_tokens = [
        "```",
        "json",
        "\n",
        "{",
        '"answer": "This is a simple ',
        "answer.",
        '",\n"',
        'quotes": ["Document"]',
        "\n}",
        "\n",
        "```",
    ]

    answer_parts: list[str] = []
    quote_total = 0

    stream = process_model_tokens(
        tokens=iter(streamed_tokens), context_docs=mock_docs
    )
    for item in stream:
        if isinstance(item, DanswerAnswerPiece):
            # Pieces with an empty/None answer_piece carry no text to collect.
            if item.answer_piece:
                answer_parts.append(item.answer_piece)
        elif isinstance(item, DanswerQuotes):
            # NOTE(review): every emitted quote is counted individually;
            # confirm this matches the intended nesting of the counter.
            for quote in item.quotes:
                assert quote.quote == "Document"
                quote_total += 1

    assert "This is a simple answer." == "".join(answer_parts)
    assert 1 == quote_total