mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-29 21:37:21 +02:00
add unit test for quotes (#2085)
* add unit test for quotes
* test answer and quotes together
This commit is contained in:
@@ -2,6 +2,7 @@ import json
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from danswer.chat.models import DanswerAnswerPiece
|
from danswer.chat.models import DanswerAnswerPiece
|
||||||
|
from danswer.chat.models import DanswerQuotes
|
||||||
from danswer.chat.models import LlmDoc
|
from danswer.chat.models import LlmDoc
|
||||||
from danswer.configs.constants import DocumentSource
|
from danswer.configs.constants import DocumentSource
|
||||||
from danswer.llm.answering.stream_processing.quotes_processing import (
|
from danswer.llm.answering.stream_processing.quotes_processing import (
|
||||||
@@ -24,168 +25,170 @@ mock_docs = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_process_model_tokens() -> None:
|
tokens_with_quotes = [
|
||||||
tokens = [
|
"{",
|
||||||
"{",
|
"\n ",
|
||||||
"\n ",
|
'"answer": "Yes',
|
||||||
'"answer": "Yes',
|
", Danswer allows",
|
||||||
", Danswer allows",
|
" customized prompts. This",
|
||||||
" customized prompts. This",
|
" feature",
|
||||||
" feature",
|
" is currently being",
|
||||||
" is currently being",
|
" developed and implemente",
|
||||||
" developed and implemente",
|
"d to",
|
||||||
"d to",
|
" improve",
|
||||||
" improve",
|
" the accuracy",
|
||||||
" the accuracy",
|
" of",
|
||||||
" of",
|
" Language",
|
||||||
" Language",
|
" Models (",
|
||||||
" Models (",
|
"LL",
|
||||||
"LL",
|
"Ms) for",
|
||||||
"Ms) for",
|
" different",
|
||||||
" different",
|
" companies",
|
||||||
" companies",
|
".",
|
||||||
".",
|
" The custom",
|
||||||
" The custom",
|
"ized prompts feature",
|
||||||
"ized prompts feature",
|
" woul",
|
||||||
" woul",
|
"d allow users to ad",
|
||||||
"d allow users to ad",
|
"d person",
|
||||||
"d person",
|
"alized prom",
|
||||||
"alized prom",
|
"pts through",
|
||||||
"pts through",
|
" an",
|
||||||
" an",
|
" interface or",
|
||||||
" interface or",
|
" metho",
|
||||||
" metho",
|
"d,",
|
||||||
"d,",
|
" which would then be used to",
|
||||||
" which would then be used to",
|
" train",
|
||||||
" train",
|
" the LLM.",
|
||||||
" the LLM.",
|
" This enhancement",
|
||||||
" This enhancement",
|
" aims to make",
|
||||||
" aims to make",
|
" Danswer more",
|
||||||
" Danswer more",
|
" adaptable to",
|
||||||
" adaptable to",
|
" different",
|
||||||
" different",
|
" business",
|
||||||
" business",
|
" contexts",
|
||||||
" contexts",
|
" by",
|
||||||
" by",
|
" tail",
|
||||||
" tail",
|
"oring it",
|
||||||
"oring it",
|
" to the specific language",
|
||||||
" to the specific language",
|
" an",
|
||||||
" an",
|
"d terminology",
|
||||||
"d terminology",
|
" used within",
|
||||||
" used within",
|
" a",
|
||||||
" a",
|
" company.",
|
||||||
" company.",
|
" Additionally",
|
||||||
" Additionally",
|
",",
|
||||||
",",
|
" Danswer already",
|
||||||
" Danswer already",
|
" supports creating",
|
||||||
" supports creating",
|
" custom AI",
|
||||||
" custom AI",
|
" Assistants with",
|
||||||
" Assistants with",
|
" different",
|
||||||
" different",
|
" prom",
|
||||||
" prom",
|
"pts and backing",
|
||||||
"pts and backing",
|
" knowledge",
|
||||||
" knowledge",
|
" sets",
|
||||||
" sets",
|
",",
|
||||||
",",
|
" which",
|
||||||
" which",
|
" is",
|
||||||
" is",
|
" a form",
|
||||||
" a form",
|
" of prompt",
|
||||||
" of prompt",
|
" customization. However, it",
|
||||||
" customization. However, it",
|
"'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
|
||||||
"'s important to nLogging Details LiteLLM-Success Call: Noneote that some",
|
" aspects",
|
||||||
" aspects",
|
" of prompt",
|
||||||
" of prompt",
|
" customization,",
|
||||||
" customization,",
|
" such as for",
|
||||||
" such as for",
|
" Sl",
|
||||||
" Sl",
|
"ack",
|
||||||
"ack",
|
"b",
|
||||||
"b",
|
"ots, may",
|
||||||
"ots, may",
|
" still",
|
||||||
" still",
|
" be in",
|
||||||
" be in",
|
" development or have",
|
||||||
" development or have",
|
' limitations.",',
|
||||||
' limitations.",',
|
'\n "quotes": [',
|
||||||
'\n "quotes": [',
|
'\n "We',
|
||||||
'\n "We',
|
" woul",
|
||||||
" woul",
|
"d like to ad",
|
||||||
"d like to ad",
|
"d customized prompts for",
|
||||||
"d customized prompts for",
|
" different",
|
||||||
" different",
|
" companies to improve the accuracy of",
|
||||||
" companies to improve the accuracy of",
|
" Language",
|
||||||
" Language",
|
" Model",
|
||||||
" Model",
|
" (LLM)",
|
||||||
" (LLM)",
|
'.",\n "A',
|
||||||
'.",\n "A',
|
" new",
|
||||||
" new",
|
" feature that",
|
||||||
" feature that",
|
" allows users to add personalize",
|
||||||
" allows users to add personalize",
|
"d prompts.",
|
||||||
"d prompts.",
|
" This would involve",
|
||||||
" This would involve",
|
" creating",
|
||||||
" creating",
|
" an interface or method for",
|
||||||
" an interface or method for",
|
" users to input",
|
||||||
" users to input",
|
" their",
|
||||||
" their",
|
" own",
|
||||||
" own",
|
" prom",
|
||||||
" prom",
|
"pts,",
|
||||||
"pts,",
|
" which would then be used to",
|
||||||
" which would then be used to",
|
' train the LLM.",',
|
||||||
' train the LLM.",',
|
'\n "Create',
|
||||||
'\n "Create',
|
" custom AI Assistants with",
|
||||||
" custom AI Assistants with",
|
" different prompts and backing knowledge",
|
||||||
" different prompts and backing knowledge",
|
' sets.",',
|
||||||
' sets.",',
|
'\n "This',
|
||||||
'\n "This',
|
" PR",
|
||||||
" PR",
|
" fixes",
|
||||||
" fixes",
|
" https",
|
||||||
" https",
|
"://github.com/dan",
|
||||||
"://github.com/dan",
|
"swer-ai/dan",
|
||||||
"swer-ai/dan",
|
"swer/issues/1",
|
||||||
"swer/issues/1",
|
"584",
|
||||||
"584",
|
" by",
|
||||||
" by",
|
" setting",
|
||||||
" setting",
|
" the system",
|
||||||
" the system",
|
" default",
|
||||||
" default",
|
" prompt for",
|
||||||
" prompt for",
|
" sl",
|
||||||
" sl",
|
"ackbots const",
|
||||||
"ackbots const",
|
"rained by",
|
||||||
"rained by",
|
" ",
|
||||||
" ",
|
"document sets",
|
||||||
"document sets",
|
".",
|
||||||
".",
|
" It",
|
||||||
" It",
|
" probably",
|
||||||
" probably",
|
" isn",
|
||||||
" isn",
|
"'t ideal",
|
||||||
"'t ideal",
|
" -",
|
||||||
" -",
|
" it",
|
||||||
" it",
|
" might",
|
||||||
" might",
|
" be pref",
|
||||||
" be pref",
|
"erable to be",
|
||||||
"erable to be",
|
" able to select",
|
||||||
" able to select",
|
" a prompt for",
|
||||||
" a prompt for",
|
" the",
|
||||||
" the",
|
" slackbot from",
|
||||||
" slackbot from",
|
" the",
|
||||||
" the",
|
" admin",
|
||||||
" admin",
|
" panel",
|
||||||
" panel",
|
" -",
|
||||||
" -",
|
" but it sol",
|
||||||
" but it sol",
|
"ves the immediate problem",
|
||||||
"ves the immediate problem",
|
" of",
|
||||||
" of",
|
" the slack",
|
||||||
" the slack",
|
" listener",
|
||||||
" listener",
|
" cr",
|
||||||
" cr",
|
"ashing when",
|
||||||
"ashing when",
|
" configure",
|
||||||
" configure",
|
"d this",
|
||||||
"d this",
|
' way."\n ]',
|
||||||
' way."\n ]',
|
"\n}",
|
||||||
"\n}",
|
"",
|
||||||
"",
|
]
|
||||||
]
|
|
||||||
gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs)
|
|
||||||
|
|
||||||
s_json = "".join(tokens)
|
|
||||||
|
def test_process_model_tokens_answer() -> None:
|
||||||
|
gen = process_model_tokens(tokens=iter(tokens_with_quotes), context_docs=mock_docs)
|
||||||
|
|
||||||
|
s_json = "".join(tokens_with_quotes)
|
||||||
j = json.loads(s_json)
|
j = json.loads(s_json)
|
||||||
expected_answer = j["answer"]
|
expected_answer = j["answer"]
|
||||||
actual = ""
|
actual = ""
|
||||||
@@ -278,3 +281,37 @@ def test_json_answer_split_tokens() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
assert expected_answer == actual
|
assert expected_answer == actual
|
||||||
|
|
||||||
|
|
||||||
|
def test_prefixed_json_with_quotes() -> None:
    """Check answer and quote extraction when the JSON sits in a code fence.

    The token stream opens and closes with a Markdown code fence (with a
    ``json`` language tag), and both the answer text and the ``quotes`` key
    are split across token boundaries. The test expects the streamed answer
    pieces to concatenate to the full answer and exactly one quote, equal to
    "Document", to be emitted.
    """
    streamed_tokens = [
        "```",
        "json",
        "\n",
        "{",
        '"answer": "This is a simple ',
        "answer.",
        '",\n"',
        'quotes": ["Document"]',
        "\n}",
        "\n",
        "```",
    ]

    outputs = process_model_tokens(
        tokens=iter(streamed_tokens), context_docs=mock_docs
    )

    answer_pieces = []
    quote_texts = []
    for item in outputs:
        if isinstance(item, DanswerAnswerPiece):
            # Empty / None pieces carry no answer text, so they are skipped.
            if item.answer_piece:
                answer_pieces.append(item.answer_piece)
        elif isinstance(item, DanswerQuotes):
            quote_texts.extend(found.quote for found in item.quotes)

    # Every emitted quote must be the single expected one.
    assert all(text == "Document" for text in quote_texts)
    assert "This is a simple answer." == "".join(answer_pieces)
    assert 1 == len(quote_texts)
|
||||||
|
Reference in New Issue
Block a user