Regression Test (#434)

This commit is contained in:
Yuhong Sun 2023-09-11 19:06:01 -07:00 committed by GitHub
parent 101ff2f392
commit b5fc2a5775
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 223 additions and 2 deletions

View File

@ -30,6 +30,7 @@ from danswer.llm.llm import LLM
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
from danswer.llm.utils import str_prompt_to_langchain_prompt
from danswer.utils.logger import setup_logger
from danswer.utils.text_processing import clean_up_code_blocks
from danswer.utils.text_processing import escape_newlines
logger = setup_logger()
@ -155,12 +156,14 @@ class SingleMessageScratchpadHandler(QAHandler):
) -> tuple[DanswerAnswer, DanswerQuotes]:
logger.debug(model_output)
answer_start = model_output.find('{"answer":')
model_clean = clean_up_code_blocks(model_output)
answer_start = model_clean.find('{"answer":')
# Only found thoughts, no final answer
if answer_start == -1:
return DanswerAnswer(answer=None), DanswerQuotes(quotes=[])
final_json = escape_newlines(model_output[answer_start:])
final_json = escape_newlines(model_clean[answer_start:])
return process_answer(
final_json, context_chunks, is_json_prompt=self.is_json_output

View File

@ -8,6 +8,10 @@ def escape_newlines(s: str) -> str:
return re.sub(r"(?<!\\)\n", "\\\\n", s)
def clean_up_code_blocks(model_out_raw: str) -> str:
    """Remove a Markdown code fence wrapped around raw model output.

    Surrounding whitespace is trimmed, then a leading and/or trailing run of
    three or more backticks is removed, then whitespace is trimmed again.

    Only real fences are removed. ``str.strip("```")`` treats its argument as
    a set of characters, so it would also eat a lone backtick at either end
    (for example output that ends with inline code such as `` `foo` ``).

    Text following the opening fence on the same line (e.g. ``json``) is left
    in place.

    Args:
        model_out_raw: The raw text produced by the model.

    Returns:
        The text with any enclosing fence and surrounding whitespace removed.
    """
    fence = "```"
    text = model_out_raw.strip()
    # Each side is handled independently: a fence may be present on only one end.
    if text.startswith(fence):
        text = text.lstrip("`")
    if text.endswith(fence):
        text = text.rstrip("`")
    return text.strip()
def clean_model_quote(quote: str, trim_length: int) -> str:
quote_clean = quote.strip()
if quote_clean[0] == '"':

View File

@ -4,6 +4,7 @@ mypy==1.1.1
pre-commit==3.2.2
reorder-python-imports==3.9.0
ruff==0.0.286
types-PyYAML==6.0.12.11
types-beautifulsoup4==4.12.0.3
types-html5lib==1.1.11.13
types-oauthlib==3.2.0.9

View File

@ -0,0 +1,117 @@
import argparse
from datetime import datetime
import yaml
from sqlalchemy.orm import Session
from danswer.db.engine import get_sqlalchemy_engine
from danswer.direct_qa.answer_question import answer_qa_query
from danswer.server.models import QuestionRequest
# Module-level engine, created once when this module is loaded; the script's
# entry point binds its SQLAlchemy Session to it.
engine = get_sqlalchemy_engine()
def load_yaml(filepath: str) -> dict:
    """Read the YAML file at ``filepath`` and return its parsed content.

    Args:
        filepath: Path to the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file. The ``dict``
        annotation is not enforced here, so callers that index the result
        rely on the file having a mapping at its top level.
    """
    # Pin the encoding: without it, open() falls back to the platform's locale
    # encoding, so the same file could decode differently across machines.
    with open(filepath, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)
    return data
def word_wrap(s: str, max_line_size: int = 120) -> str:
    """Re-flow ``s`` into lines of at most ``max_line_size`` characters.

    The text is split on any whitespace, so existing line breaks and repeated
    spaces are collapsed. Words are joined with single spaces. A word longer
    than ``max_line_size`` is never split; it is placed on a line by itself.

    Args:
        s: The text to wrap.
        max_line_size: Maximum width of a line built from several words.

    Returns:
        The wrapped text with lines separated by ``"\n"``.
    """
    lines: list[str] = []
    pending: list[str] = []
    pending_chars = 0  # characters in the pending words, not counting spaces

    for token in s.split():
        token_len = len(token)

        # Width of the pending line if this token were appended: the words'
        # characters plus one joining space per word already pending.
        joined_width = pending_chars + len(pending) + token_len
        if pending and joined_width > max_line_size:
            lines.append(" ".join(pending))
            pending = []
            pending_chars = 0

        if token_len > max_line_size:
            # Oversized words get their own line and never join others.
            lines.append(token)
        else:
            pending.append(token)
            pending_chars += token_len

    if pending:
        lines.append(" ".join(pending))

    return "\n".join(lines)
def get_answer_for_question(
    query: str, db_session: Session, real_time_flow: bool = False
) -> str | None:
    """Send one query through ``answer_qa_query`` and return the answer text.

    Args:
        query: The question text to ask.
        db_session: Session passed through to ``answer_qa_query``.
        real_time_flow: Forwarded to ``answer_qa_query``. Defaults to
            ``False``, the value that was previously hard-coded here.

    Returns:
        The ``answer`` attribute of the QA result, which may be ``None``.
    """
    question = QuestionRequest(
        query=query,
        collection="danswer_index",
        use_keyword=None,
        filters=None,
        offset=None,
    )

    answer = answer_qa_query(
        question=question,
        user=None,
        db_session=db_session,
        answer_generation_timeout=100,
        real_time_flow=real_time_flow,
    )

    return answer.answer


if __name__ == "__main__":
    # Script entry point: run every question in the regression YAML through
    # the QA flow and write a human-readable report to the output file.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "regression_yaml",
        type=str,
        help="Path to the Questions YAML file.",
        default="./tests/regression/answer_quality/sample_questions.yaml",
        nargs="?",
    )
    parser.add_argument(
        "--real-time", action="store_true", help="Set to use the real-time flow."
    )
    parser.add_argument(
        "--output",
        type=str,
        help="Path to the output results file.",
        default="./tests/regression/answer_quality/regression_results.txt",
    )
    args = parser.parse_args()

    questions_data = load_yaml(args.regression_yaml)

    # Explicit encoding so answers containing non-ASCII text can be written
    # regardless of the platform's default locale encoding.
    with open(args.output, "w", encoding="utf-8") as outfile:
        print("Running Question Answering Flow", file=outfile)

        with Session(engine, expire_on_commit=False) as db_session:
            for sample in questions_data["questions"]:
                # This line goes to stdout to track progress
                print(f"Running Test for Question {sample['id']}: {sample['question']}")

                start_time = datetime.now()
                # Honor --real-time; the flag used to be parsed but ignored.
                answer = get_answer_for_question(
                    sample["question"], db_session, real_time_flow=args.real_time
                )
                end_time = datetime.now()

                print(f"====Duration: {end_time - start_time}====", file=outfile)
                print(f"Question {sample['id']}:", file=outfile)
                print(sample["question"], file=outfile)
                print("\nExpected Answer:", file=outfile)
                print(sample["expected_answer"], file=outfile)
                print("\nActual Answer:", file=outfile)
                print(
                    word_wrap(answer)
                    if answer
                    else "Failed, either crashed or refused to answer.",
                    file=outfile,
                )
                # Flush per question so partial results survive an aborted run.
                print("\n\n", file=outfile, flush=True)

View File

@ -0,0 +1,96 @@
# This YAML file contains regression questions for Danswer.
# The sources mentioned are the same ones that power the DanswerBot for the community's use
# The regression flow assumes the data from the sources listed are already indexed
metadata:
version: v0.0.1
date: 2023-09-10
sources:
- name: web
detail: https://www.danswer.ai/
- name: web
detail: https://docs.danswer.dev/
- name: github issues
detail: danswer-ai/danswer
- name: github pull-requests
detail: danswer-ai/danswer
- name: slack
workspace: danswer.slack.com
- name: file
detail: Markdown files from Danswer repo
questions:
- id: 1
question: "What is Danswer?"
expected_answer: "Danswer is an open source question-answering system."
notes: "This comes directly from the docs, the actual answer should be more informative"
- id: 2
question: "What is Danswer licensed under?"
expected_answer: "Danswer is MIT licensed"
notes: "This info can be found in many places"
- id: 3
question: "What are the required variables to set to use GPT-4?"
expected_answer: "Set the environment variables INTERNAL_MODEL_VERSION=openai-chat-completion and GEN_AI_MODEL_VERSION=gpt-4"
notes: "Two env vars are required; the third (the key) is optional"
- id: 4
question: "Why might I want to use the deberta model for QnA?"
expected_answer: "This kind of model can run on CPU and are less likely to produce hallucinations"
notes: "https://docs.danswer.dev/gen_ai_configs/transformers, this is a pretty hard question"
- id: 5
question: "What auth related tokens do I need for BookStack?"
expected_answer: "You will need the API Token ID and the API Token Secret"
notes: "https://docs.danswer.dev/connectors/bookstack"
- id: 6
question: "ValueError: invalid literal for int() with base 10"
expected_answer: "This was a bug that was fixed shortly after the issue was filed. Try updating the code."
notes: "This question is in Github Issue #290"
- id: 7
question: "Is there support for knowledge sets or document sets?"
expected_answer: "This was requested and approved however it is not clear if the feature is implemented yet."
notes: "This question is in Github Issue #338"
- id: 8
question: "nginx returning 502"
expected_answer: "Google OAuth must be configured for Danswer backend to work. A PR was created to fix it"
notes: "This question is in Github Issue #260"
- id: 9
question: "Why isn't GPT4All enabled by default"
expected_answer: "There is no recent version of GPT4All that is compatible with M1 Mac."
notes: "This question is in Github Issue #232 but also mentioned in several other places"
- id: 10
question: "Why isn't GPT4All enabled by default"
expected_answer: "There is no recent version of GPT4All that is compatible with M1 Mac."
notes: "This question is in Github Issue #232 but also mentioned in several other places"
- id: 11
question: "Why are the models warmed up on server start"
expected_answer: "This ensures that the first indexing isn't really slow."
notes: "This is in Github PR #333"
- id: 12
question: "Why are the models warmed up on server start"
expected_answer: "This ensures that the first indexing isn't really slow."
notes: "This is in Github PR #333"
- id: 13
question: "What text from the Alation Connector is used to generate the docs?"
expected_answer: "Articles are used with the body contents. Schemas, Tables, and Columns use Description"
notes: "This is in Github PR #161"
- id: 14
question: "Does Danswer support PDFs in Google Drive?"
expected_answer: "Yes"
notes: "This question is in Slack, if the message expires due to using free slack version, the info may be gone as well"
- id: 15
question: "I deleted a connector in Danswer but some deleted docs are still showing in search"
expected_answer: "The issue was fixed via a code change, it should go away after pulling the latest code"
notes: "This question is in Slack, if the message expires due to using free slack version, the info may be gone as well"