Provide Metadata to the LLM (#740)

This commit is contained in:
Yuhong Sun
2023-11-19 12:28:45 -08:00
committed by GitHub
parent 6fb07d20cc
commit f72825cd46
5 changed files with 48 additions and 15 deletions

View File

@@ -65,6 +65,38 @@ class QAHandler(abc.ABC):
 )


+# Maps connector enum string to a more natural language representation for the LLM
+# If not on the list, uses the original but slightly cleaned up, see below
+CONNECTOR_NAME_MAP = {
+    "web": "Website",
+    "requesttracker": "Request Tracker",
+    "github": "GitHub",
+    "file": "File Upload",
+}
+
+
+def clean_up_source(source_str: str) -> str:
+    if source_str in CONNECTOR_NAME_MAP:
+        return CONNECTOR_NAME_MAP[source_str]
+    return source_str.replace("_", " ").title()
+
+
+def build_context_str(
+    context_chunks: list[InferenceChunk],
+    include_metadata: bool = True,
+) -> str:
+    context = ""
+    for chunk in context_chunks:
+        if include_metadata:
+            context += f"NEW DOCUMENT: {chunk.semantic_identifier}\n"
+            context += f"Source: {clean_up_source(chunk.source_type)}\n"
+            if chunk.updated_at:
+                update_str = chunk.updated_at.strftime("%B %d, %Y %H:%M")
+                context += f"Updated: {update_str}\n"
+        context += f"{CODE_BLOCK_PAT.format(chunk.content.strip())}\n\n\n"
+    return context.strip()
+
+
 class WeakLLMQAHandler(QAHandler):
     """Since Danswer supports a variety of LLMs, this less demanding prompt is provided
     as an option to use with weaker LLMs such as small version, low float precision, quantized,
@@ -95,9 +127,7 @@ class SingleMessageQAHandler(QAHandler):
         context_chunks: list[InferenceChunk],
         use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
     ) -> list[BaseMessage]:
-        context_docs_str = "\n".join(
-            f"\n{CODE_BLOCK_PAT.format(c.content)}\n" for c in context_chunks
-        )
+        context_docs_str = build_context_str(context_chunks)

         single_message = JSON_PROMPT.format(
             context_docs_str=context_docs_str,
@@ -123,9 +153,7 @@ class SingleMessageScratchpadHandler(QAHandler):
         context_chunks: list[InferenceChunk],
         use_language_hint: bool = bool(MULTILINGUAL_QUERY_EXPANSION),
     ) -> list[BaseMessage]:
-        context_docs_str = "\n".join(
-            f"\n{CODE_BLOCK_PAT.format(c.content)}\n" for c in context_chunks
-        )
+        context_docs_str = build_context_str(context_chunks)

         single_message = COT_PROMPT.format(
             context_docs_str=context_docs_str,
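To make the new context format concrete, here is a minimal, hypothetical sketch of what build_context_str emits per chunk. The chunk values below are invented for illustration, and SimpleNamespace merely stands in for InferenceChunk:

    from datetime import datetime
    from types import SimpleNamespace

    CODE_BLOCK_PAT = "```\n{}\n```"  # same pattern as in the prompts constants

    # Invented stand-in for an InferenceChunk
    chunk = SimpleNamespace(
        semantic_identifier="Onboarding Guide",
        source_type="confluence_cloud",  # hypothetical enum value, not in CONNECTOR_NAME_MAP
        updated_at=datetime(2023, 11, 19, 12, 28),
        content="New hires should request access on day one.",
    )

    # build_context_str([chunk]) would produce roughly:
    # NEW DOCUMENT: Onboarding Guide
    # Source: Confluence Cloud          <- fallback: replace("_", " ").title()
    # Updated: November 19, 2023 12:28
    # ```
    # New hires should request access on day one.
    # ```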

View File

@@ -349,7 +349,8 @@ def get_chunks_for_qa(
         # We calculate it live in case the user uses a different LLM + tokenizer
         chunk_token = check_number_of_tokens(chunk.content)
-        token_count += chunk_token
+        # 50 is an approximate/slight overestimate of the token count for the chunk's metadata
+        token_count += chunk_token + 50

         # Always use at least 1 chunk
         if token_count <= token_limit or not latest_batch_indices:
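A small worked example of why the flat overhead matters when packing chunks. The numbers are invented and the loop is a simplified sketch of the selection logic above, not the real get_chunks_for_qa:

    # Invented token counts for three candidate chunks
    chunk_tokens = [200, 150, 100]
    token_limit = 512

    token_count = 0
    selected = 0
    for tokens in chunk_tokens:
        token_count += tokens + 50  # content tokens + ~50-token metadata overestimate
        if token_count > token_limit and selected > 0:
            break
        selected += 1

    # With the +50 headroom only 2 chunks fit (250, 450, then 600 > 512);
    # without it all 3 would be packed (200, 350, 450) and the added
    # "NEW DOCUMENT"/"Source"/"Updated" lines could blow the real token budget.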

View File

@@ -80,7 +80,7 @@ class DocMetadataAwareIndexChunk(IndexChunk):
 @dataclass
 class InferenceChunk(BaseChunk):
     document_id: str
-    source_type: str
+    source_type: str  # This is already the string value of the enum, e.g. "web"
     semantic_identifier: str
     boost: int
     recency_bias: float

View File

@@ -1,4 +1,4 @@
GENERAL_SEP_PAT = "-----" GENERAL_SEP_PAT = "--------------" # Same length as Langchain's separator
CODE_BLOCK_PAT = "```\n{}\n```" CODE_BLOCK_PAT = "```\n{}\n```"
QUESTION_PAT = "Query:" QUESTION_PAT = "Query:"
THOUGHT_PAT = "Thought:" THOUGHT_PAT = "Thought:"

View File

@@ -49,15 +49,17 @@ ANSWER_NOT_FOUND_RESPONSE = f'{{"answer": "{UNCERTAINTY_PAT}", "quotes": []}}'
JSON_PROMPT = f""" JSON_PROMPT = f"""
{QA_HEADER} {QA_HEADER}
{REQUIRE_JSON} {REQUIRE_JSON}
{GENERAL_SEP_PAT}
CONTEXT: CONTEXT:
{GENERAL_SEP_PAT}
{{context_docs_str}} {{context_docs_str}}
{GENERAL_SEP_PAT} {GENERAL_SEP_PAT}
SAMPLE_RESPONSE: SAMPLE_RESPONSE:
``` ```
{{{json.dumps(EMPTY_SAMPLE_JSON)}}} {{{json.dumps(EMPTY_SAMPLE_JSON)}}}
``` ```
{QUESTION_PAT} {{user_query}} {QUESTION_PAT.upper()} {{user_query}}
{JSON_HELPFUL_HINT} {JSON_HELPFUL_HINT}
{{language_hint_or_none}} {{language_hint_or_none}}
""".strip() """.strip()
@@ -68,10 +70,12 @@ SAMPLE_RESPONSE:
 # COT (chain-of-thought) flow basically
 COT_PROMPT = f"""
 {QA_HEADER}
+{GENERAL_SEP_PAT}
 CONTEXT:
+{GENERAL_SEP_PAT}
 {{context_docs_str}}
 {GENERAL_SEP_PAT}

 You MUST respond in the following format:
 ```
 {THOUGHT_PAT} Use this section as a scratchpad to reason through the answer.
@@ -79,7 +83,7 @@ You MUST respond in the following format:
 {{{json.dumps(EMPTY_SAMPLE_JSON)}}}
 ```
-{QUESTION_PAT} {{user_query}}
+{QUESTION_PAT.upper()} {{user_query}}
 {JSON_HELPFUL_HINT}
 {{language_hint_or_none}}
 """.strip()
@@ -96,8 +100,8 @@ Answer the user query below based on the reference document above.
Respond with an "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as needed to support \ Respond with an "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as needed to support \
the answer.' the answer.'
{QUESTION_PAT} {{user_query}} {QUESTION_PAT.upper()} {{user_query}}
{ANSWER_PAT} {ANSWER_PAT.upper()}
""".strip() """.strip()