From 1c4f7fe7eff782f74d819138c5ce008ae4635763 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 11 Feb 2024 15:58:42 -0800 Subject: [PATCH] Pass Tags to LLM (#1071) --- backend/danswer/chat/chat_utils.py | 13 ++++++++++++- backend/danswer/chat/models.py | 1 + backend/danswer/chat/process_message.py | 1 + backend/danswer/search/search_runner.py | 1 + backend/danswer/server/documents/document.py | 1 + 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py index efd0924e4..ed45c9562 100644 --- a/backend/danswer/chat/chat_utils.py +++ b/backend/danswer/chat/chat_utils.py @@ -63,14 +63,23 @@ def build_doc_context_str( semantic_identifier: str, source_type: DocumentSource, content: str, + metadata_dict: dict[str, str | list[str]], + updated_at: datetime | None, ind: int, include_metadata: bool = True, - updated_at: datetime | None = None, ) -> str: context_str = "" if include_metadata: context_str += f"DOCUMENT {ind}: {semantic_identifier}\n" context_str += f"Source: {clean_up_source(source_type)}\n" + + for k, v in metadata_dict.items(): + if isinstance(v, list): + v_str = ", ".join(v) + context_str += f"{k.capitalize()}: {v_str}\n" + else: + context_str += f"{k.capitalize()}: {v}\n" + if updated_at: update_str = updated_at.strftime("%B %d, %Y %H:%M") context_str += f"Updated: {update_str}\n" @@ -88,6 +97,7 @@ def build_complete_context_str( semantic_identifier=doc.semantic_identifier, source_type=doc.source_type, content=doc.content, + metadata_dict=doc.metadata, updated_at=doc.updated_at, ind=ind, include_metadata=include_metadata, @@ -145,6 +155,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc: content=inf_chunk.content, semantic_identifier=inf_chunk.semantic_identifier, source_type=inf_chunk.source_type, + metadata=inf_chunk.metadata, updated_at=inf_chunk.updated_at, link=inf_chunk.source_links[0] if inf_chunk.source_links else None, ) diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index d23f2229c..de3f7e4f0 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -18,6 +18,7 @@ class LlmDoc(BaseModel): content: str semantic_identifier: str source_type: DocumentSource + metadata: dict[str, str | list[str]] updated_at: datetime | None link: str | None diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index 4885ff877..1f8ecc55a 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -294,6 +294,7 @@ def stream_chat_message( semantic_identifier=llm_doc.semantic_identifier, source_type=llm_doc.source_type, content=llm_doc.content, + metadata_dict=llm_doc.metadata, updated_at=llm_doc.updated_at, ind=ind, ) diff --git a/backend/danswer/search/search_runner.py b/backend/danswer/search/search_runner.py index 5cbd8b17b..943b696ac 100644 --- a/backend/danswer/search/search_runner.py +++ b/backend/danswer/search/search_runner.py @@ -614,6 +614,7 @@ def combine_inference_chunks(inf_chunks: list[InferenceChunk]) -> LlmDoc: content="\n".join(chunk_texts), semantic_identifier=first_chunk.semantic_identifier, source_type=first_chunk.source_type, + metadata=first_chunk.metadata, updated_at=first_chunk.updated_at, link=first_chunk.source_links[0] if first_chunk.source_links else None, ) diff --git a/backend/danswer/server/documents/document.py b/backend/danswer/server/documents/document.py index 05232fb2b..a0ba40254 100644 --- a/backend/danswer/server/documents/document.py +++ b/backend/danswer/server/documents/document.py @@ -57,6 +57,7 @@ def get_document_info( semantic_identifier=first_chunk.semantic_identifier, source_type=first_chunk.source_type, content=combined_contents, + metadata_dict=first_chunk.metadata, updated_at=first_chunk.updated_at, ind=0, )