Pass Tags to LLM (#1071)

This commit is contained in:
Yuhong Sun 2024-02-11 15:58:42 -08:00 committed by GitHub
parent 4629df06ef
commit 1c4f7fe7ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 16 additions and 1 deletions

View File

@@ -63,14 +63,23 @@ def build_doc_context_str(
semantic_identifier: str,
source_type: DocumentSource,
content: str,
metadata_dict: dict[str, str | list[str]],
updated_at: datetime | None,
ind: int,
include_metadata: bool = True,
updated_at: datetime | None = None,
) -> str:
context_str = ""
if include_metadata:
context_str += f"DOCUMENT {ind}: {semantic_identifier}\n"
context_str += f"Source: {clean_up_source(source_type)}\n"
for k, v in metadata_dict.items():
if isinstance(v, list):
v_str = ", ".join(v)
context_str += f"{k.capitalize()}: {v_str}\n"
else:
context_str += f"{k.capitalize()}: {v}\n"
if updated_at:
update_str = updated_at.strftime("%B %d, %Y %H:%M")
context_str += f"Updated: {update_str}\n"
@@ -88,6 +97,7 @@ def build_complete_context_str(
semantic_identifier=doc.semantic_identifier,
source_type=doc.source_type,
content=doc.content,
metadata_dict=doc.metadata,
updated_at=doc.updated_at,
ind=ind,
include_metadata=include_metadata,
@@ -145,6 +155,7 @@ def llm_doc_from_inference_chunk(inf_chunk: InferenceChunk) -> LlmDoc:
content=inf_chunk.content,
semantic_identifier=inf_chunk.semantic_identifier,
source_type=inf_chunk.source_type,
metadata=inf_chunk.metadata,
updated_at=inf_chunk.updated_at,
link=inf_chunk.source_links[0] if inf_chunk.source_links else None,
)

View File

@@ -18,6 +18,7 @@ class LlmDoc(BaseModel):
content: str
semantic_identifier: str
source_type: DocumentSource
metadata: dict[str, str | list[str]]
updated_at: datetime | None
link: str | None

View File

@@ -294,6 +294,7 @@ def stream_chat_message(
semantic_identifier=llm_doc.semantic_identifier,
source_type=llm_doc.source_type,
content=llm_doc.content,
metadata_dict=llm_doc.metadata,
updated_at=llm_doc.updated_at,
ind=ind,
)

View File

@@ -614,6 +614,7 @@ def combine_inference_chunks(inf_chunks: list[InferenceChunk]) -> LlmDoc:
content="\n".join(chunk_texts),
semantic_identifier=first_chunk.semantic_identifier,
source_type=first_chunk.source_type,
metadata=first_chunk.metadata,
updated_at=first_chunk.updated_at,
link=first_chunk.source_links[0] if first_chunk.source_links else None,
)

View File

@@ -57,6 +57,7 @@ def get_document_info(
semantic_identifier=first_chunk.semantic_identifier,
source_type=first_chunk.source_type,
content=combined_contents,
metadata_dict=first_chunk.metadata,
updated_at=first_chunk.updated_at,
ind=0,
)