Chunk Filter Metadata Format (#2053)

This commit is contained in:
Yuhong Sun 2024-08-05 15:12:36 -07:00 committed by GitHub
parent 5bfdecacad
commit a8a4ad9546
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 7 deletions

View File

@@ -5,7 +5,7 @@
USEFUL_PAT = "Yes useful"
NONUSEFUL_PAT = "Not useful"
SECTION_FILTER_PROMPT = f"""
Determine if the reference section is USEFUL for answering the user query.
Determine if the following section is USEFUL for answering the user query.
It is NOT enough for the section to be related to the query, \
it must contain information that is USEFUL for answering the query.
If the section contains ANY useful information, that is good enough, \
@@ -13,11 +13,8 @@ it does not need to fully answer the every part of the user query.
Title: {{title}}
{{optional_metadata}}
Reference Section:
```
{{chunk_text}}
```

View File

@@ -20,11 +20,11 @@ def llm_eval_section(
metadata: dict[str, str | list[str]],
) -> bool:
def _get_metadata_str(metadata: dict[str, str | list[str]]) -> str:
metadata_str = "\n\nMetadata:\n"
metadata_str = "\nMetadata:\n"
for key, value in metadata.items():
value_str = ", ".join(value) if isinstance(value, list) else value
metadata_str += f"{key} - {value_str}\n"
return metadata_str + "\nContent:"
return metadata_str
def _get_usefulness_messages() -> list[dict[str, str]]:
metadata_str = _get_metadata_str(metadata) if metadata else ""
@@ -32,7 +32,7 @@ def llm_eval_section(
{
"role": "user",
"content": SECTION_FILTER_PROMPT.format(
title=title,
title=title.replace("\n", " "),
chunk_text=section_content,
user_query=query,
optional_metadata=metadata_str,