mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-26 16:01:09 +02:00
Add support for overriding semantic_identifier for file connector
This commit is contained in:
parent
eed45f8410
commit
9dac17d3e1
@ -68,6 +68,9 @@ def _process_file(
|
|||||||
file_content_raw, file_metadata = read_file(file)
|
file_content_raw, file_metadata = read_file(file)
|
||||||
file_metadata = {**metadata, **file_metadata}
|
file_metadata = {**metadata, **file_metadata}
|
||||||
|
|
||||||
|
# If this is set, we will show this in the UI as the "name" of the file
|
||||||
|
file_display_name_override = file_metadata.get("file_display_name")
|
||||||
|
|
||||||
time_updated = file_metadata.get("time_updated", datetime.now(timezone.utc))
|
time_updated = file_metadata.get("time_updated", datetime.now(timezone.utc))
|
||||||
if isinstance(time_updated, str):
|
if isinstance(time_updated, str):
|
||||||
time_updated = time_str_to_utc(time_updated)
|
time_updated = time_str_to_utc(time_updated)
|
||||||
@ -87,17 +90,18 @@ def _process_file(
|
|||||||
"primary_owners",
|
"primary_owners",
|
||||||
"secondary_owners",
|
"secondary_owners",
|
||||||
"filename",
|
"filename",
|
||||||
|
"file_display_name",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
return [
|
return [
|
||||||
Document(
|
Document(
|
||||||
id=file_name,
|
id=f"FILE_CONNECTOR__{file_name}", # add a prefix to avoid conflicts with other connectors
|
||||||
sections=[
|
sections=[
|
||||||
Section(link=metadata.get("link"), text=file_content_raw.strip())
|
Section(link=metadata.get("link"), text=file_content_raw.strip())
|
||||||
],
|
],
|
||||||
source=DocumentSource.FILE,
|
source=DocumentSource.FILE,
|
||||||
semantic_identifier=file_name,
|
semantic_identifier=file_display_name_override or file_name,
|
||||||
doc_updated_at=final_time_updated,
|
doc_updated_at=final_time_updated,
|
||||||
primary_owners=metadata.get("primary_owners"),
|
primary_owners=metadata.get("primary_owners"),
|
||||||
secondary_owners=metadata.get("secondary_owners"),
|
secondary_owners=metadata.get("secondary_owners"),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user