From 78f2e07d236ae35be78cce70a156bf670b8ded99 Mon Sep 17 00:00:00 2001 From: Weves Date: Tue, 27 Feb 2024 15:41:36 -0800 Subject: [PATCH] Improve tag handling --- backend/danswer/db/tag.py | 25 ++++++++++++++++++- web/src/components/search/DocumentDisplay.tsx | 21 ++++++++++++---- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/backend/danswer/db/tag.py b/backend/danswer/db/tag.py index bf70f7308..377621c4f 100644 --- a/backend/danswer/db/tag.py +++ b/backend/danswer/db/tag.py @@ -12,13 +12,30 @@ from danswer.utils.logger import setup_logger logger = setup_logger() +def check_tag_validity(tag_key: str, tag_value: str) -> bool: + """If a tag is too long, it should not be used (it will cause an error in Postgres + as the unique constraint can only apply to entries that are less than 2704 bytes). + + Additionally, extremely long tags are not really usable / useful.""" + if len(tag_key) + len(tag_value) > 255: + logger.error( + f"Tag with key '{tag_key}' and value '{tag_value}' is too long, cannot be used" + ) + return False + + return True + + def create_or_add_document_tag( tag_key: str, tag_value: str, source: DocumentSource, document_id: str, db_session: Session, -) -> Tag: +) -> Tag | None: + if not check_tag_validity(tag_key, tag_value): + return None + document = db_session.get(Document, document_id) if not document: raise ValueError("Invalid Document, cannot attach Tags") @@ -48,6 +65,12 @@ def create_or_add_document_tag_list( document_id: str, db_session: Session, ) -> list[Tag]: + valid_tag_values = [ + tag_value for tag_value in tag_values if check_tag_validity(tag_key, tag_value) + ] + if not valid_tag_values: + return [] + document = db_session.get(Document, document_id) if not document: raise ValueError("Invalid Document, cannot attach Tags") diff --git a/web/src/components/search/DocumentDisplay.tsx b/web/src/components/search/DocumentDisplay.tsx index f7406ddf6..4df96b67b 100644 --- a/web/src/components/search/DocumentDisplay.tsx +++ b/web/src/components/search/DocumentDisplay.tsx @@ -110,6 +110,9 @@ export function DocumentMetadataBlock({ }: { document: DanswerDocument; }) { + // don't display super long tags, as they are ugly + const MAXIMUM_TAG_LENGTH = 40; + return (
{document.updated_at && ( @@ -120,11 +123,19 @@ export function DocumentMetadataBlock({ {Object.entries(document.metadata).length > 0 && ( <>
- {Object.entries(document.metadata).map(([key, value]) => { - return ( - - ); - })} + {Object.entries(document.metadata) + .filter( + ([key, value]) => (key + value).length <= MAXIMUM_TAG_LENGTH + ) + .map(([key, value]) => { + return ( + + ); + })} )}