Fix latency caused by large numbers of tags

This commit is contained in:
Weves
2024-07-14 14:09:04 -07:00
committed by Chris Weaver
parent f63d0ca3ad
commit dae4f6a0bd
5 changed files with 95 additions and 20 deletions

View File

@@ -1,5 +1,6 @@
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -107,18 +108,28 @@ def create_or_add_document_tag_list(
def get_tags_by_value_prefix_for_source_types(
tag_key_prefix: str | None,
tag_value_prefix: str | None,
sources: list[DocumentSource] | None,
limit: int | None,
db_session: Session,
) -> list[Tag]:
query = select(Tag)
if tag_value_prefix:
query = query.where(Tag.tag_value.startswith(tag_value_prefix))
if tag_key_prefix or tag_value_prefix:
conditions = []
if tag_key_prefix:
conditions.append(Tag.tag_key.ilike(f"{tag_key_prefix}%"))
if tag_value_prefix:
conditions.append(Tag.tag_value.ilike(f"{tag_value_prefix}%"))
query = query.where(or_(*conditions))
if sources:
query = query.where(Tag.source.in_(sources))
if limit:
query = query.limit(limit)
result = db_session.execute(query)
tags = result.scalars().all()

View File

@@ -88,6 +88,7 @@ def get_tags(
# If this is empty or None, then tags for all sources are considered
sources: list[DocumentSource] | None = None,
allow_prefix: bool = True, # This is currently the only option
limit: int = 50,
_: User = Depends(current_user),
db_session: Session = Depends(get_session),
) -> TagResponse:
@@ -95,8 +96,10 @@ def get_tags(
raise NotImplementedError("Cannot disable prefix match for now")
db_tags = get_tags_by_value_prefix_for_source_types(
tag_key_prefix=match_pattern,
tag_value_prefix=match_pattern,
sources=sources,
limit=limit,
db_session=db_session,
)
server_tags = [