danswer/backend/danswer/datastores/datastore_utils.py
2023-10-24 15:08:27 -07:00

39 lines
1.2 KiB
Python

import math
import uuid
from danswer.chunking.models import IndexChunk
from danswer.chunking.models import InferenceChunk
# Default batch size shared by datastore code. It is not referenced in the
# visible part of this module — presumably consumed by importers that batch
# chunk operations; TODO confirm against callers.
DEFAULT_BATCH_SIZE = 30
def translate_boost_count_to_multiplier(boost: int) -> float:
    """Map an integer boost count to a score multiplier along a sigmoid curve.

    The mapping is piecewise: with many downvotes (very negative ``boost``) the
    multiplier approaches 0.5x the score, with many upvotes it approaches 2x
    the score, and a boost of 0 yields exactly 1x. This should be in line with
    the Vespa calculation.

    Args:
        boost: Net boost count; negative values lower the score, positive
            values raise it.

    Returns:
        The multiplier, between 0.5 and 2 inclusive (the bounds are reached
        only once the sigmoid saturates in floating point).
    """
    # 3 in the equations below stretches the curve out to hit asymptotes slower
    if boost < 0:
        try:
            # 0.5 + sigmoid -> range of 0.5 to 1
            return 0.5 + (1 / (1 + math.exp(-1 * boost / 3)))
        except OverflowError:
            # math.exp raises once its argument exceeds ~709 (boost below
            # roughly -2129). The sigmoid term is indistinguishable from 0
            # long before that point, so the multiplier is the lower asymptote.
            return 0.5
    # 2 x sigmoid -> range of 1 to 2. For very large boosts math.exp underflows
    # silently to 0.0, which gives the upper asymptote of exactly 2.0.
    return 2 / (1 + math.exp(-1 * boost / 3))
def get_uuid_from_chunk(
    chunk: IndexChunk | InferenceChunk, mini_chunk_ind: int = 0
) -> uuid.UUID:
    """Derive a name-based (version 5) UUID identifying a chunk.

    The UUID is computed from the chunk's document identifier, its
    ``chunk_id`` and ``mini_chunk_ind``, joined with underscores, so the same
    three values always produce the same UUID.

    Args:
        chunk: The chunk to identify. The document identifier is read from
            ``document_id`` for an ``InferenceChunk`` and from
            ``source_document.id`` otherwise.
        mini_chunk_ind: Extra index mixed into the identifier; defaults to 0.

    Returns:
        The ``uuid.uuid5`` of the combined identifier string in the X500
        namespace.
    """
    if isinstance(chunk, InferenceChunk):
        document_key = chunk.document_id
    else:
        document_key = chunk.source_document.id

    # Web parsing URL duplicate catching: drop a single trailing slash so that
    # "…/page" and "…/page/" resolve to the same identifier.
    if document_key and document_key[-1] == "/":
        document_key = document_key[:-1]

    identifier_parts = (document_key, str(chunk.chunk_id), str(mini_chunk_ind))
    combined_name = "_".join(identifier_parts)
    return uuid.uuid5(uuid.NAMESPACE_X500, combined_name)