From 890eb7901e2052ba6ebc0fbe01eeb987342683f5 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Tue, 24 Oct 2023 15:08:27 -0700 Subject: [PATCH] Capping negative boost at 0.5 (#622) --- backend/danswer/datastores/datastore_utils.py | 14 ++++++++++---- .../vespa/app_config/schemas/danswer_chunk.sd | 4 ++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/danswer/datastores/datastore_utils.py b/backend/danswer/datastores/datastore_utils.py index a4fbb02f3ca..c9565db5c7d 100644 --- a/backend/danswer/datastores/datastore_utils.py +++ b/backend/danswer/datastores/datastore_utils.py @@ -6,13 +6,19 @@ from danswer.chunking.models import InferenceChunk DEFAULT_BATCH_SIZE = 30 -BOOST_MULTIPLIER = 2 # Try to keep this consistent with Vespa def translate_boost_count_to_multiplier(boost: int) -> float: - # Sigmoid function, maxed out at BOOST_MULTIPLIER - # 3 here stretches it out so we hit asymptote slower - return BOOST_MULTIPLIER / (1 + math.exp(-1 * boost / 3)) + """Mapping boost integer values to a multiplier according to a sigmoid curve + Piecewise such that at many downvotes, it's 0.5x the score and with many upvotes + it is 2x the score. 
This should be in line with the Vespa calculation.""" + # 3 in the equation below stretches it out to hit asymptotes slower + if boost < 0: + # 0.5 + sigmoid -> range of 0.5 to 1 + return 0.5 + (1 / (1 + math.exp(-1 * boost / 3))) + + # 2 x sigmoid -> range of 1 to 2 + return 2 / (1 + math.exp(-1 * boost / 3)) def get_uuid_from_chunk( diff --git a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd index 4d08b60a75d..5c4f49e2618 100644 --- a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd +++ b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd @@ -100,9 +100,9 @@ schema danswer_chunk { } function inline document_boost() { - # 0 to 2x score following sigmoid function stretched out by factor of 3 + # 0.5 to 2x score: piecewise sigmoid function stretched out by factor of 3 # meaning requires 3x the number of feedback votes to have default sigmoid effect - expression: 2 / (1 + exp(-attribute(boost) / 3)) + expression: if(attribute(boost) < 0, 0.5 + (1 / (1 + exp(-attribute(boost) / 3))), 2 / (1 + exp(-attribute(boost) / 3))) } function inline document_age() {