Capping negative boost at 0.5 (#622)

This commit is contained in:
Yuhong Sun 2023-10-24 15:08:27 -07:00 committed by GitHub
parent 0a6c2afb8a
commit 890eb7901e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 6 deletions

View File

@ -6,13 +6,19 @@ from danswer.chunking.models import InferenceChunk
DEFAULT_BATCH_SIZE = 30 DEFAULT_BATCH_SIZE = 30
BOOST_MULTIPLIER = 2 # Try to keep this consistent with Vespa
def translate_boost_count_to_multiplier(boost: int) -> float:
    """Map an integer boost count to a score multiplier on a piecewise sigmoid.

    With many downvotes (very negative ``boost``) the multiplier approaches
    0.5x; with many upvotes it approaches 2x; a boost of 0 gives exactly 1x.
    This should be kept in line with the Vespa ``document_boost`` calculation.

    Args:
        boost: Net feedback count; negative for downvotes, positive for upvotes.

    Returns:
        A multiplier in the range 0.5 to 2.
    """
    # Dividing by 3 stretches the curve out so the asymptotes are hit slower.
    if boost < 0:
        # 0.5 + sigmoid -> range of 0.5 to 1.
        # sigmoid(x) is written as e^x / (1 + e^x) so that the exponent is
        # negative on this branch: a hugely negative boost underflows to 0.0
        # (yielding the 0.5 floor) instead of raising OverflowError as
        # 1 / (1 + e^-x) would.
        scaled = math.exp(boost / 3)
        return 0.5 + scaled / (1 + scaled)

    # 2 x sigmoid -> range of 1 to 2. The exponent is <= 0 here, so it cannot
    # overflow.
    return 2 / (1 + math.exp(-boost / 3))
def get_uuid_from_chunk( def get_uuid_from_chunk(

View File

@ -100,9 +100,9 @@ schema danswer_chunk {
} }
function inline document_boost() { function inline document_boost() {
# 0 to 2x score following sigmoid function stretched out by factor of 3 # 0.5 to 2x score: piecewise sigmoid function stretched out by factor of 3
# meaning requires 3x the number of feedback votes to have default sigmoid effect # meaning requires 3x the number of feedback votes to have default sigmoid effect
# In the updated (right-hand) expression: boost < 0 uses 0.5 + sigmoid(boost / 3), otherwise 2 * sigmoid(boost / 3); both give 1 at boost = 0
expression: 2 / (1 + exp(-attribute(boost) / 3)) expression: if(attribute(boost) < 0, 0.5 + (1 / (1 + exp(-attribute(boost) / 3))), 2 / (1 + exp(-attribute(boost) / 3)))
} }
function inline document_age() { function inline document_age() {