From 430c9a47d71d40c1293d304719f7ca329baec456 Mon Sep 17 00:00:00 2001 From: trial-danswer Date: Fri, 13 Sep 2024 22:28:07 -0700 Subject: [PATCH] Match any/all keywords in Standard Answers (#2443) * migration: add column "match_any_keywords" to StandardAnswer * Implement any/all keyword matching for standard answers * Add match_any_keywords to non-searchable fields * Remove stray print * Simplify Slack messages for any and all cases --------- Co-authored-by: danswer-trial --- ...3_match_any_keywords_flag_for_standard_.py | 35 ++++++++++++ backend/danswer/db/models.py | 1 + backend/danswer/db/standard_answer.py | 4 ++ backend/danswer/server/manage/models.py | 12 +++++ .../slack/handlers/handle_standard_answers.py | 2 +- backend/ee/danswer/db/standard_answer.py | 33 ++++++++---- .../danswer/server/manage/standard_answer.py | 2 + .../StandardAnswerCreationForm.tsx | 53 +++++++++++++++++-- web/src/app/ee/admin/standard-answer/lib.ts | 2 + web/src/app/ee/admin/standard-answer/page.tsx | 10 +++- web/src/lib/types.ts | 1 + 11 files changed, 138 insertions(+), 17 deletions(-) create mode 100644 backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py diff --git a/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py b/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py new file mode 100644 index 000000000000..0e49b603cecc --- /dev/null +++ b/backend/alembic/versions/5c7fdadae813_match_any_keywords_flag_for_standard_.py @@ -0,0 +1,35 @@ +"""match_any_keywords flag for standard answers + +Revision ID: 5c7fdadae813 +Revises: efb35676026c +Create Date: 2024-09-13 18:52:59.256478 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. +revision = "5c7fdadae813" +down_revision = "efb35676026c" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "standard_answer", + sa.Column( + "match_any_keywords", + sa.Boolean(), + nullable=False, + server_default=sa.false(), + ), + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("standard_answer", "match_any_keywords") + # ### end Alembic commands ### diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index 89b94fb6a948..16a6459f380f 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -1372,6 +1372,7 @@ class StandardAnswer(Base): answer: Mapped[str] = mapped_column(String) active: Mapped[bool] = mapped_column(Boolean) match_regex: Mapped[bool] = mapped_column(Boolean) + match_any_keywords: Mapped[bool] = mapped_column(Boolean) __table_args__ = ( Index( diff --git a/backend/danswer/db/standard_answer.py b/backend/danswer/db/standard_answer.py index d7f1346c3f99..85d5d922889a 100644 --- a/backend/danswer/db/standard_answer.py +++ b/backend/danswer/db/standard_answer.py @@ -41,6 +41,7 @@ def insert_standard_answer( answer: str, category_ids: list[int], match_regex: bool, + match_any_keywords: bool, db_session: Session, ) -> StandardAnswer: existing_categories = fetch_standard_answer_categories_by_ids( @@ -56,6 +57,7 @@ def insert_standard_answer( categories=existing_categories, active=True, match_regex=match_regex, + match_any_keywords=match_any_keywords, ) db_session.add(standard_answer) db_session.commit() @@ -68,6 +70,7 @@ def update_standard_answer( answer: str, category_ids: list[int], match_regex: bool, + match_any_keywords: bool, db_session: Session, ) -> StandardAnswer: standard_answer = db_session.scalar( @@ -87,6 +90,7 @@ def update_standard_answer( standard_answer.answer = answer standard_answer.categories = list(existing_categories) standard_answer.match_regex = match_regex + standard_answer.match_any_keywords = match_any_keywords db_session.commit() diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index e42a5951ba35..e4618c45658d 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -143,6 +143,7 @@ class StandardAnswer(BaseModel): answer: str categories: list[StandardAnswerCategory] match_regex: bool + match_any_keywords: bool @classmethod def from_model(cls, standard_answer_model: StandardAnswerModel) -> "StandardAnswer": @@ -151,6 +152,7 @@ class StandardAnswer(BaseModel): keyword=standard_answer_model.keyword, answer=standard_answer_model.answer, match_regex=standard_answer_model.match_regex, + match_any_keywords=standard_answer_model.match_any_keywords, categories=[ StandardAnswerCategory.from_model(standard_answer_category_model) for standard_answer_category_model in standard_answer_model.categories @@ -163,6 +165,7 @@ class StandardAnswerCreationRequest(BaseModel): answer: str categories: list[int] match_regex: bool + match_any_keywords: bool @field_validator("categories", mode="before") @classmethod @@ -173,6 +176,15 @@ class StandardAnswerCreationRequest(BaseModel): ) return value + @model_validator(mode="after") + def validate_only_match_any_if_not_regex(self) -> Any: + if self.match_regex and self.match_any_keywords: + raise ValueError( + "Can only match any keywords in keyword mode, not regex mode" + ) + + return self + @model_validator(mode="after") def validate_keyword_if_regex(self) -> Any: if not self.match_regex: diff --git a/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py b/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py index e01d3cba266b..96c72187a67a 100644 --- a/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py +++ b/backend/ee/danswer/danswerbot/slack/handlers/handle_standard_answers.py @@ -174,7 +174,7 @@ def _handle_standard_answers( formatted_answers = [] for standard_answer, match_str in matching_standard_answers: since_you_mentioned_pretext = ( - f'Since your question contained "_{match_str}_"' + f'Since your question contains "_{match_str}_"' ) block_quotified_answer = ">" + standard_answer.answer.replace("\n", "\n> ") formatted_answer = f"{since_you_mentioned_pretext}, I thought this might be useful: \n\n{block_quotified_answer}" diff --git a/backend/ee/danswer/db/standard_answer.py b/backend/ee/danswer/db/standard_answer.py index 7b2bf431c597..2887a487b5eb 100644 --- a/backend/ee/danswer/db/standard_answer.py +++ b/backend/ee/danswer/db/standard_answer.py @@ -36,8 +36,8 @@ def find_matching_standard_answers( If `answer_instance.match_regex` is true, the definition is considered "matched" if the query matches the `answer_instance.keyword` using `re.search`. - Otherwise, the definition is considered "matched" if each space-delimited token - in `keyword` exists in `query`. + Otherwise, the definition is considered "matched" if the space-delimited tokens + in `keyword` exists in `query`, depending on the state of `match_any_keywords` """ stmt = ( select(StandardAnswer) @@ -56,11 +56,13 @@ def find_matching_standard_answers( else: # Remove punctuation and split the keyword into individual words - keyword_words = "".join( - char - for char in standard_answer.keyword.lower() - if char not in string.punctuation - ).split() + keyword_words = set( + "".join( + char + for char in standard_answer.keyword.lower() + if char not in string.punctuation + ).split() + ) # Remove punctuation and split the query into individual words query_words = "".join( @@ -68,9 +70,18 @@ def find_matching_standard_answers( ).split() # Check if all of the keyword words are in the query words - if all(word in query_words for word in keyword_words): - matching_standard_answers.append( - (standard_answer, standard_answer.keyword) - ) + if standard_answer.match_any_keywords: + for word in query_words: + if word in keyword_words: + matching_standard_answers.append((standard_answer, word)) + break + else: + if all(word in query_words for word in keyword_words): + matching_standard_answers.append( + ( + standard_answer, + re.sub(r"\s+?", ", ", standard_answer.keyword), + ) + ) return matching_standard_answers diff --git a/backend/ee/danswer/server/manage/standard_answer.py b/backend/ee/danswer/server/manage/standard_answer.py index ef97bfb51343..ea3ca0bc0dc7 100644 --- a/backend/ee/danswer/server/manage/standard_answer.py +++ b/backend/ee/danswer/server/manage/standard_answer.py @@ -34,6 +34,7 @@ def create_standard_answer( answer=standard_answer_creation_request.answer, category_ids=standard_answer_creation_request.categories, match_regex=standard_answer_creation_request.match_regex, + match_any_keywords=standard_answer_creation_request.match_any_keywords, db_session=db_session, ) return StandardAnswer.from_model(standard_answer_model) @@ -72,6 +73,7 @@ def patch_standard_answer( answer=standard_answer_creation_request.answer, category_ids=standard_answer_creation_request.categories, match_regex=standard_answer_creation_request.match_regex, + match_any_keywords=standard_answer_creation_request.match_any_keywords, db_session=db_session, ) return StandardAnswer.from_model(standard_answer_model) diff --git a/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx b/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx index 7fd5d54aaf60..15574701fb42 100644 --- a/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx +++ b/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx @@ -9,15 +9,25 @@ import * as Yup from "yup"; import { createStandardAnswer, createStandardAnswerCategory, + StandardAnswerCreationRequest, updateStandardAnswer, } from "./lib"; import { TextFormField, MarkdownFormField, BooleanFormField, + SelectorFormField, } from "@/components/admin/connectors/Field"; import MultiSelectDropdown from "@/components/MultiSelectDropdown"; +function mapKeywordSelectToMatchAny(keywordSelect: "any" | "all"): boolean { + return keywordSelect == "any"; +} + +function mapMatchAnyToKeywordSelect(matchAny: boolean): "any" | "all" { + return matchAny ? "any" : "all"; +} + export const StandardAnswerCreationForm = ({ standardAnswerCategories, existingStandardAnswer, @@ -45,6 +55,11 @@ export const StandardAnswerCreationForm = ({ matchRegex: existingStandardAnswer ? existingStandardAnswer.match_regex : false, + matchAnyKeywords: existingStandardAnswer + ? mapMatchAnyToKeywordSelect( + existingStandardAnswer.match_any_keywords + ) + : "all", }} validationSchema={Yup.object().shape({ keyword: Yup.string() @@ -59,8 +74,11 @@ export const StandardAnswerCreationForm = ({ onSubmit={async (values, formikHelpers) => { formikHelpers.setSubmitting(true); - const cleanedValues = { + const cleanedValues: StandardAnswerCreationRequest = { ...values, + matchAnyKeywords: mapKeywordSelectToMatchAny( + values.matchAnyKeywords + ), categories: values.categories.map((category) => category.id), }; @@ -98,11 +116,19 @@ export const StandardAnswerCreationForm = ({ tooltip="Triggers if the question matches this regex pattern (using Python `re.search()`)" placeholder="(?:it|support)\s*ticket" /> + ) : values.matchAnyKeywords == "any" ? ( + ) : ( @@ -113,6 +139,27 @@ export const StandardAnswerCreationForm = ({ label="Match regex" name="matchRegex" /> + {values.matchRegex ? null : ( + { + setFieldValue("matchAnyKeywords", selected); + }} + /> + )}
{ - const { answer, id, categories, match_regex, ...fieldsToSearch } = - standardAnswer; + const { + answer, + id, + categories, + match_regex, + match_any_keywords, + ...fieldsToSearch + } = standardAnswer; const cleanedQuery = query.toLowerCase(); const searchMatch = Object.values(fieldsToSearch).some((value) => { return value.toLowerCase().includes(cleanedQuery); diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index 3a197178f66c..612e8f2ee672 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -162,6 +162,7 @@ export interface StandardAnswer { keyword: string; answer: string; match_regex: boolean; + match_any_keywords: boolean; categories: StandardAnswerCategory[]; }