From 5da81a3d0dd150d92c9f8b6d67ef74573dbcbc53 Mon Sep 17 00:00:00 2001
From: Chris Weaver <25087905+Weves@users.noreply.github.com>
Date: Tue, 17 Oct 2023 20:06:12 -0700
Subject: [PATCH] Add hiding of documents to feedback page (#585)
---
backend/danswer/configs/constants.py | 1 +
backend/danswer/datastores/interfaces.py | 1 +
.../vespa/app_config/schemas/danswer_chunk.sd | 3 +
backend/danswer/datastores/vespa/store.py | 37 ++-
backend/danswer/db/feedback.py | 24 +-
backend/danswer/server/manage.py | 18 ++
backend/danswer/server/models.py | 5 +
.../app/admin/connectors/document360/page.tsx | 7 +-
.../feedback/DocumentFeedbackTable.tsx | 243 ++++++++++++++++++
.../app/admin/documents/feedback/constants.ts | 2 +
web/src/app/admin/documents/feedback/lib.ts | 34 +++
web/src/app/admin/documents/feedback/page.tsx | 183 +------------
web/src/components/CustomCheckbox.tsx | 34 +++
web/src/components/HoverPopup.tsx | 22 +-
web/src/components/search/Filters.tsx | 8 +-
web/src/lib/fetchUtils.ts | 7 +
16 files changed, 422 insertions(+), 207 deletions(-)
create mode 100644 web/src/app/admin/documents/feedback/DocumentFeedbackTable.tsx
create mode 100644 web/src/app/admin/documents/feedback/constants.ts
create mode 100644 web/src/app/admin/documents/feedback/lib.ts
create mode 100644 web/src/components/CustomCheckbox.tsx
create mode 100644 web/src/lib/fetchUtils.ts
diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py
index 977682f870cc..e5330cce4c9a 100644
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -24,6 +24,7 @@ PUBLIC_DOC_PAT = "PUBLIC"
PUBLIC_DOCUMENT_SET = "__PUBLIC"
QUOTE = "quote"
BOOST = "boost"
+HIDDEN = "hidden"
SCORE = "score"
ID_SEPARATOR = ":;:"
DEFAULT_BOOST = 0
diff --git a/backend/danswer/datastores/interfaces.py b/backend/danswer/datastores/interfaces.py
index 969cc19341f6..895b3098e58e 100644
--- a/backend/danswer/datastores/interfaces.py
+++ b/backend/danswer/datastores/interfaces.py
@@ -36,6 +36,7 @@ class UpdateRequest:
access: DocumentAccess | None = None
document_sets: set[str] | None = None
boost: float | None = None
+ hidden: bool | None = None
class Verifiable(abc.ABC):
diff --git a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd
index 228480632ca0..c0d026f9ac67 100644
--- a/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/danswer/datastores/vespa/app_config/schemas/danswer_chunk.sd
@@ -47,6 +47,9 @@ schema danswer_chunk {
field boost type float {
indexing: summary | attribute
}
+ field hidden type bool {
+ indexing: summary | attribute
+ }
field metadata type string {
indexing: summary | attribute
}
diff --git a/backend/danswer/datastores/vespa/store.py b/backend/danswer/datastores/vespa/store.py
index b2f51ccf9f28..e6a4b73c439b 100644
--- a/backend/danswer/datastores/vespa/store.py
+++ b/backend/danswer/datastores/vespa/store.py
@@ -30,6 +30,7 @@ from danswer.configs.constants import DEFAULT_BOOST
from danswer.configs.constants import DOCUMENT_ID
from danswer.configs.constants import DOCUMENT_SETS
from danswer.configs.constants import EMBEDDINGS
+from danswer.configs.constants import HIDDEN
from danswer.configs.constants import MATCH_HIGHLIGHTS
from danswer.configs.constants import METADATA
from danswer.configs.constants import SCORE
@@ -271,8 +272,10 @@ def _build_vespa_filters(filters: list[IndexFilter] | None) -> str:
# via the `filters` arg. These are set either in the Web UI or in the Slack
# listener
+ # ignore hidden docs
+ filter_str = f"!({HIDDEN}=true) and "
+
# Handle provided query filters
- filter_str = ""
if filters:
for filter_dict in filters:
valid_filters = {
@@ -424,16 +427,26 @@ class VespaIndex(DocumentIndex):
batch_size: int = _BATCH_SIZE,
) -> None:
"""Runs a batch of updates in parallel via the ThreadPoolExecutor."""
+
+ def _update_chunk(update: _VespaUpdateRequest) -> Response:
+ update_body = json.dumps(update.update_request)
+ logger.debug(
+ f"Updating with request to {update.url} with body {update_body}"
+ )
+ return requests.put(
+ update.url,
+ headers={"Content-Type": "application/json"},
+ data=update_body,
+ )
+
with concurrent.futures.ThreadPoolExecutor(
max_workers=_NUM_THREADS
) as executor:
for update_batch in batch_generator(updates, batch_size):
future_to_document_id = {
executor.submit(
- requests.put,
- update.url,
- headers={"Content-Type": "application/json"},
- data=json.dumps(update.update_request),
+ _update_chunk,
+ update,
): update.document_id
for update in update_batch
}
@@ -451,14 +464,6 @@ class VespaIndex(DocumentIndex):
processed_updates_requests: list[_VespaUpdateRequest] = []
for update_request in update_requests:
- if (
- update_request.boost is None
- and update_request.access is None
- and update_request.document_sets is None
- ):
- logger.error("Update request received but nothing to update")
- continue
-
update_dict: dict[str, dict] = {"fields": {}}
if update_request.boost is not None:
update_dict["fields"][BOOST] = {"assign": update_request.boost}
@@ -474,6 +479,12 @@ class VespaIndex(DocumentIndex):
acl_entry: 1 for acl_entry in update_request.access.to_acl()
}
}
+ if update_request.hidden is not None:
+ update_dict["fields"][HIDDEN] = {"assign": update_request.hidden}
+
+ if not update_dict["fields"]:
+ logger.error("Update request received but nothing to update")
+ continue
for document_id in update_request.document_ids:
for doc_chunk_id in _get_vespa_chunk_ids_by_document_id(document_id):
diff --git a/backend/danswer/db/feedback.py b/backend/danswer/db/feedback.py
index 2348a31b4bf7..be0e844a80b0 100644
--- a/backend/danswer/db/feedback.py
+++ b/backend/danswer/db/feedback.py
@@ -46,7 +46,11 @@ def fetch_docs_ranked_by_boost(
db_session: Session, ascending: bool = False, limit: int = 100
) -> list[DbDocument]:
order_func = asc if ascending else desc
- stmt = select(DbDocument).order_by(order_func(DbDocument.boost)).limit(limit)
+ stmt = (
+ select(DbDocument)
+ .order_by(order_func(DbDocument.boost), order_func(DbDocument.semantic_id))
+ .limit(limit)
+ )
result = db_session.execute(stmt)
doc_list = result.scalars().all()
@@ -71,6 +75,24 @@ def update_document_boost(db_session: Session, document_id: str, boost: int) ->
db_session.commit()
+def update_document_hidden(db_session: Session, document_id: str, hidden: bool) -> None:
+ stmt = select(DbDocument).where(DbDocument.id == document_id)
+ result = db_session.execute(stmt).scalar_one_or_none()
+ if result is None:
+ raise ValueError(f"No document found with ID: '{document_id}'")
+
+ result.hidden = hidden
+
+ update = UpdateRequest(
+ document_ids=[document_id],
+ hidden=hidden,
+ )
+
+ get_default_document_index().update([update])
+
+ db_session.commit()
+
+
def create_query_event(
query: str,
selected_flow: SearchType | None,
diff --git a/backend/danswer/server/manage.py b/backend/danswer/server/manage.py
index 35aa9a709cb8..9e788ce2b8e9 100644
--- a/backend/danswer/server/manage.py
+++ b/backend/danswer/server/manage.py
@@ -54,6 +54,7 @@ from danswer.db.document import get_document_cnts_for_cc_pairs
from danswer.db.engine import get_session
from danswer.db.feedback import fetch_docs_ranked_by_boost
from danswer.db.feedback import update_document_boost
+from danswer.db.feedback import update_document_hidden
from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import get_latest_index_attempts
from danswer.db.models import User
@@ -78,6 +79,7 @@ from danswer.server.models import GDriveCallback
from danswer.server.models import GoogleAppCredentials
from danswer.server.models import GoogleServiceAccountCredentialRequest
from danswer.server.models import GoogleServiceAccountKey
+from danswer.server.models import HiddenUpdateRequest
from danswer.server.models import IndexAttemptSnapshot
from danswer.server.models import ObjectCreationIdResponse
from danswer.server.models import RunConnectorRequest
@@ -133,6 +135,22 @@ def document_boost_update(
raise HTTPException(status_code=400, detail=str(e))
+@router.post("/admin/doc-hidden")
+def document_hidden_update(
+ hidden_update: HiddenUpdateRequest,
+ _: User | None = Depends(current_admin_user),
+ db_session: Session = Depends(get_session),
+) -> None:
+ try:
+ update_document_hidden(
+ db_session=db_session,
+ document_id=hidden_update.document_id,
+ hidden=hidden_update.hidden,
+ )
+ except ValueError as e:
+ raise HTTPException(status_code=400, detail=str(e))
+
+
@router.get("/admin/connector/google-drive/app-credential")
def check_google_app_credentials_exist(
_: User = Depends(current_admin_user),
diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py
index 7b6a923ced11..0f5a349ee3a5 100644
--- a/backend/danswer/server/models.py
+++ b/backend/danswer/server/models.py
@@ -135,6 +135,11 @@ class BoostUpdateRequest(BaseModel):
boost: int
+class HiddenUpdateRequest(BaseModel):
+ document_id: str
+ hidden: bool
+
+
class SearchDoc(BaseModel):
document_id: str
semantic_identifier: str
diff --git a/web/src/app/admin/connectors/document360/page.tsx b/web/src/app/admin/connectors/document360/page.tsx
index fa9a067f3c97..357151eb97a3 100644
--- a/web/src/app/admin/connectors/document360/page.tsx
+++ b/web/src/app/admin/connectors/document360/page.tsx
@@ -96,14 +96,15 @@ const MainSection = () => {
<>
To use the Document360 connector, you must first provide the API
- token and portal ID corresponding to your Document360 setup. See setup guide{" "}
+ token and portal ID corresponding to your Document360 setup. See
+ setup guide{" "}
here
-
- {" "}for more detail.
+ {" "}
+ for more detail.