mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 12:03:54 +02:00
Add hiding of documents to feedback page (#585)
This commit is contained in:
@@ -24,6 +24,7 @@ PUBLIC_DOC_PAT = "PUBLIC"
|
||||
PUBLIC_DOCUMENT_SET = "__PUBLIC"
|
||||
QUOTE = "quote"
|
||||
BOOST = "boost"
|
||||
HIDDEN = "hidden"
|
||||
SCORE = "score"
|
||||
ID_SEPARATOR = ":;:"
|
||||
DEFAULT_BOOST = 0
|
||||
|
@@ -36,6 +36,7 @@ class UpdateRequest:
|
||||
access: DocumentAccess | None = None
|
||||
document_sets: set[str] | None = None
|
||||
boost: float | None = None
|
||||
hidden: bool | None = None
|
||||
|
||||
|
||||
class Verifiable(abc.ABC):
|
||||
|
@@ -47,6 +47,9 @@ schema danswer_chunk {
|
||||
field boost type float {
|
||||
indexing: summary | attribute
|
||||
}
|
||||
field hidden type bool {
|
||||
indexing: summary | attribute
|
||||
}
|
||||
field metadata type string {
|
||||
indexing: summary | attribute
|
||||
}
|
||||
|
@@ -30,6 +30,7 @@ from danswer.configs.constants import DEFAULT_BOOST
|
||||
from danswer.configs.constants import DOCUMENT_ID
|
||||
from danswer.configs.constants import DOCUMENT_SETS
|
||||
from danswer.configs.constants import EMBEDDINGS
|
||||
from danswer.configs.constants import HIDDEN
|
||||
from danswer.configs.constants import MATCH_HIGHLIGHTS
|
||||
from danswer.configs.constants import METADATA
|
||||
from danswer.configs.constants import SCORE
|
||||
@@ -271,8 +272,10 @@ def _build_vespa_filters(filters: list[IndexFilter] | None) -> str:
|
||||
# via the `filters` arg. These are set either in the Web UI or in the Slack
|
||||
# listener
|
||||
|
||||
# ignore hidden docs
|
||||
filter_str = f"!({HIDDEN}=true) and "
|
||||
|
||||
# Handle provided query filters
|
||||
filter_str = ""
|
||||
if filters:
|
||||
for filter_dict in filters:
|
||||
valid_filters = {
|
||||
@@ -424,16 +427,26 @@ class VespaIndex(DocumentIndex):
|
||||
batch_size: int = _BATCH_SIZE,
|
||||
) -> None:
|
||||
"""Runs a batch of updates in parallel via the ThreadPoolExecutor."""
|
||||
|
||||
def _update_chunk(update: _VespaUpdateRequest) -> Response:
    """PUT one update request's JSON body to its URL.

    The serialized body is logged at debug level before the request is
    sent. The HTTP response is returned unmodified; no status checking
    happens here.
    """
    update_body = json.dumps(update.update_request)
    logger.debug(f"Updating with request to {update.url} with body {update_body}")

    json_headers = {"Content-Type": "application/json"}
    response = requests.put(update.url, headers=json_headers, data=update_body)
    return response
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(
|
||||
max_workers=_NUM_THREADS
|
||||
) as executor:
|
||||
for update_batch in batch_generator(updates, batch_size):
|
||||
future_to_document_id = {
|
||||
executor.submit(
|
||||
requests.put,
|
||||
update.url,
|
||||
headers={"Content-Type": "application/json"},
|
||||
data=json.dumps(update.update_request),
|
||||
_update_chunk,
|
||||
update,
|
||||
): update.document_id
|
||||
for update in update_batch
|
||||
}
|
||||
@@ -451,14 +464,6 @@ class VespaIndex(DocumentIndex):
|
||||
|
||||
processed_updates_requests: list[_VespaUpdateRequest] = []
|
||||
for update_request in update_requests:
|
||||
if (
|
||||
update_request.boost is None
|
||||
and update_request.access is None
|
||||
and update_request.document_sets is None
|
||||
):
|
||||
logger.error("Update request received but nothing to update")
|
||||
continue
|
||||
|
||||
update_dict: dict[str, dict] = {"fields": {}}
|
||||
if update_request.boost is not None:
|
||||
update_dict["fields"][BOOST] = {"assign": update_request.boost}
|
||||
@@ -474,6 +479,12 @@ class VespaIndex(DocumentIndex):
|
||||
acl_entry: 1 for acl_entry in update_request.access.to_acl()
|
||||
}
|
||||
}
|
||||
if update_request.hidden is not None:
|
||||
update_dict["fields"][HIDDEN] = {"assign": update_request.hidden}
|
||||
|
||||
if not update_dict["fields"]:
|
||||
logger.error("Update request received but nothing to update")
|
||||
continue
|
||||
|
||||
for document_id in update_request.document_ids:
|
||||
for doc_chunk_id in _get_vespa_chunk_ids_by_document_id(document_id):
|
||||
|
@@ -46,7 +46,11 @@ def fetch_docs_ranked_by_boost(
|
||||
db_session: Session, ascending: bool = False, limit: int = 100
|
||||
) -> list[DbDocument]:
|
||||
order_func = asc if ascending else desc
|
||||
stmt = select(DbDocument).order_by(order_func(DbDocument.boost)).limit(limit)
|
||||
stmt = (
|
||||
select(DbDocument)
|
||||
.order_by(order_func(DbDocument.boost), order_func(DbDocument.semantic_id))
|
||||
.limit(limit)
|
||||
)
|
||||
result = db_session.execute(stmt)
|
||||
doc_list = result.scalars().all()
|
||||
|
||||
@@ -71,6 +75,24 @@ def update_document_boost(db_session: Session, document_id: str, boost: int) ->
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def update_document_hidden(db_session: Session, document_id: str, hidden: bool) -> None:
    """Set the hidden flag of a single document.

    The flag is written to the document's database row and also sent to
    the default document index as an update request.

    Raises:
        ValueError: if no document with ``document_id`` exists in the database.
    """
    document = db_session.execute(
        select(DbDocument).where(DbDocument.id == document_id)
    ).scalar_one_or_none()
    if document is None:
        raise ValueError(f"No document found with ID: '{document_id}'")

    document.hidden = hidden

    # The index is updated before the commit, so the commit below is only
    # reached when the index update did not raise.
    hidden_request = UpdateRequest(document_ids=[document_id], hidden=hidden)
    get_default_document_index().update([hidden_request])

    db_session.commit()
|
||||
|
||||
|
||||
def create_query_event(
|
||||
query: str,
|
||||
selected_flow: SearchType | None,
|
||||
|
@@ -54,6 +54,7 @@ from danswer.db.document import get_document_cnts_for_cc_pairs
|
||||
from danswer.db.engine import get_session
|
||||
from danswer.db.feedback import fetch_docs_ranked_by_boost
|
||||
from danswer.db.feedback import update_document_boost
|
||||
from danswer.db.feedback import update_document_hidden
|
||||
from danswer.db.index_attempt import create_index_attempt
|
||||
from danswer.db.index_attempt import get_latest_index_attempts
|
||||
from danswer.db.models import User
|
||||
@@ -78,6 +79,7 @@ from danswer.server.models import GDriveCallback
|
||||
from danswer.server.models import GoogleAppCredentials
|
||||
from danswer.server.models import GoogleServiceAccountCredentialRequest
|
||||
from danswer.server.models import GoogleServiceAccountKey
|
||||
from danswer.server.models import HiddenUpdateRequest
|
||||
from danswer.server.models import IndexAttemptSnapshot
|
||||
from danswer.server.models import ObjectCreationIdResponse
|
||||
from danswer.server.models import RunConnectorRequest
|
||||
@@ -133,6 +135,22 @@ def document_boost_update(
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/admin/doc-hidden")
def document_hidden_update(
    hidden_update: HiddenUpdateRequest,
    _: User | None = Depends(current_admin_user),
    db_session: Session = Depends(get_session),
) -> None:
    """Update the hidden flag of the document named in the request body.

    The ``current_admin_user`` dependency is resolved for its side effect
    only; its value is unused. A ``ValueError`` raised by
    ``update_document_hidden`` is reported to the client as an HTTP 400
    carrying the error message.
    """
    target_id = hidden_update.document_id
    new_hidden = hidden_update.hidden
    try:
        update_document_hidden(
            db_session=db_session, document_id=target_id, hidden=new_hidden
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/admin/connector/google-drive/app-credential")
|
||||
def check_google_app_credentials_exist(
|
||||
_: User = Depends(current_admin_user),
|
||||
|
@@ -135,6 +135,11 @@ class BoostUpdateRequest(BaseModel):
|
||||
boost: int
|
||||
|
||||
|
||||
class HiddenUpdateRequest(BaseModel):
    """Request payload for changing a document's hidden flag."""

    # ID of the document whose hidden flag is being changed
    document_id: str
    # New value for the document's hidden flag
    hidden: bool
|
||||
|
||||
|
||||
class SearchDoc(BaseModel):
|
||||
document_id: str
|
||||
semantic_identifier: str
|
||||
|
Reference in New Issue
Block a user