mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-03-29 11:12:02 +01:00
Add verbose logging in case of query failure (#1297)
This commit is contained in:
parent
4abf5f27a0
commit
7298cc2835
@ -112,13 +112,13 @@ def _does_document_exist(
|
||||
"""Returns whether the document already exists and the users/group whitelists
|
||||
Specifically in this case, document refers to a vespa document which is equivalent to a Danswer
|
||||
chunk. This checks for whether the chunk exists already in the index"""
|
||||
doc_fetch_response = http_client.get(
|
||||
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
|
||||
)
|
||||
doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
|
||||
doc_fetch_response = http_client.get(doc_url)
|
||||
if doc_fetch_response.status_code == 404:
|
||||
return False
|
||||
|
||||
if doc_fetch_response.status_code != 200:
|
||||
logger.debug(f"Failed to check for document with URL {doc_url}")
|
||||
raise RuntimeError(
|
||||
f"Unexpected fetch document by ID value from Vespa "
|
||||
f"with error {doc_fetch_response.status_code}"
|
||||
@ -157,7 +157,24 @@ def _get_vespa_chunk_ids_by_document_id(
|
||||
"hits": hits_per_page,
|
||||
}
|
||||
while True:
|
||||
results = requests.post(SEARCH_ENDPOINT, json=params).json()
|
||||
res = requests.post(SEARCH_ENDPOINT, json=params)
|
||||
try:
|
||||
res.raise_for_status()
|
||||
except requests.HTTPError as e:
|
||||
request_info = f"Headers: {res.request.headers}\nPayload: {params}"
|
||||
response_info = (
|
||||
f"Status Code: {res.status_code}\nResponse Content: {res.text}"
|
||||
)
|
||||
error_base = f"Error occurred getting chunk by Document ID {document_id}"
|
||||
logger.error(
|
||||
f"{error_base}:\n"
|
||||
f"{request_info}\n"
|
||||
f"{response_info}\n"
|
||||
f"Exception: {e}"
|
||||
)
|
||||
raise requests.HTTPError(error_base) from e
|
||||
|
||||
results = res.json()
|
||||
hits = results["root"].get("children", [])
|
||||
|
||||
doc_chunk_ids.extend(
|
||||
@ -179,10 +196,14 @@ def _delete_vespa_doc_chunks(
|
||||
)
|
||||
|
||||
for chunk_id in doc_chunk_ids:
|
||||
res = http_client.delete(
|
||||
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
|
||||
)
|
||||
res.raise_for_status()
|
||||
try:
|
||||
res = http_client.delete(
|
||||
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
|
||||
)
|
||||
res.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Failed to delete chunk, details: {e.response.text}")
|
||||
raise
|
||||
|
||||
|
||||
def _delete_vespa_docs(
|
||||
@ -559,18 +580,35 @@ def _query_vespa(query_params: Mapping[str, str | int | float]) -> list[Inferenc
|
||||
if "query" in query_params and not cast(str, query_params["query"]).strip():
|
||||
raise ValueError("No/empty query received")
|
||||
|
||||
params = dict(
|
||||
**query_params,
|
||||
**{
|
||||
"presentation.timing": True,
|
||||
}
|
||||
if LOG_VESPA_TIMING_INFORMATION
|
||||
else {},
|
||||
)
|
||||
|
||||
response = requests.post(
|
||||
SEARCH_ENDPOINT,
|
||||
json=dict(
|
||||
**query_params,
|
||||
**{
|
||||
"presentation.timing": True,
|
||||
}
|
||||
if LOG_VESPA_TIMING_INFORMATION
|
||||
else {},
|
||||
),
|
||||
json=params,
|
||||
)
|
||||
response.raise_for_status()
|
||||
try:
|
||||
response.raise_for_status()
|
||||
except requests.HTTPError as e:
|
||||
request_info = f"Headers: {response.request.headers}\nPayload: {params}"
|
||||
response_info = (
|
||||
f"Status Code: {response.status_code}\n"
|
||||
f"Response Content: {response.text}"
|
||||
)
|
||||
error_base = "Failed to query Vespa"
|
||||
logger.error(
|
||||
f"{error_base}:\n"
|
||||
f"{request_info}\n"
|
||||
f"{response_info}\n"
|
||||
f"Exception: {e}"
|
||||
)
|
||||
raise requests.HTTPError(error_base) from e
|
||||
|
||||
response_json: dict[str, Any] = response.json()
|
||||
if LOG_VESPA_TIMING_INFORMATION:
|
||||
|
Loading…
x
Reference in New Issue
Block a user