diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py
index e9806d229..8f2d03b30 100644
--- a/backend/danswer/background/indexing/run_indexing.py
+++ b/backend/danswer/background/indexing/run_indexing.py
@@ -105,7 +105,7 @@ def _run_indexing(
     document_index = get_default_document_index(
         primary_index_name=index_name, secondary_index_name=None
     )
-    
+
     embedding_model = DefaultIndexingEmbedder.from_db_search_settings(
         search_settings=search_settings
@@ -202,7 +202,7 @@ def _run_indexing(
             db_session.refresh(index_attempt)
             if index_attempt.status != IndexingStatus.IN_PROGRESS:
                 # Likely due to user manually disabling it or model swap
-                raise RuntimeError("Index Attempt was canceled")
+                raise RuntimeError(f"Index Attempt was canceled, status is {index_attempt.status}")
 
             batch_description = []
             for doc in doc_batch:
diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 1faf84e5f..952852897 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -271,6 +271,7 @@ def cleanup_indexing_jobs(
             # batch of documents indexed
             current_db_time = get_db_current_time(db_session=db_session)
             time_since_update = current_db_time - index_attempt.time_updated
+            logger.info("ERRORS 1")
             if time_since_update.total_seconds() > 60 * 60 * timeout_hours:
                 existing_jobs[index_attempt.id].cancel()
                 _mark_run_failed(
@@ -280,6 +281,8 @@ def cleanup_indexing_jobs(
                     "The run will be re-attempted at next scheduled indexing time.",
                 )
             else:
+                logger.info(f"ERRORS 2 {tenant_id} {len(existing_jobs)}")
+                continue
                 # If job isn't known, simply mark it as failed
                 _mark_run_failed(
                     db_session=db_session,
diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py
index 6a7427630..23161c86d 100644
--- a/backend/danswer/document_index/vespa/chunk_retrieval.py
+++ b/backend/danswer/document_index/vespa/chunk_retrieval.py
@@ -335,12 +335,15 @@ def query_vespa(
     return inference_chunks
 
 
+
+
 def _get_chunks_via_batch_search(
     index_name: str,
     chunk_requests: list[VespaChunkRequest],
     filters: IndexFilters,
     get_large_chunks: bool = False,
 ) -> list[InferenceChunkUncleaned]:
+    print("GET CHUNKS")
     if not chunk_requests:
         return []
 
diff --git a/backend/danswer/document_index/vespa/indexing_utils.py b/backend/danswer/document_index/vespa/indexing_utils.py
index 376c200eb..3b06c71ce 100644
--- a/backend/danswer/document_index/vespa/indexing_utils.py
+++ b/backend/danswer/document_index/vespa/indexing_utils.py
@@ -178,8 +178,9 @@ def _index_vespa_chunk(
         DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
     }
 
+
     vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}"
-    logger.debug(f'Indexing to URL "{vespa_url}"')
+    logger.debug(f'Indexing to URL "{vespa_url}" with TENANT ID "{chunk.tenant_id}"')
     res = http_client.post(
         vespa_url, headers=json_header, json={"fields": vespa_document_fields}
     )
diff --git a/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py
index 24cc0f28f..aed175508 100644
--- a/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py
+++ b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py
@@ -20,6 +20,8 @@ logger = setup_logger()
 
 
 def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str:
+    print("\n\n\n\n\nzzzzzzzzzzBUILDING VESPA FILTERS")
+
     def _build_or_filters(key: str, vals: list[str] | None) -> str:
         if vals is None:
             return ""
@@ -55,7 +57,10 @@ def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) ->
     filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""
 
     if filters.tenant_id:
-        filter_str += f"({TENANT_ID} contains {filters.tenant_id}) and "
+        print("TENANT ID")
+        filter_str += f'({TENANT_ID} contains "{filters.tenant_id}") and '
+    else:
+        print("NO TENANT ID")
 
     # CAREFUL touching this one, currently there is no second ACL double-check post retrieval
     if filters.access_control_list is not None:
diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py
index 9299b4715..57c439cc4 100644
--- a/backend/danswer/one_shot_answer/answer_question.py
+++ b/backend/danswer/one_shot_answer/answer_question.py
@@ -212,6 +212,7 @@ def stream_answer_objects(
         answer_style_config=answer_config,
         prompt_config=PromptConfig.from_model(prompt),
         llm=get_main_llm_from_tuple(get_llms_for_persona(persona=chat_session.persona, db_session=db_session)),
+        # TODO: change back
         single_message_history=history_str,
         tools=[search_tool],
         force_use_tool=ForceUseTool(
diff --git a/backend/danswer/search/preprocessing/preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py
index 43a6a43ce..668ebf482 100644
--- a/backend/danswer/search/preprocessing/preprocessing.py
+++ b/backend/danswer/search/preprocessing/preprocessing.py
@@ -29,7 +29,7 @@ from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import FunctionCall
 from danswer.utils.threadpool_concurrency import run_functions_in_parallel
 from danswer.utils.timing import log_function_time
-
+from danswer.db.engine import current_tenant_id
 
 logger = setup_logger()
 
@@ -151,12 +151,15 @@ def retrieval_preprocessing(
     user_acl_filters = (
         None if bypass_acl else build_access_filters_for_user(user, db_session)
     )
+    print("building filters")
+    print(current_tenant_id.get())
     final_filters = IndexFilters(
         source_type=preset_filters.source_type or predicted_source_filters,
         document_set=preset_filters.document_set,
         time_cutoff=preset_filters.time_cutoff or predicted_time_cutoff,
         tags=preset_filters.tags,  # Tags are never auto-extracted
         access_control_list=user_acl_filters,
+        tenant_id="4990f0d0-8447-4476-b6a0-53f4938654c1"  # TODO FIX
     )
 
     llm_evaluation_type = LLMEvaluationType.BASIC
diff --git a/web/next.config.js b/web/next.config.js
index 1586af8d1..6653b611a 100644
--- a/web/next.config.js
+++ b/web/next.config.js
@@ -30,23 +30,25 @@ const nextConfig = {
     if (process.env.NODE_ENV === "production") return defaultRedirects;
 
     return defaultRedirects.concat([
-      {
-        source: "/api/chat/send-message:params*",
-        destination: "http://127.0.0.1:8080/chat/send-message:params*", // Proxy to Backend
-        permanent: true,
-      },
-      {
-        source: "/api/query/stream-answer-with-quote:params*",
-        destination:
-          "http://127.0.0.1:8080/query/stream-answer-with-quote:params*", // Proxy to Backend
-        permanent: true,
-      },
-      {
-        source: "/api/query/stream-query-validation:params*",
-        destination:
-          "http://127.0.0.1:8080/query/stream-query-validation:params*", // Proxy to Backend
-        permanent: true,
-      },
+      // TODO: validate the db session in tenancy for local dev
+      // {
+
+      //   source: "/api/chat/send-message:params*",
+      //   destination: "http://127.0.0.1:8080/chat/send-message:params*", // Proxy to Backend
+      //   permanent: true,
+      // },
+      // {
+      //   source: "/api/query/stream-answer-with-quote:params*",
+      //   destination:
+      //     "http://127.0.0.1:8080/query/stream-answer-with-quote:params*", // Proxy to Backend
+      //   permanent: true,
+      // },
+      // {
+      //   source: "/api/query/stream-query-validation:params*",
+      //   destination:
+      //     "http://127.0.0.1:8080/query/stream-query-validation:params*", // Proxy to Backend
+      //   permanent: true,
+      // },
     ]);
   },
   publicRuntimeConfig: {