diff --git a/backend/alembic/__init__.py b/backend/alembic/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/onyx/auth/users.py b/backend/onyx/auth/users.py index 0d8fa54745..da9865550f 100644 --- a/backend/onyx/auth/users.py +++ b/backend/onyx/auth/users.py @@ -245,6 +245,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): referral_source=referral_source, request=request, ) + user: User + async with get_async_session_with_tenant(tenant_id) as db_session: token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) verify_email_is_invited(user_create.email) @@ -368,6 +370,8 @@ class UserManager(UUIDIDMixin, BaseUserManager[User, uuid.UUID]): "refresh_token": refresh_token, } + user: User + try: # Attempt to get user by OAuth account user = await self.get_by_oauth_account(oauth_name, account_id) diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index 2e3c0e3aec..b1451a0512 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -586,11 +586,12 @@ def connector_indexing_proxy_task( # if the job is done, clean up and break if job.done(): + exit_code: int | None try: if job.status == "error": ignore_exitcode = False - exit_code: int | None = None + exit_code = None if job.process: exit_code = job.process.exitcode diff --git a/backend/onyx/background/celery/tasks/indexing/utils.py b/backend/onyx/background/celery/tasks/indexing/utils.py index e14e79b5ff..00cbc3632d 100644 --- a/backend/onyx/background/celery/tasks/indexing/utils.py +++ b/backend/onyx/background/celery/tasks/indexing/utils.py @@ -438,6 +438,7 @@ def try_creating_indexing_task( if not acquired: return None + redis_connector_index: RedisConnectorIndex try: redis_connector = RedisConnector(tenant_id, cc_pair.id) redis_connector_index = redis_connector.new_index(search_settings.id) diff --git a/backend/onyx/background/celery/tasks/monitoring/tasks.py b/backend/onyx/background/celery/tasks/monitoring/tasks.py index 0375608413..244b4abd2e 100644 --- a/backend/onyx/background/celery/tasks/monitoring/tasks.py +++ b/backend/onyx/background/celery/tasks/monitoring/tasks.py @@ -747,6 +747,7 @@ def cloud_check_alembic() -> bool | None: revision_counts: dict[str, int] = {} out_of_date_tenants: dict[str, str | None] = {} top_revision: str = "" + tenant_ids: list[str] = [] try: # map each tenant_id to its revision diff --git a/backend/onyx/background/celery/tasks/shared/tasks.py b/backend/onyx/background/celery/tasks/shared/tasks.py index 5530d9eebc..a1bc12252a 100644 --- a/backend/onyx/background/celery/tasks/shared/tasks.py +++ b/backend/onyx/background/celery/tasks/shared/tasks.py @@ -247,6 +247,7 @@ def cloud_beat_task_generator( return None last_lock_time = time.monotonic() + tenant_ids: list[str] | list[None] = [] try: tenant_ids = get_all_tenant_ids() diff --git a/backend/onyx/background/celery/tasks/vespa/tasks.py b/backend/onyx/background/celery/tasks/vespa/tasks.py index c695f92a8b..4033b2503f 100644 --- a/backend/onyx/background/celery/tasks/vespa/tasks.py +++ b/backend/onyx/background/celery/tasks/vespa/tasks.py @@ -1084,6 +1084,7 @@ def vespa_metadata_sync_task( task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}") return False except Exception as ex: + e: Exception | None = None if isinstance(ex, RetryError): task_logger.warning( f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}" diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py index 94f8cb01be..fd0ab0b35d 100644 --- a/backend/onyx/background/indexing/run_indexing.py +++ b/backend/onyx/background/indexing/run_indexing.py @@ -239,6 +239,7 @@ def _run_indexing( callback=callback, ) + tracer: OnyxTracer if INDEXING_TRACER_INTERVAL > 0: logger.debug(f"Memory tracer starting: interval={INDEXING_TRACER_INTERVAL}") tracer = OnyxTracer() diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 8c1de70ca9..f87de5f488 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -87,6 +87,7 @@ from onyx.file_store.utils import save_files from onyx.llm.exceptions import GenAIDisabledException from onyx.llm.factory import get_llms_for_persona from onyx.llm.factory import get_main_llm_from_tuple +from onyx.llm.interfaces import LLM from onyx.llm.models import PreviousMessage from onyx.llm.utils import litellm_exception_to_error_msg from onyx.natural_language_processing.utils import get_tokenizer @@ -349,7 +350,8 @@ def stream_chat_message_objects( new_msg_req.chunks_above = 0 new_msg_req.chunks_below = 0 - llm = None + llm: LLM + try: user_id = user.id if user is not None else None diff --git a/backend/onyx/connectors/airtable/airtable_connector.py b/backend/onyx/connectors/airtable/airtable_connector.py index 37b84623f9..c4d0e63d90 100644 --- a/backend/onyx/connectors/airtable/airtable_connector.py +++ b/backend/onyx/connectors/airtable/airtable_connector.py @@ -369,11 +369,12 @@ class AirtableConnector(LoadConnector): # Process records in parallel batches using ThreadPoolExecutor PARALLEL_BATCH_SIZE = 8 max_workers = min(PARALLEL_BATCH_SIZE, len(records)) + record_documents: list[Document] = [] # Process records in batches for i in range(0, len(records), PARALLEL_BATCH_SIZE): batch_records = records[i : i + PARALLEL_BATCH_SIZE] - record_documents: list[Document] = [] + record_documents = [] with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit batch tasks diff --git a/backend/onyx/connectors/salesforce/doc_conversion.py b/backend/onyx/connectors/salesforce/doc_conversion.py index e6acaf2e0c..c9f48a9ec9 100644 --- a/backend/onyx/connectors/salesforce/doc_conversion.py +++ b/backend/onyx/connectors/salesforce/doc_conversion.py @@ -59,6 +59,7 @@ def _clean_salesforce_dict(data: dict | list) -> dict | list: elif isinstance(data, list): filtered_list = [] for item in data: + filtered_item: dict | list if isinstance(item, (dict, list)): filtered_item = _clean_salesforce_dict(item) # Only add non-empty dictionaries or lists diff --git a/backend/onyx/db/document_set.py b/backend/onyx/db/document_set.py index 6383a6c02c..bf9d6d38db 100644 --- a/backend/onyx/db/document_set.py +++ b/backend/onyx/db/document_set.py @@ -221,6 +221,7 @@ def insert_document_set( group_ids=document_set_creation_request.groups or [], ) + new_document_set_row: DocumentSetDBModel try: new_document_set_row = DocumentSetDBModel( name=document_set_creation_request.name, diff --git a/backend/onyx/file_processing/extract_file_text.py b/backend/onyx/file_processing/extract_file_text.py index efdeeb5469..cebd52b73b 100644 --- a/backend/onyx/file_processing/extract_file_text.py +++ b/backend/onyx/file_processing/extract_file_text.py @@ -365,7 +365,7 @@ def extract_file_text( f"Failed to process with Unstructured: {str(unstructured_error)}. Falling back to normal processing." ) # Fall through to normal processing - + final_extension: str if file_name or extension: if extension is not None: final_extension = extension diff --git a/backend/onyx/indexing/chunker.py b/backend/onyx/indexing/chunker.py index f95fe86e31..e42dafb293 100644 --- a/backend/onyx/indexing/chunker.py +++ b/backend/onyx/indexing/chunker.py @@ -223,6 +223,8 @@ class Chunker: large_chunk_id=None, ) + section_link_text: str + for section_idx, section in enumerate(document.sections): section_text = clean_text(section.text) section_link_text = section.link or "" diff --git a/backend/onyx/onyxbot/slack/utils.py b/backend/onyx/onyxbot/slack/utils.py index f8f3ad845a..4a940c37a1 100644 --- a/backend/onyx/onyxbot/slack/utils.py +++ b/backend/onyx/onyxbot/slack/utils.py @@ -518,7 +518,7 @@ def read_slack_thread( message_type = MessageType.USER else: self_slack_bot_id = get_onyx_bot_slack_bot_id(client) - + blocks: Any if reply.get("user") == self_slack_bot_id: # OnyxBot response message_type = MessageType.ASSISTANT diff --git a/backend/onyx/redis/redis_connector_doc_perm_sync.py b/backend/onyx/redis/redis_connector_doc_perm_sync.py index 7e58736205..538552ed84 100644 --- a/backend/onyx/redis/redis_connector_doc_perm_sync.py +++ b/backend/onyx/redis/redis_connector_doc_perm_sync.py @@ -1,5 +1,6 @@ import time from datetime import datetime +from typing import Any from typing import cast from uuid import uuid4 @@ -95,7 +96,7 @@ class RedisConnectorPermissionSync: @property def payload(self) -> RedisConnectorPermissionSyncPayload | None: # read related data and evaluate/print task progress - fence_bytes = cast(bytes, self.redis.get(self.fence_key)) + fence_bytes = cast(Any, self.redis.get(self.fence_key)) if fence_bytes is None: return None diff --git a/backend/onyx/redis/redis_connector_ext_group_sync.py b/backend/onyx/redis/redis_connector_ext_group_sync.py index 2f0783dbcc..54654bb713 100644 --- a/backend/onyx/redis/redis_connector_ext_group_sync.py +++ b/backend/onyx/redis/redis_connector_ext_group_sync.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Any from typing import cast import redis @@ -82,7 +83,7 @@ class RedisConnectorExternalGroupSync: @property def payload(self) -> RedisConnectorExternalGroupSyncPayload | None: # read related data and evaluate/print task progress - fence_bytes = cast(bytes, self.redis.get(self.fence_key)) + fence_bytes = cast(Any, self.redis.get(self.fence_key)) if fence_bytes is None: return None diff --git a/backend/onyx/redis/redis_connector_index.py b/backend/onyx/redis/redis_connector_index.py index 215468f352..b8d72049a6 100644 --- a/backend/onyx/redis/redis_connector_index.py +++ b/backend/onyx/redis/redis_connector_index.py @@ -1,4 +1,5 @@ from datetime import datetime +from typing import Any from typing import cast from uuid import uuid4 @@ -89,7 +90,7 @@ class RedisConnectorIndex: @property def payload(self) -> RedisConnectorIndexPayload | None: # read related data and evaluate/print task progress - fence_bytes = cast(bytes, self.redis.get(self.fence_key)) + fence_bytes = cast(Any, self.redis.get(self.fence_key)) if fence_bytes is None: return None diff --git a/backend/onyx/server/manage/users.py b/backend/onyx/server/manage/users.py index 3ef0faf393..7e3dd1f6d4 100644 --- a/backend/onyx/server/manage/users.py +++ b/backend/onyx/server/manage/users.py @@ -271,6 +271,8 @@ def bulk_invite_users( tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() new_invited_emails = [] + email: str + try: for email in emails: email_info = validate_email(email) diff --git a/backend/scripts/chat_feedback_dump.py b/backend/scripts/chat_feedback_dump.py index 6fe9cbc7df..98d9237f11 100644 --- a/backend/scripts/chat_feedback_dump.py +++ b/backend/scripts/chat_feedback_dump.py @@ -198,6 +198,7 @@ def process_all_chat_feedback(onyx_url: str, api_key: str | None) -> None: r_sessions = get_chat_sessions(onyx_url, headers, user_id) logger.info(f"user={user_id} num_sessions={len(r_sessions.sessions)}") for session in r_sessions.sessions: + s: ChatSessionSnapshot try: s = get_session_history(onyx_url, headers, session.id) except requests.exceptions.HTTPError: diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py index 5462a66177..26cfbcf542 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest from onyx.connectors.confluence.connector import ConfluenceConnector +from onyx.connectors.models import Document @pytest.fixture @@ -41,6 +42,10 @@ def test_confluence_connector_basic( assert len(doc_batch) == 3 + page_within_a_page_doc: Document | None = None + page_doc: Document | None = None + txt_doc: Document | None = None + for doc in doc_batch: if doc.semantic_identifier == "DailyConnectorTestSpace Home": page_doc = doc @@ -49,6 +54,7 @@ def test_confluence_connector_basic( elif doc.semantic_identifier == "Page Within A Page": page_within_a_page_doc = doc + assert page_within_a_page_doc is not None assert page_within_a_page_doc.semantic_identifier == "Page Within A Page" assert page_within_a_page_doc.primary_owners assert page_within_a_page_doc.primary_owners[0].email == "hagen@danswer.ai" @@ -62,6 +68,7 @@ def test_confluence_connector_basic( == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/pages/200769540/Page+Within+A+Page" ) + assert page_doc is not None assert page_doc.semantic_identifier == "DailyConnectorTestSpace Home" assert page_doc.metadata["labels"] == ["testlabel"] assert page_doc.primary_owners @@ -75,6 +82,7 @@ def test_confluence_connector_basic( == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview" ) + assert txt_doc is not None assert txt_doc.semantic_identifier == "small-file.txt" assert len(txt_doc.sections) == 1 assert txt_doc.sections[0].text == "small" diff --git a/backend/tests/daily/connectors/gmail/test_gmail_connector.py b/backend/tests/daily/connectors/gmail/test_gmail_connector.py index d746f86bd8..1d58344e67 100644 --- a/backend/tests/daily/connectors/gmail/test_gmail_connector.py +++ b/backend/tests/daily/connectors/gmail/test_gmail_connector.py @@ -110,6 +110,8 @@ def test_docs_retrieval( for doc in retrieved_docs: id = doc.id + retrieved_primary_owner_emails: set[str | None] = set() + retrieved_secondary_owner_emails: set[str | None] = set() if doc.primary_owners: retrieved_primary_owner_emails = set( [owner.email for owner in doc.primary_owners] diff --git a/backend/tests/integration/common_utils/managers/user.py b/backend/tests/integration/common_utils/managers/user.py index 63a6c887d3..03539ce566 100644 --- a/backend/tests/integration/common_utils/managers/user.py +++ b/backend/tests/integration/common_utils/managers/user.py @@ -165,6 +165,7 @@ class UserManager: target_status: bool, user_performing_action: DATestUser, ) -> DATestUser: + url_substring: str if target_status is True: url_substring = "activate" elif target_status is False: