diff --git a/.vscode/launch.template.jsonc b/.vscode/launch.template.jsonc index f1454ca8e9..7402d02681 100644 --- a/.vscode/launch.template.jsonc +++ b/.vscode/launch.template.jsonc @@ -6,396 +6,419 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "compounds": [ - { - // Dummy entry used to label the group - "name": "--- Compound ---", - "configurations": [ - "--- Individual ---" - ], - "presentation": { - "group": "1", - } - }, - { - "name": "Run All Onyx Services", - "configurations": [ - "Web Server", - "Model Server", - "API Server", - "Slack Bot", - "Celery primary", - "Celery light", - "Celery heavy", - "Celery indexing", - "Celery beat", - "Celery monitoring", - ], - "presentation": { - "group": "1", - } - }, - { - "name": "Web / Model / API", - "configurations": [ - "Web Server", - "Model Server", - "API Server", - ], - "presentation": { - "group": "1", - } - }, - { - "name": "Celery (all)", - "configurations": [ - "Celery primary", - "Celery light", - "Celery heavy", - "Celery indexing", - "Celery beat", - "Celery monitoring", - ], - "presentation": { - "group": "1", - } - } + { + // Dummy entry used to label the group + "name": "--- Compound ---", + "configurations": ["--- Individual ---"], + "presentation": { + "group": "1" + } + }, + { + "name": "Run All Onyx Services", + "configurations": [ + "Web Server", + "Model Server", + "API Server", + "Slack Bot", + "Celery primary", + "Celery light", + "Celery heavy", + "Celery indexing", + "Celery user files indexing", + "Celery beat", + "Celery monitoring" + ], + "presentation": { + "group": "1" + } + }, + { + "name": "Web / Model / API", + "configurations": ["Web Server", "Model Server", "API Server"], + "presentation": { + "group": "1" + } + }, + { + "name": "Celery (all)", + "configurations": [ + "Celery primary", + "Celery light", + "Celery heavy", + "Celery indexing", + "Celery user files indexing", + "Celery beat", + "Celery monitoring" + ], + "presentation": { + "group": "1" + } + } ], "configurations": [ - { - // Dummy entry used to label the group - "name": "--- Individual ---", - "type": "node", - "request": "launch", - "presentation": { - "group": "2", - "order": 0 - } - }, - { - "name": "Web Server", - "type": "node", - "request": "launch", - "cwd": "${workspaceRoot}/web", - "runtimeExecutable": "npm", - "envFile": "${workspaceFolder}/.vscode/.env", - "runtimeArgs": [ - "run", "dev" - ], - "presentation": { - "group": "2", - }, - "console": "integratedTerminal", - "consoleTitle": "Web Server Console" + { + // Dummy entry used to label the group + "name": "--- Individual ---", + "type": "node", + "request": "launch", + "presentation": { + "group": "2", + "order": 0 + } + }, + { + "name": "Web Server", + "type": "node", + "request": "launch", + "cwd": "${workspaceRoot}/web", + "runtimeExecutable": "npm", + "envFile": "${workspaceFolder}/.vscode/.env", + "runtimeArgs": ["run", "dev"], + "presentation": { + "group": "2" }, - { - "name": "Model Server", - "consoleName": "Model Server", - "type": "debugpy", - "request": "launch", - "module": "uvicorn", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1" - }, - "args": [ - "model_server.main:app", - "--reload", - "--port", - "9000" - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Model Server Console" + "console": "integratedTerminal", + "consoleTitle": "Web Server Console" + }, + { + "name": "Model Server", + 
"consoleName": "Model Server", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1" }, - { - "name": "API Server", - "consoleName": "API Server", - "type": "debugpy", - "request": "launch", - "module": "uvicorn", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_DANSWER_MODEL_INTERACTIONS": "True", - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1" - }, - "args": [ - "onyx.main:app", - "--reload", - "--port", - "8080" - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "API Server Console" + "args": ["model_server.main:app", "--reload", "--port", "9000"], + "presentation": { + "group": "2" }, - // For the listener to access the Slack API, - // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project - { - "name": "Slack Bot", - "consoleName": "Slack Bot", - "type": "debugpy", - "request": "launch", - "program": "onyx/onyxbot/slack/listener.py", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "presentation": { - "group": "2", - }, - "consoleTitle": "Slack Bot Console" + "consoleTitle": "Model Server Console" + }, + { + "name": "API Server", + "consoleName": "API Server", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_DANSWER_MODEL_INTERACTIONS": "True", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1" }, - { - "name": "Celery primary", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "INFO", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.primary", - "worker", - "--pool=threads", - "--concurrency=4", - "--prefetch-multiplier=1", - "--loglevel=INFO", - "--hostname=primary@%n", - "-Q", - "celery", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery primary Console" + "args": ["onyx.main:app", "--reload", "--port", "8080"], + "presentation": { + "group": "2" }, - { - "name": "Celery light", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "INFO", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." 
- }, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.light", - "worker", - "--pool=threads", - "--concurrency=64", - "--prefetch-multiplier=8", - "--loglevel=INFO", - "--hostname=light@%n", - "-Q", - "vespa_metadata_sync,connector_deletion,doc_permissions_upsert,checkpoint_cleanup", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery light Console" + "consoleTitle": "API Server Console" + }, + // For the listener to access the Slack API, + // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project + { + "name": "Slack Bot", + "consoleName": "Slack Bot", + "type": "debugpy", + "request": "launch", + "program": "onyx/onyxbot/slack/listener.py", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." }, - { - "name": "Celery heavy", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "INFO", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.heavy", - "worker", - "--pool=threads", - "--concurrency=4", - "--prefetch-multiplier=1", - "--loglevel=INFO", - "--hostname=heavy@%n", - "-Q", - "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery heavy Console" + "presentation": { + "group": "2" }, - { - "name": "Celery indexing", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "ENABLE_MULTIPASS_INDEXING": "false", - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.indexing", - "worker", - "--pool=threads", - "--concurrency=1", - "--prefetch-multiplier=1", - "--loglevel=INFO", - "--hostname=indexing@%n", - "-Q", - "connector_indexing", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery indexing Console" + "consoleTitle": "Slack Bot Console" + }, + { + "name": "Celery primary", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." 
}, - { - "name": "Celery monitoring", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": {}, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.monitoring", - "worker", - "--pool=solo", - "--concurrency=1", - "--prefetch-multiplier=1", - "--loglevel=INFO", - "--hostname=monitoring@%n", - "-Q", - "monitoring", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery monitoring Console" + "args": [ + "-A", + "onyx.background.celery.versioned_apps.primary", + "worker", + "--pool=threads", + "--concurrency=4", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=primary@%n", + "-Q", + "celery" + ], + "presentation": { + "group": "2" }, - { - "name": "Celery beat", - "type": "debugpy", - "request": "launch", - "module": "celery", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "args": [ - "-A", - "onyx.background.celery.versioned_apps.beat", - "beat", - "--loglevel=INFO", - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Celery beat Console" + "consoleTitle": "Celery primary Console" + }, + { + "name": "Celery light", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." }, - { - "name": "Pytest", - "consoleName": "Pytest", - "type": "debugpy", - "request": "launch", - "module": "pytest", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, - "args": [ - "-v" - // Specify a sepcific module/test to run or provide nothing to run all tests - //"tests/unit/onyx/llm/answering/test_prune_and_merge.py" - ], - "presentation": { - "group": "2", - }, - "consoleTitle": "Pytest Console" + "args": [ + "-A", + "onyx.background.celery.versioned_apps.light", + "worker", + "--pool=threads", + "--concurrency=64", + "--prefetch-multiplier=8", + "--loglevel=INFO", + "--hostname=light@%n", + "-Q", + "vespa_metadata_sync,connector_deletion,doc_permissions_upsert" + ], + "presentation": { + "group": "2" }, - { - // Dummy entry used to label the group - "name": "--- Tasks ---", - "type": "node", - "request": "launch", - "presentation": { - "group": "3", - "order": 0 - } - }, - { - "name": "Clear and Restart External Volumes and Containers", - "type": "node", - "request": "launch", - "runtimeExecutable": "bash", - "runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"], - "cwd": "${workspaceFolder}", - "console": "integratedTerminal", - "stopOnEntry": true, - "presentation": { - "group": "3", - }, + "consoleTitle": "Celery light Console" + }, + { + "name": "Celery heavy", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." }, - { - // Celery jobs launched through a single background script (legacy) - // Recommend using the "Celery (all)" compound launch instead. 
- "name": "Background Jobs", - "consoleName": "Background Jobs", - "type": "debugpy", - "request": "launch", - "program": "scripts/dev_run_background_jobs.py", - "cwd": "${workspaceFolder}/backend", - "envFile": "${workspaceFolder}/.vscode/.env", - "env": { - "LOG_DANSWER_MODEL_INTERACTIONS": "True", - "LOG_LEVEL": "DEBUG", - "PYTHONUNBUFFERED": "1", - "PYTHONPATH": "." - }, + "args": [ + "-A", + "onyx.background.celery.versioned_apps.heavy", + "worker", + "--pool=threads", + "--concurrency=4", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=heavy@%n", + "-Q", + "connector_pruning,connector_doc_permissions_sync,connector_external_group_sync" + ], + "presentation": { + "group": "2" }, - { - "name": "Install Python Requirements", - "type": "node", - "request": "launch", - "runtimeExecutable": "bash", - "runtimeArgs": [ - "-c", - "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt" - ], - "cwd": "${workspaceFolder}", - "console": "integratedTerminal", - "presentation": { - "group": "3" - } + "consoleTitle": "Celery heavy Console" + }, + { + "name": "Celery indexing", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "ENABLE_MULTIPASS_INDEXING": "false", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." }, + "args": [ + "-A", + "onyx.background.celery.versioned_apps.indexing", + "worker", + "--pool=threads", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=indexing@%n", + "-Q", + "connector_indexing" + ], + "presentation": { + "group": "2" + }, + "consoleTitle": "Celery indexing Console" + }, + { + "name": "Celery monitoring", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": {}, + "args": [ + "-A", + "onyx.background.celery.versioned_apps.monitoring", + "worker", + "--pool=solo", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=monitoring@%n", + "-Q", + "monitoring" + ], + "presentation": { + "group": "2" + }, + "consoleTitle": "Celery monitoring Console" + }, + { + "name": "Celery beat", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + "args": [ + "-A", + "onyx.background.celery.versioned_apps.beat", + "beat", + "--loglevel=INFO" + ], + "presentation": { + "group": "2" + }, + "consoleTitle": "Celery beat Console" + }, + { + "name": "Celery user files indexing", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." 
+ }, + "args": [ + "-A", + "onyx.background.celery.versioned_apps.indexing", + "worker", + "--pool=threads", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=user_files_indexing@%n", + "-Q", + "user_files_indexing" + ], + "presentation": { + "group": "2" + }, + "consoleTitle": "Celery user files indexing Console" + }, + { + "name": "Pytest", + "consoleName": "Pytest", + "type": "debugpy", + "request": "launch", + "module": "pytest", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + "args": [ + "-v" + // Specify a specific module/test to run, or provide nothing to run all tests + //"tests/unit/onyx/llm/answering/test_prune_and_merge.py" + ], + "presentation": { + "group": "2" + }, + "consoleTitle": "Pytest Console" + }, + { + // Dummy entry used to label the group + "name": "--- Tasks ---", + "type": "node", + "request": "launch", + "presentation": { + "group": "3", + "order": 0 + } + }, + { + "name": "Clear and Restart External Volumes and Containers", + "type": "node", + "request": "launch", + "runtimeExecutable": "bash", + "runtimeArgs": [ + "${workspaceFolder}/backend/scripts/restart_containers.sh" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "stopOnEntry": true, + "presentation": { + "group": "3" + } + }, + { + // Celery jobs launched through a single background script (legacy) + // Recommend using the "Celery (all)" compound launch instead. + "name": "Background Jobs", + "consoleName": "Background Jobs", + "type": "debugpy", + "request": "launch", + "program": "scripts/dev_run_background_jobs.py", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_DANSWER_MODEL_INTERACTIONS": "True", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + } + }, + { + "name": "Install Python Requirements", + "type": "node", + "request": "launch", + "runtimeExecutable": "bash", + "runtimeArgs": [ + "-c", + "pip install -r backend/requirements/default.txt && pip install -r backend/requirements/dev.txt && pip install -r backend/requirements/ee.txt && pip install -r backend/requirements/model_server.txt" + ], + "cwd": "${workspaceFolder}", + "console": "integratedTerminal", + "presentation": { + "group": "3" + } + }, + { + "name": "Debug React Web App in Chrome", + "type": "chrome", + "request": "launch", + "url": "http://localhost:3000", + "webRoot": "${workspaceFolder}/web" + } ] -} + } + \ No newline at end of file diff --git a/backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py b/backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py index b3a23efafe..3f6126cae7 100644 --- a/backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py +++ b/backend/alembic/versions/8e1ac4f39a9f_enable_contextual_retrieval.py @@ -1,7 +1,7 @@ """enable contextual retrieval Revision ID: 8e1ac4f39a9f -Revises: 3781a5eb12cb +Revises: 9aadf32dfeb4 Create Date: 2024-12-20 13:29:09.918661 """ @@ -11,7 +11,7 @@ import sqlalchemy as sa # revision identifiers, used by Alembic.
revision = "8e1ac4f39a9f" -down_revision = "3781a5eb12cb" +down_revision = "9aadf32dfeb4" branch_labels = None depends_on = None diff --git a/backend/alembic/versions/9aadf32dfeb4_add_user_files.py b/backend/alembic/versions/9aadf32dfeb4_add_user_files.py new file mode 100644 index 0000000000..21f864a6aa --- /dev/null +++ b/backend/alembic/versions/9aadf32dfeb4_add_user_files.py @@ -0,0 +1,113 @@ +"""add user files + +Revision ID: 9aadf32dfeb4 +Revises: 3781a5eb12cb +Create Date: 2025-01-26 16:08:21.551022 + +""" +from alembic import op +import sqlalchemy as sa +import datetime + + +# revision identifiers, used by Alembic. +revision = "9aadf32dfeb4" +down_revision = "3781a5eb12cb" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Create user_folder table without parent_id + op.create_table( + "user_folder", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True), + sa.Column("name", sa.String(length=255), nullable=True), + sa.Column("description", sa.String(length=255), nullable=True), + sa.Column("display_priority", sa.Integer(), nullable=True, default=0), + sa.Column( + "created_at", sa.DateTime(timezone=True), server_default=sa.func.now() + ), + ) + + # Create user_file table with folder_id instead of parent_folder_id + op.create_table( + "user_file", + sa.Column("id", sa.Integer(), primary_key=True, autoincrement=True), + sa.Column("user_id", sa.UUID(), sa.ForeignKey("user.id"), nullable=True), + sa.Column( + "folder_id", + sa.Integer(), + sa.ForeignKey("user_folder.id"), + nullable=True, + ), + sa.Column("link_url", sa.String(), nullable=True), + sa.Column("token_count", sa.Integer(), nullable=True), + sa.Column("file_type", sa.String(), nullable=True), + sa.Column("file_id", sa.String(length=255), nullable=False), + sa.Column("document_id", sa.String(length=255), nullable=False), + sa.Column("name", sa.String(length=255), nullable=False), + sa.Column( + "created_at", + sa.DateTime(), + default=datetime.datetime.utcnow, + ), + sa.Column( + "cc_pair_id", + sa.Integer(), + sa.ForeignKey("connector_credential_pair.id"), + nullable=True, + unique=True, + ), + ) + + # Create persona__user_file table + op.create_table( + "persona__user_file", + sa.Column( + "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True + ), + sa.Column( + "user_file_id", + sa.Integer(), + sa.ForeignKey("user_file.id"), + primary_key=True, + ), + ) + + # Create persona__user_folder table + op.create_table( + "persona__user_folder", + sa.Column( + "persona_id", sa.Integer(), sa.ForeignKey("persona.id"), primary_key=True + ), + sa.Column( + "user_folder_id", + sa.Integer(), + sa.ForeignKey("user_folder.id"), + primary_key=True, + ), + ) + + op.add_column( + "connector_credential_pair", + sa.Column("is_user_file", sa.Boolean(), nullable=True, default=False), + ) + + # Update existing records to have is_user_file=False instead of NULL + op.execute( + "UPDATE connector_credential_pair SET is_user_file = FALSE WHERE is_user_file IS NULL" + ) + + +def downgrade() -> None: + # Drop the persona__user_folder table + op.drop_table("persona__user_folder") + # Drop the persona__user_file table + op.drop_table("persona__user_file") + # Drop the user_file table + op.drop_table("user_file") + # Drop the user_folder table + op.drop_table("user_folder") + op.drop_column("connector_credential_pair", "is_user_file") diff --git a/backend/hello-vmlinux.bin b/backend/hello-vmlinux.bin new file mode 100644 
index 0000000000..3f800f25c4 Binary files /dev/null and b/backend/hello-vmlinux.bin differ diff --git a/backend/onyx/access/access.py b/backend/onyx/access/access.py index 8c3c36416f..b45694ff9b 100644 --- a/backend/onyx/access/access.py +++ b/backend/onyx/access/access.py @@ -57,8 +57,9 @@ def _get_access_for_documents( db_session=db_session, document_ids=document_ids, ) - doc_access = { - document_id: DocumentAccess.build( + doc_access = {} + for document_id, user_emails, is_public in document_access_info: + doc_access[document_id] = DocumentAccess.build( user_emails=[email for email in user_emails if email], # MIT version will wipe all groups and external groups on update user_groups=[], @@ -66,8 +67,6 @@ external_user_emails=[], external_user_group_ids=[], ) - for document_id, user_emails, is_public in document_access_info - } # Sometimes the document has not been indexed by the indexing job yet, in those cases # the document does not exist and so we use least permissive. Specifically the EE version diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py index 7b8df025a8..7433ff2100 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py @@ -321,8 +321,10 @@ def dispatch_separated( sep: str = DISPATCH_SEP_CHAR, ) -> list[BaseMessage_Content]: num = 1 + accumulated_tokens = "" streamed_tokens: list[BaseMessage_Content] = [] for token in tokens: + accumulated_tokens += cast(str, token.content) content = cast(str, token.content) if sep in content: sub_question_parts = content.split(sep) diff --git a/backend/onyx/background/celery/apps/light.py b/backend/onyx/background/celery/apps/light.py index e8b7a3dd27..c4aca82574 100644 --- a/backend/onyx/background/celery/apps/light.py +++ b/backend/onyx/background/celery/apps/light.py @@ -111,6 +111,7 @@ celery_app.autodiscover_tasks( "onyx.background.celery.tasks.vespa", "onyx.background.celery.tasks.connector_deletion", "onyx.background.celery.tasks.doc_permission_syncing", + "onyx.background.celery.tasks.user_file_folder_sync", "onyx.background.celery.tasks.indexing", "onyx.background.celery.tasks.tenant_provisioning", ] diff --git a/backend/onyx/background/celery/apps/primary.py b/backend/onyx/background/celery/apps/primary.py index 6ebd80722c..df9e00a1ab 100644 --- a/backend/onyx/background/celery/apps/primary.py +++ b/backend/onyx/background/celery/apps/primary.py @@ -174,6 +174,9 @@ def on_worker_init(sender: Worker, **kwargs: Any) -> None: f"search_settings={attempt.search_settings_id}" ) logger.warning(failure_reason) + logger.warning( + f"Marking attempt {attempt.id} as canceled due to validation error" + ) mark_attempt_canceled(attempt.id, db_session, failure_reason) @@ -285,5 +288,6 @@ celery_app.autodiscover_tasks( "onyx.background.celery.tasks.shared", "onyx.background.celery.tasks.vespa", "onyx.background.celery.tasks.llm_model_update", + "onyx.background.celery.tasks.user_file_folder_sync", ] ) diff --git a/backend/onyx/background/celery/tasks/beat_schedule.py b/backend/onyx/background/celery/tasks/beat_schedule.py index a0ac83b386..8bcef470fc 100644 --- a/backend/onyx/background/celery/tasks/beat_schedule.py +++ b/backend/onyx/background/celery/tasks/beat_schedule.py @@ -64,6 +64,15 @@ beat_task_templates.extend( "expires": BEAT_EXPIRES_DEFAULT, }, }, + { + "name": "check-for-user-file-folder-sync", + "task":
OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC, + "schedule": timedelta(seconds=30), + "options": { + "priority": OnyxCeleryPriority.MEDIUM, + "expires": BEAT_EXPIRES_DEFAULT, + }, + }, { "name": "check-for-pruning", "task": OnyxCeleryTask.CHECK_FOR_PRUNING, diff --git a/backend/onyx/background/celery/tasks/indexing/tasks.py b/backend/onyx/background/celery/tasks/indexing/tasks.py index 910efb97f7..68d1afcd7c 100644 --- a/backend/onyx/background/celery/tasks/indexing/tasks.py +++ b/backend/onyx/background/celery/tasks/indexing/tasks.py @@ -365,6 +365,7 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: Occcasionally does some validation of existing state to clear up error conditions""" time_start = time.monotonic() + task_logger.debug("check_for_indexing - Starting") tasks_created = 0 locked = False @@ -433,7 +434,9 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: lock_beat.reacquire() cc_pair_ids: list[int] = [] with get_session_with_current_tenant() as db_session: - cc_pairs = fetch_connector_credential_pairs(db_session) + cc_pairs = fetch_connector_credential_pairs( + db_session, include_user_files=True + ) for cc_pair_entry in cc_pairs: cc_pair_ids.append(cc_pair_entry.id) @@ -452,12 +455,18 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: not search_settings_instance.status.is_current() and not search_settings_instance.background_reindex_enabled ): + task_logger.info("check_for_indexing - Skipping non-current search settings with background reindexing disabled") + + continue redis_connector_index = redis_connector.new_index( search_settings_instance.id ) if redis_connector_index.fenced: + task_logger.info( + f"check_for_indexing - Skipping fenced connector: " + f"cc_pair={cc_pair_id} search_settings={search_settings_instance.id}" + ) continue cc_pair = get_connector_credential_pair_from_id( @@ -465,6 +474,9 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: cc_pair_id=cc_pair_id, ) if not cc_pair: + task_logger.warning( + f"check_for_indexing - CC pair not found: cc_pair={cc_pair_id}" + ) continue last_attempt = get_last_attempt_for_cc_pair( @@ -478,7 +490,20 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: secondary_index_building=len(search_settings_list) > 1, db_session=db_session, ): + task_logger.info( + f"check_for_indexing - Not indexing cc_pair_id: {cc_pair_id} " + f"search_settings={search_settings_instance.id}, " + f"last_attempt={last_attempt.id if last_attempt else None}, " + f"secondary_index_building={len(search_settings_list) > 1}" + ) continue + else: + task_logger.info( + f"check_for_indexing - Will index cc_pair_id: {cc_pair_id} " + f"search_settings={search_settings_instance.id}, " + f"last_attempt={last_attempt.id if last_attempt else None}, " + f"secondary_index_building={len(search_settings_list) > 1}" + ) reindex = False if search_settings_instance.status.is_current(): @@ -517,6 +542,12 @@ def check_for_indexing(self: Task, *, tenant_id: str) -> int | None: f"search_settings={search_settings_instance.id}" ) tasks_created += 1 + else: + task_logger.warning( + f"Failed to create indexing task: " + f"cc_pair={cc_pair.id} " + f"search_settings={search_settings_instance.id}" + ) lock_beat.reacquire() @@ -1149,6 +1180,9 @@ def connector_indexing_proxy_task( if result.status == IndexingWatchdogTerminalStatus.TERMINATED_BY_SIGNAL: try: with get_session_with_current_tenant() as db_session: + logger.warning( + f"Marking attempt {index_attempt_id} as canceled due to termination signal" + ) mark_attempt_canceled(
index_attempt_id, db_session, diff --git a/backend/onyx/background/celery/tasks/indexing/utils.py b/backend/onyx/background/celery/tasks/indexing/utils.py index dbe425f938..ad6705a619 100644 --- a/backend/onyx/background/celery/tasks/indexing/utils.py +++ b/backend/onyx/background/celery/tasks/indexing/utils.py @@ -371,6 +371,7 @@ def should_index( # don't kick off indexing for `NOT_APPLICABLE` sources if connector.source == DocumentSource.NOT_APPLICABLE: + task_logger.debug(f"Not indexing cc_pair={cc_pair.id}: NOT_APPLICABLE source") return False # User can still manually create single indexing attempts via the UI for the @@ -380,6 +381,9 @@ def should_index( search_settings_instance.status == IndexModelStatus.PRESENT and secondary_index_building ): + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: DISABLE_INDEX_UPDATE_ON_SWAP is True and secondary index building" + ) return False # When switching over models, always index at least once @@ -388,19 +392,31 @@ def should_index( # No new index if the last index attempt succeeded # Once is enough. The model will never be able to swap otherwise. if last_index.status == IndexingStatus.SUCCESS: + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: FUTURE model with successful last index attempt={last_index.id}" + ) return False # No new index if the last index attempt is waiting to start if last_index.status == IndexingStatus.NOT_STARTED: + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: FUTURE model with NOT_STARTED last index attempt={last_index.id}" + ) return False # No new index if the last index attempt is running if last_index.status == IndexingStatus.IN_PROGRESS: + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: FUTURE model with IN_PROGRESS last index attempt={last_index.id}" + ) return False else: if ( connector.id == 0 or connector.source == DocumentSource.INGESTION_API ): # Ingestion API + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: FUTURE model with Ingestion API source" + ) return False return True @@ -412,6 +428,9 @@ def should_index( or connector.id == 0 or connector.source == DocumentSource.INGESTION_API ): + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: Connector is paused or is Ingestion API" + ) return False if search_settings_instance.status.is_current(): @@ -424,11 +443,16 @@ def should_index( return True if connector.refresh_freq is None: + task_logger.debug(f"Not indexing cc_pair={cc_pair.id}: refresh_freq is None") return False current_db_time = get_db_current_time(db_session) time_since_index = current_db_time - last_index.time_updated if time_since_index.total_seconds() < connector.refresh_freq: + task_logger.debug( + f"Not indexing cc_pair={cc_pair.id}: Last index attempt={last_index.id} " + f"too recent ({time_since_index.total_seconds()}s < {connector.refresh_freq}s)" + ) return False return True @@ -508,6 +532,13 @@ def try_creating_indexing_task( custom_task_id = redis_connector_index.generate_generator_task_id() + # Determine which queue to use based on whether this is a user file + queue = ( + OnyxCeleryQueues.USER_FILES_INDEXING + if cc_pair.is_user_file + else OnyxCeleryQueues.CONNECTOR_INDEXING + ) + # when the task is sent, we have yet to finish setting up the fence # therefore, the task must contain code that blocks until the fence is ready result = celery_app.send_task( @@ -518,7 +549,7 @@ def try_creating_indexing_task( search_settings_id=search_settings.id, tenant_id=tenant_id, ), - queue=OnyxCeleryQueues.CONNECTOR_INDEXING, + queue=queue, task_id=custom_task_id, priority=OnyxCeleryPriority.MEDIUM, ) diff --git
a/backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py b/backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py index fc94807c1a..bef565f15c 100644 --- a/backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py +++ b/backend/onyx/background/celery/tasks/shared/RetryDocumentIndex.py @@ -6,6 +6,7 @@ from tenacity import wait_random_exponential from onyx.document_index.interfaces import DocumentIndex from onyx.document_index.interfaces import VespaDocumentFields +from onyx.document_index.interfaces import VespaDocumentUserFields class RetryDocumentIndex: @@ -52,11 +53,13 @@ class RetryDocumentIndex: *, tenant_id: str, chunk_count: int | None, - fields: VespaDocumentFields, + fields: VespaDocumentFields | None, + user_fields: VespaDocumentUserFields | None, ) -> int: return self.index.update_single( doc_id, tenant_id=tenant_id, chunk_count=chunk_count, fields=fields, + user_fields=user_fields, ) diff --git a/backend/onyx/background/celery/tasks/shared/tasks.py b/backend/onyx/background/celery/tasks/shared/tasks.py index 36cb88c3c2..7daea821bb 100644 --- a/backend/onyx/background/celery/tasks/shared/tasks.py +++ b/backend/onyx/background/celery/tasks/shared/tasks.py @@ -164,6 +164,7 @@ def document_by_cc_pair_cleanup_task( tenant_id=tenant_id, chunk_count=doc.chunk_count, fields=fields, + user_fields=None, ) # there are still other cc_pair references to the doc, so just resync to Vespa diff --git a/backend/onyx/background/celery/tasks/user_file_folder_sync/tasks.py b/backend/onyx/background/celery/tasks/user_file_folder_sync/tasks.py new file mode 100644 index 0000000000..0907fdf742 --- /dev/null +++ b/backend/onyx/background/celery/tasks/user_file_folder_sync/tasks.py @@ -0,0 +1,266 @@ +import time +from typing import List + +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from redis.lock import Lock as RedisLock +from sqlalchemy.orm import Session +from tenacity import RetryError + +from onyx.background.celery.apps.app_base import task_logger +from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex +from onyx.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT +from onyx.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT +from onyx.background.celery.tasks.shared.tasks import OnyxCeleryTaskCompletionStatus +from onyx.configs.app_configs import JOB_TIMEOUT +from onyx.configs.constants import CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT +from onyx.configs.constants import OnyxCeleryTask +from onyx.configs.constants import OnyxRedisLocks +from onyx.db.connector_credential_pair import ( + get_connector_credential_pairs_with_user_files, +) +from onyx.db.document import get_document +from onyx.db.engine import get_session_with_current_tenant +from onyx.db.models import ConnectorCredentialPair +from onyx.db.models import Document +from onyx.db.models import DocumentByConnectorCredentialPair +from onyx.db.search_settings import get_active_search_settings +from onyx.db.user_documents import fetch_user_files_for_documents +from onyx.db.user_documents import fetch_user_folders_for_documents +from onyx.document_index.factory import get_default_document_index +from onyx.document_index.interfaces import VespaDocumentUserFields +from onyx.httpx.httpx_pool import HttpxPool +from onyx.redis.redis_pool import get_redis_client +from onyx.utils.logger import setup_logger + +logger = setup_logger() + + +@shared_task( + 
name=OnyxCeleryTask.CHECK_FOR_USER_FILE_FOLDER_SYNC, + ignore_result=True, + soft_time_limit=JOB_TIMEOUT, + trail=False, + bind=True, +) +def check_for_user_file_folder_sync(self: Task, *, tenant_id: str) -> bool | None: + """Runs periodically to check for documents that need user file folder metadata updates. + This task fetches all connector credential pairs with user files, gets the documents + associated with them, and updates the user file and folder metadata in Vespa. + """ + + time_start = time.monotonic() + + r = get_redis_client() + + lock_beat: RedisLock = r.lock( + OnyxRedisLocks.CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK, + timeout=CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT, + ) + + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return None + + try: + with get_session_with_current_tenant() as db_session: + # Get all connector credential pairs that have user files + cc_pairs = get_connector_credential_pairs_with_user_files(db_session) + + if not cc_pairs: + task_logger.info("No connector credential pairs with user files found") + return True + + # Get all documents associated with these cc_pairs + document_ids = get_documents_for_cc_pairs(cc_pairs, db_session) + + if not document_ids: + task_logger.info( + "No documents found for connector credential pairs with user files" + ) + return True + + # Fetch current user file and folder IDs for these documents + doc_id_to_user_file_id = fetch_user_files_for_documents( + document_ids=document_ids, db_session=db_session + ) + doc_id_to_user_folder_id = fetch_user_folders_for_documents( + document_ids=document_ids, db_session=db_session + ) + + # Update Vespa metadata for each document + for doc_id in document_ids: + user_file_id = doc_id_to_user_file_id.get(doc_id) + user_folder_id = doc_id_to_user_folder_id.get(doc_id) + + if user_file_id is not None or user_folder_id is not None: + # Schedule a task to update the document metadata + update_user_file_folder_metadata.apply_async( + args=(doc_id,), # Use tuple instead of list for args + kwargs={ + "tenant_id": tenant_id, + "user_file_id": user_file_id, + "user_folder_id": user_folder_id, + }, + queue="vespa_metadata_sync", + ) + + task_logger.info( + f"Scheduled metadata updates for {len(document_ids)} documents. 
" + f"Elapsed time: {time.monotonic() - time_start:.2f}s" + ) + + return True + except Exception as e: + task_logger.exception(f"Error in check_for_user_file_folder_sync: {e}") + return False + finally: + lock_beat.release() + + +def get_documents_for_cc_pairs( + cc_pairs: List[ConnectorCredentialPair], db_session: Session +) -> List[str]: + """Get all document IDs associated with the given connector credential pairs.""" + if not cc_pairs: + return [] + + cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs] + + # Query to get document IDs from DocumentByConnectorCredentialPair + # Note: DocumentByConnectorCredentialPair uses connector_id and credential_id, not cc_pair_id + doc_cc_pairs = ( + db_session.query(Document.id) + .join( + DocumentByConnectorCredentialPair, + Document.id == DocumentByConnectorCredentialPair.id, + ) + .filter( + db_session.query(ConnectorCredentialPair) + .filter( + ConnectorCredentialPair.id.in_(cc_pair_ids), + ConnectorCredentialPair.connector_id + == DocumentByConnectorCredentialPair.connector_id, + ConnectorCredentialPair.credential_id + == DocumentByConnectorCredentialPair.credential_id, + ) + .exists() + ) + .all() + ) + + return [doc_id for (doc_id,) in doc_cc_pairs] + + +@shared_task( + name=OnyxCeleryTask.UPDATE_USER_FILE_FOLDER_METADATA, + bind=True, + soft_time_limit=LIGHT_SOFT_TIME_LIMIT, + time_limit=LIGHT_TIME_LIMIT, + max_retries=3, +) +def update_user_file_folder_metadata( + self: Task, + document_id: str, + *, + tenant_id: str, + user_file_id: int | None, + user_folder_id: int | None, +) -> bool: + """Updates the user file and folder metadata for a document in Vespa.""" + start = time.monotonic() + completion_status = OnyxCeleryTaskCompletionStatus.UNDEFINED + + try: + with get_session_with_current_tenant() as db_session: + active_search_settings = get_active_search_settings(db_session) + doc_index = get_default_document_index( + search_settings=active_search_settings.primary, + secondary_search_settings=active_search_settings.secondary, + httpx_client=HttpxPool.get("vespa"), + ) + + retry_index = RetryDocumentIndex(doc_index) + + doc = get_document(document_id, db_session) + if not doc: + elapsed = time.monotonic() - start + task_logger.info( + f"doc={document_id} " + f"action=no_operation " + f"elapsed={elapsed:.2f}" + ) + completion_status = OnyxCeleryTaskCompletionStatus.SKIPPED + return False + + # Create user fields object with file and folder IDs + user_fields = VespaDocumentUserFields( + user_file_id=str(user_file_id) if user_file_id is not None else None, + user_folder_id=str(user_folder_id) + if user_folder_id is not None + else None, + ) + + # Update Vespa. OK if doc doesn't exist. Raises exception otherwise. + chunks_affected = retry_index.update_single( + document_id, + tenant_id=tenant_id, + chunk_count=doc.chunk_count, + fields=None, # We're only updating user fields + user_fields=user_fields, + ) + + elapsed = time.monotonic() - start + task_logger.info( + f"doc={document_id} " + f"action=user_file_folder_sync " + f"user_file_id={user_file_id} " + f"user_folder_id={user_folder_id} " + f"chunks={chunks_affected} " + f"elapsed={elapsed:.2f}" + ) + completion_status = OnyxCeleryTaskCompletionStatus.SUCCEEDED + return True + + except SoftTimeLimitExceeded: + task_logger.info(f"SoftTimeLimitExceeded exception. 
doc={document_id}") + completion_status = OnyxCeleryTaskCompletionStatus.SOFT_TIME_LIMIT + except Exception as ex: + e: Exception | None = None + while True: + if isinstance(ex, RetryError): + task_logger.warning( + f"Tenacity retry failed: num_attempts={ex.last_attempt.attempt_number}" + ) + + # only set the inner exception if it is of type Exception + e_temp = ex.last_attempt.exception() + if isinstance(e_temp, Exception): + e = e_temp + else: + e = ex + + task_logger.exception( + f"update_user_file_folder_metadata exceptioned: doc={document_id}" + ) + + completion_status = OnyxCeleryTaskCompletionStatus.RETRYABLE_EXCEPTION + if ( + self.max_retries is not None + and self.request.retries >= self.max_retries + ): + completion_status = ( + OnyxCeleryTaskCompletionStatus.NON_RETRYABLE_EXCEPTION + ) + + # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64 + countdown = 2 ** (self.request.retries + 4) + self.retry(exc=e, countdown=countdown) # this will raise a celery exception + break # we won't hit this, but it looks weird not to have it + finally: + task_logger.info( + f"update_user_file_folder_metadata completed: status={completion_status.value} doc={document_id}" + ) + + return False diff --git a/backend/onyx/background/celery/tasks/vespa/tasks.py b/backend/onyx/background/celery/tasks/vespa/tasks.py index 72c0f64816..d1e3b13130 100644 --- a/backend/onyx/background/celery/tasks/vespa/tasks.py +++ b/backend/onyx/background/celery/tasks/vespa/tasks.py @@ -573,6 +573,7 @@ def vespa_metadata_sync_task(self: Task, document_id: str, *, tenant_id: str) -> tenant_id=tenant_id, chunk_count=doc.chunk_count, fields=fields, + user_fields=None, ) # update db last. Worst case = we crash right before this and diff --git a/backend/onyx/background/indexing/run_indexing.py b/backend/onyx/background/indexing/run_indexing.py index ded71d332b..aab03640e6 100644 --- a/backend/onyx/background/indexing/run_indexing.py +++ b/backend/onyx/background/indexing/run_indexing.py @@ -274,7 +274,6 @@ def _run_indexing( "Search settings must be set for indexing. This should not be possible." ) - # search_settings = index_attempt_start.search_settings db_connector = index_attempt_start.connector_credential_pair.connector db_credential = index_attempt_start.connector_credential_pair.credential ctx = RunIndexingContext( @@ -638,6 +637,9 @@ def _run_indexing( # and mark the CCPair as invalid. This prevents the connector from being # used in the future until the credentials are updated. with get_session_with_current_tenant() as db_session_temp: + logger.exception( + f"Marking attempt {index_attempt_id} as canceled due to validation error." + ) mark_attempt_canceled( index_attempt_id, db_session_temp, @@ -684,6 +686,9 @@ def _run_indexing( elif isinstance(e, ConnectorStopSignal): with get_session_with_current_tenant() as db_session_temp: + logger.exception( + f"Marking attempt {index_attempt_id} as canceled due to stop signal." 
+ ) mark_attempt_canceled( index_attempt_id, db_session_temp, @@ -746,6 +751,7 @@ def _run_indexing( f"Connector succeeded: " f"docs={document_count} chunks={chunk_count} elapsed={elapsed_time:.2f}s" ) + else: mark_attempt_partially_succeeded(index_attempt_id, db_session_temp) logger.info( diff --git a/backend/onyx/chat/models.py b/backend/onyx/chat/models.py index d86d504f46..582195240f 100644 --- a/backend/onyx/chat/models.py +++ b/backend/onyx/chat/models.py @@ -127,6 +127,10 @@ class StreamStopInfo(SubQuestionIdentifier): return data +class UserKnowledgeFilePacket(BaseModel): + user_files: list[FileDescriptor] + + class LLMRelevanceFilterResponse(BaseModel): llm_selected_doc_indices: list[int] diff --git a/backend/onyx/chat/process_message.py b/backend/onyx/chat/process_message.py index 4d1b5512bc..7762c67296 100644 --- a/backend/onyx/chat/process_message.py +++ b/backend/onyx/chat/process_message.py @@ -36,6 +36,7 @@ from onyx.chat.models import StreamingError from onyx.chat.models import StreamStopInfo from onyx.chat.models import StreamStopReason from onyx.chat.models import SubQuestionKey +from onyx.chat.models import UserKnowledgeFilePacket from onyx.chat.prompt_builder.answer_prompt_builder import AnswerPromptBuilder from onyx.chat.prompt_builder.answer_prompt_builder import default_build_system_message from onyx.chat.prompt_builder.answer_prompt_builder import default_build_user_message @@ -51,6 +52,7 @@ from onyx.context.search.enums import LLMEvaluationType from onyx.context.search.enums import OptionalSearchSetting from onyx.context.search.enums import QueryFlow from onyx.context.search.enums import SearchType +from onyx.context.search.models import BaseFilters from onyx.context.search.models import InferenceSection from onyx.context.search.models import RetrievalDetails from onyx.context.search.models import SearchRequest @@ -64,6 +66,7 @@ from onyx.context.search.utils import relevant_sections_to_indices from onyx.db.chat import attach_files_to_chat_message from onyx.db.chat import create_db_search_doc from onyx.db.chat import create_new_chat_message +from onyx.db.chat import create_search_doc_from_user_file from onyx.db.chat import get_chat_message from onyx.db.chat import get_chat_session_by_id from onyx.db.chat import get_db_search_doc_by_id @@ -80,12 +83,16 @@ from onyx.db.milestone import update_user_assistant_milestone from onyx.db.models import SearchDoc as DbSearchDoc from onyx.db.models import ToolCall from onyx.db.models import User +from onyx.db.models import UserFile from onyx.db.persona import get_persona_by_id from onyx.db.search_settings import get_current_search_settings from onyx.document_index.factory import get_default_document_index from onyx.file_store.models import ChatFileType from onyx.file_store.models import FileDescriptor +from onyx.file_store.models import InMemoryChatFile from onyx.file_store.utils import load_all_chat_files +from onyx.file_store.utils import load_all_user_file_files +from onyx.file_store.utils import load_all_user_files from onyx.file_store.utils import save_files from onyx.llm.exceptions import GenAIDisabledException from onyx.llm.factory import get_llms_for_persona @@ -98,6 +105,7 @@ from onyx.server.query_and_chat.models import ChatMessageDetail from onyx.server.query_and_chat.models import CreateChatMessageRequest from onyx.server.utils import get_json_line from onyx.tools.force import ForceUseTool +from onyx.tools.models import SearchToolOverrideKwargs from onyx.tools.models import ToolResponse from onyx.tools.tool import 
Tool from onyx.tools.tool_constructor import construct_tools @@ -175,11 +183,14 @@ def _handle_search_tool_response_summary( db_session: Session, selected_search_docs: list[DbSearchDoc] | None, dedupe_docs: bool = False, + user_files: list[UserFile] | None = None, + loaded_user_files: list[InMemoryChatFile] | None = None, ) -> tuple[QADocsResponse, list[DbSearchDoc], list[int] | None]: response_sumary = cast(SearchResponseSummary, packet.response) is_extended = isinstance(packet, ExtendedToolResponse) dropped_inds = None + if not selected_search_docs: top_docs = chunks_or_sections_to_search_docs(response_sumary.top_sections) @@ -193,9 +204,31 @@ def _handle_search_tool_response_summary( create_db_search_doc(server_search_doc=doc, db_session=db_session) for doc in deduped_docs ] + else: reference_db_search_docs = selected_search_docs + doc_ids = {doc.id for doc in reference_db_search_docs} + if user_files is not None: + for user_file in user_files: + if user_file.id not in doc_ids: + associated_chat_file = None + if loaded_user_files is not None: + associated_chat_file = next( + ( + file + for file in loaded_user_files + if file.file_id == str(user_file.file_id) + ), + None, + ) + # Use create_search_doc_from_user_file to properly add the document to the database + if associated_chat_file is not None: + db_doc = create_search_doc_from_user_file( + user_file, associated_chat_file, db_session + ) + reference_db_search_docs.append(db_doc) + response_docs = [ translate_db_search_doc_to_server_search_doc(db_search_doc) for db_search_doc in reference_db_search_docs @@ -253,7 +286,10 @@ def _handle_internet_search_tool_response_summary( def _get_force_search_settings( - new_msg_req: CreateChatMessageRequest, tools: list[Tool] + new_msg_req: CreateChatMessageRequest, + tools: list[Tool], + user_file_ids: list[int], + user_folder_ids: list[int], ) -> ForceUseTool: internet_search_available = any( isinstance(tool, InternetSearchTool) for tool in tools @@ -261,8 +297,11 @@ def _get_force_search_settings( search_tool_available = any(isinstance(tool, SearchTool) for tool in tools) if not internet_search_available and not search_tool_available: - # Does not matter much which tool is set here as force is false and neither tool is available - return ForceUseTool(force_use=False, tool_name=SearchTool._NAME) + if new_msg_req.force_user_file_search: + return ForceUseTool(force_use=True, tool_name=SearchTool._NAME) + else: + # Does not matter much which tool is set here as force is false and neither tool is available + return ForceUseTool(force_use=False, tool_name=SearchTool._NAME) tool_name = SearchTool._NAME if search_tool_available else InternetSearchTool._NAME # Currently, the internet search tool does not support query override @@ -272,12 +311,25 @@ def _get_force_search_settings( else None ) + # Create override_kwargs for the search tool if user_file_ids are provided + override_kwargs = None + if (user_file_ids or user_folder_ids) and tool_name == SearchTool._NAME: + override_kwargs = SearchToolOverrideKwargs( + force_no_rerank=False, + alternate_db_session=None, + retrieved_sections_callback=None, + skip_query_analysis=False, + user_file_ids=user_file_ids, + user_folder_ids=user_folder_ids, + ) + if new_msg_req.file_descriptors: # If user has uploaded files they're using, don't run any of the search tools return ForceUseTool(force_use=False, tool_name=tool_name) should_force_search = any( [ + new_msg_req.force_user_file_search, new_msg_req.retrieval_options and new_msg_req.retrieval_options.run_search 
== OptionalSearchSetting.ALWAYS, @@ -290,9 +342,17 @@ def _get_force_search_settings( if should_force_search: # If we are using selected docs, just put something here so the Tool doesn't need to build its own args via an LLM call args = {"query": new_msg_req.message} if new_msg_req.search_doc_ids else args - return ForceUseTool(force_use=True, tool_name=tool_name, args=args) - return ForceUseTool(force_use=False, tool_name=tool_name, args=args) + return ForceUseTool( + force_use=True, + tool_name=tool_name, + args=args, + override_kwargs=override_kwargs, + ) + + return ForceUseTool( + force_use=False, tool_name=tool_name, args=args, override_kwargs=override_kwargs + ) ChatPacket = ( @@ -311,6 +371,7 @@ ChatPacket = ( | AgenticMessageResponseIDInfo | StreamStopInfo | AgentSearchPacket + | UserKnowledgeFilePacket ) ChatPacketStream = Iterator[ChatPacket] @@ -356,6 +417,10 @@ def stream_chat_message_objects( llm: LLM try: + # Move these variables inside the try block + file_id_to_user_file = {} + ordered_user_files = None + user_id = user.id if user is not None else None chat_session = get_chat_session_by_id( @@ -535,6 +600,70 @@ def stream_chat_message_objects( ) req_file_ids = [f["id"] for f in new_msg_req.file_descriptors] latest_query_files = [file for file in files if file.file_id in req_file_ids] + user_file_ids = new_msg_req.user_file_ids or [] + user_folder_ids = new_msg_req.user_folder_ids or [] + + if persona.user_files: + for file in persona.user_files: + user_file_ids.append(file.id) + if persona.user_folders: + for folder in persona.user_folders: + user_folder_ids.append(folder.id) + + # Initialize flag for user file search + use_search_for_user_files = False + + user_files: list[InMemoryChatFile] | None = None + search_for_ordering_only = False + user_file_files: list[UserFile] | None = None + if user_file_ids or user_folder_ids: + # Load user files + user_files = load_all_user_files( + user_file_ids or [], + user_folder_ids or [], + db_session, + ) + user_file_files = load_all_user_file_files( + user_file_ids or [], + user_folder_ids or [], + db_session, + ) + # Store mapping of file_id to file for later reordering + if user_files: + file_id_to_user_file = {file.file_id: file for file in user_files} + + # Calculate token count for the files + from onyx.db.user_documents import calculate_user_files_token_count + from onyx.chat.prompt_builder.citations_prompt import ( + compute_max_document_tokens_for_persona, + ) + + total_tokens = calculate_user_files_token_count( + user_file_ids or [], + user_folder_ids or [], + db_session, + ) + + # Calculate available tokens for documents based on prompt, user input, etc. 
+ available_tokens = compute_max_document_tokens_for_persona( + db_session=db_session, + persona=persona, + actual_user_input=message_text, # Use the actual user message + ) + + logger.debug( + f"Total file tokens: {total_tokens}, Available tokens: {available_tokens}" + ) + + # ALWAYS use search for user files, but track if we need it for context or just ordering + use_search_for_user_files = True + # If files are small enough for context, we'll just use search for ordering + search_for_ordering_only = total_tokens <= available_tokens + + if search_for_ordering_only: + # Add original user files to context since they fit + if user_files: + latest_query_files.extend(user_files) if user_message: attach_files_to_chat_message( @@ -677,8 +806,10 @@ def stream_chat_message_objects( prompt_config=prompt_config, db_session=db_session, user=user, + user_knowledge_present=bool(user_files or user_folder_ids), llm=llm, fast_llm=fast_llm, + use_file_search=new_msg_req.force_user_file_search, search_tool_config=SearchToolConfig( answer_style_config=answer_style_config, document_pruning_config=document_pruning_config, @@ -708,17 +839,138 @@ def stream_chat_message_objects( for tool_list in tool_dict.values(): tools.extend(tool_list) + force_use_tool = _get_force_search_settings( + new_msg_req, tools, user_file_ids, user_folder_ids + ) + + # Set force_use if user files exceed token limit + if use_search_for_user_files: + try: + # Check if search tool is available in the tools list + search_tool_available = any( + isinstance(tool, SearchTool) for tool in tools + ) + + # If no search tool is available, add one + if not search_tool_available: + logger.info("No search tool available, creating one for user files") + # Create a basic search tool config + search_tool_config = SearchToolConfig( + answer_style_config=answer_style_config, + document_pruning_config=document_pruning_config, + retrieval_options=retrieval_options or RetrievalDetails(), + ) + + # Create and add the search tool + search_tool = SearchTool( + db_session=db_session, + user=user, + persona=persona, + retrieval_options=search_tool_config.retrieval_options, + prompt_config=prompt_config, + llm=llm, + fast_llm=fast_llm, + pruning_config=search_tool_config.document_pruning_config, + answer_style_config=search_tool_config.answer_style_config, + evaluation_type=( + LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP + ), + bypass_acl=bypass_acl, + ) + + # Add the search tool to the tools list + tools.append(search_tool) + + logger.info( + "Added search tool for user files that exceed token limit" + ) + + # Now set force_use_tool.force_use to True + force_use_tool.force_use = True + force_use_tool.tool_name = SearchTool._NAME + + # Set query argument if not already set + if not force_use_tool.args: + force_use_tool.args = {"query": final_msg.message} + + # Pass the user file IDs to the search tool + if user_file_ids or user_folder_ids: + # Create a BaseFilters object with user_file_ids + if not retrieval_options: + retrieval_options = RetrievalDetails() + if not retrieval_options.filters: + retrieval_options.filters = BaseFilters() + + # Set user file and folder IDs in the filters + retrieval_options.filters.user_file_ids = user_file_ids + retrieval_options.filters.user_folder_ids = user_folder_ids + + # Create override kwargs for the search tool + override_kwargs = SearchToolOverrideKwargs( + force_no_rerank=search_for_ordering_only, # Skip reranking for ordering-only + alternate_db_session=None, + 
retrieved_sections_callback=None,
+                    skip_query_analysis=search_for_ordering_only,  # Skip query analysis for ordering-only
+                    user_file_ids=user_file_ids,
+                    user_folder_ids=user_folder_ids,
+                    ordering_only=search_for_ordering_only,  # Set ordering_only flag for fast path
+                )
+
+                # Set the override kwargs in the force_use_tool
+                force_use_tool.override_kwargs = override_kwargs
+
+                if search_for_ordering_only:
+                    logger.info(
+                        "Fast path: Configured search tool for ordering-only mode; "
+                        "skipping reranking and query analysis"
+                    )
+                    logger.info(
+                        f"Using {len(user_file_ids or [])} files and {len(user_folder_ids or [])} folders"
+                    )
+                else:
+                    logger.info(
+                        f"Configured search tool to use {len(user_file_ids or [])} files "
+                        f"and {len(user_folder_ids or [])} folders"
+                    )
+        except Exception as e:
+            logger.exception(f"Error configuring search tool for user files: {e}")
+            use_search_for_user_files = False
+
     # TODO: unify message history with single message history
     message_history = [
         PreviousMessage.from_chat_message(msg, files) for msg in history_msgs
     ]

+    if not use_search_for_user_files and user_files:
+        yield UserKnowledgeFilePacket(
+            user_files=[
+                FileDescriptor(
+                    id=str(file.file_id), type=ChatFileType.USER_KNOWLEDGE
+                )
+                for file in user_files
+            ]
+        )
+
+    if search_for_ordering_only:
+        logger.info(
+            "Performance: Forcing LLMEvaluationType.SKIP to prevent chunk evaluation for ordering-only search"
+        )

     search_request = SearchRequest(
         query=final_msg.message,
         evaluation_type=(
-            LLMEvaluationType.BASIC
-            if persona.llm_relevance_filter
-            else LLMEvaluationType.SKIP
+            LLMEvaluationType.SKIP
+            if search_for_ordering_only
+            else (
+                LLMEvaluationType.BASIC
+                if persona.llm_relevance_filter
+                else LLMEvaluationType.SKIP
+            )
         ),
         human_selected_filters=(
             retrieval_options.filters if retrieval_options else None
@@ -737,7 +989,6 @@ def stream_chat_message_objects(
         ),
     )

-    force_use_tool = _get_force_search_settings(new_msg_req, tools)
     prompt_builder = AnswerPromptBuilder(
         user_message=default_build_user_message(
             user_query=final_msg.message,
@@ -806,8 +1057,22 @@ def stream_chat_message_objects(
                     info = info_by_subq[
                         SubQuestionKey(level=level, question_num=level_question_num)
                     ]
+
+                    # Skip LLM relevance processing entirely for ordering-only mode
+                    if search_for_ordering_only and packet.id == SECTION_RELEVANCE_LIST_ID:
+                        logger.info(
+                            "Fast path: Bypassing section relevance processing for ordering-only mode"
+                        )
+                        # Skip this packet entirely since it would trigger LLM processing
+                        continue
+
                     # TODO: don't need to dedupe here when we do it in agent flow
                     if packet.id == SEARCH_RESPONSE_SUMMARY_ID:
+                        if search_for_ordering_only:
+                            logger.info(
+                                "Fast path: Skipping document deduplication for ordering-only mode"
+                            )
+
                         (
                             info.qa_docs_response,
                             info.reference_db_search_docs,
@@ -817,16 +1082,91 @@ def stream_chat_message_objects(
                             db_session=db_session,
                             selected_search_docs=selected_db_search_docs,
                             # Deduping happens at the last step to avoid harming quality by dropping content early on
+                            # Skip deduping completely for ordering-only mode to save time
                             dedupe_docs=(
-                                retrieval_options.dedupe_docs
-                                if retrieval_options
-                                else False
+                                False
+                                if search_for_ordering_only
+                                else (
+                                    retrieval_options.dedupe_docs
+                                    if retrieval_options
+                                    else False
+                                )
                             ),
+                            user_files=user_file_files if search_for_ordering_only else [],
+                            loaded_user_files=user_files
+                            if search_for_ordering_only
+                            else [],
                         )
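# --- Editor's note: illustrative sketch, not part of the patch. The SearchRequest built
# above picks its evaluation type with a nested conditional; flattened into a helper, the
# precedence is easier to see: ordering-only mode wins over the persona's relevance
# filter, so no LLM evaluation is ever triggered. The enum here is a local stand-in.
from enum import Enum

class EvalType(Enum):
    BASIC = "basic"
    SKIP = "skip"

def pick_evaluation_type(ordering_only: bool, llm_relevance_filter: bool) -> EvalType:
    if ordering_only:
        return EvalType.SKIP  # fast path: never spend LLM calls on ordering-only search
    return EvalType.BASIC if llm_relevance_filter else EvalType.SKIP

assert pick_evaluation_type(True, True) is EvalType.SKIP
assert pick_evaluation_type(False, True) is EvalType.BASIC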
+
+                        # If we're using search just for ordering user files
+                        if (
+                            search_for_ordering_only
+                            and user_files
+                            and info.qa_docs_response
+                        ):
+                            logger.info(
+                                f"ORDERING: Processing search results for ordering {len(user_files)} user files"
+                            )
+                            import time
+
+                            ordering_start = time.time()
+
+                            # Extract document order from search results
+                            doc_order = []
+                            for doc in info.qa_docs_response.top_documents:
+                                doc_id = doc.document_id
+                                if str(doc_id).startswith("USER_FILE_CONNECTOR__"):
+                                    file_id = doc_id.replace("USER_FILE_CONNECTOR__", "")
+                                    if file_id in file_id_to_user_file:
+                                        doc_order.append(file_id)
+
+                            logger.info(
+                                f"ORDERING: Found {len(doc_order)} files from search results"
+                            )
+
+                            # Add any files that weren't in search results at the end
+                            missing_files = [
+                                f_id
+                                for f_id in file_id_to_user_file.keys()
+                                if f_id not in doc_order
+                            ]
+                            doc_order.extend(missing_files)
+
+                            logger.info(
+                                f"ORDERING: Added {len(missing_files)} missing files to the end"
+                            )
+
+                            # Reorder user files based on search results
+                            ordered_user_files = [
+                                file_id_to_user_file[f_id]
+                                for f_id in doc_order
+                                if f_id in file_id_to_user_file
+                            ]
+
+                            logger.debug(
+                                f"ORDERING: Reordered user files in {time.time() - ordering_start:.3f}s"
+                            )
+
+                            yield UserKnowledgeFilePacket(
+                                user_files=[
+                                    FileDescriptor(
+                                        id=str(file.file_id),
+                                        type=ChatFileType.USER_KNOWLEDGE,
+                                    )
+                                    for file in ordered_user_files
+                                ]
+                            )
+
                         yield info.qa_docs_response
                     elif packet.id == SECTION_RELEVANCE_LIST_ID:
                         relevance_sections = packet.response

+                        if search_for_ordering_only:
+                            logger.info(
+                                "Performance: Skipping relevance filtering for ordering-only mode"
+                            )
+                            continue
+
                         if info.reference_db_search_docs is None:
                             logger.warning(
                                 "No reference docs found for relevance filtering"
@@ -936,7 +1276,7 @@ def stream_chat_message_objects(
                     ]
                     info.tool_result = packet
                 yield cast(ChatPacket, packet)
-            logger.debug("Reached end of stream")
+
     except ValueError as e:
         logger.exception("Failed to process chat message.")
@@ -1018,10 +1358,16 @@ def stream_chat_message_objects(
             error=ERROR_TYPE_CANCELLED if answer.is_cancelled() else None,
             tool_call=(
                 ToolCall(
-                    tool_id=tool_name_to_tool_id[info.tool_result.tool_name],
-                    tool_name=info.tool_result.tool_name,
-                    tool_arguments=info.tool_result.tool_args,
-                    tool_result=info.tool_result.tool_result,
+                    # The enclosing conditional already guarantees info.tool_result is
+                    # set; .get() guards against tools missing from the name -> id map
+                    tool_id=tool_name_to_tool_id.get(info.tool_result.tool_name, 0),
+                    tool_name=info.tool_result.tool_name,
+                    tool_arguments=info.tool_result.tool_args,
+                    tool_result=info.tool_result.tool_result,
                 )
                 if info.tool_result
                 else None
diff --git a/backend/onyx/chat/prompt_builder/utils.py b/backend/onyx/chat/prompt_builder/utils.py
index 6b2a003078..8527de3b23 100644
--- a/backend/onyx/chat/prompt_builder/utils.py
+++ b/backend/onyx/chat/prompt_builder/utils.py
@@ -19,6 +19,7 @@ def translate_onyx_msg_to_langchain(
     # attached. Just ignore them for now.
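# --- Editor's note: illustrative sketch, not part of the patch. The ordering hunk above
# re-emits user files in the rank order the search returned, appending any file the
# search missed. As a pure function over plain IDs (the "USER_FILE_CONNECTOR__" prefix
# matches the document IDs this patch assigns to user files):
def order_files_by_search_rank(
    ranked_doc_ids: list[str], file_ids: list[str]
) -> list[str]:
    prefix = "USER_FILE_CONNECTOR__"
    known = set(file_ids)
    # Keep ranked docs that are known user files, preserving the search's rank order
    ordered = [
        doc_id.removeprefix(prefix)
        for doc_id in ranked_doc_ids
        if doc_id.startswith(prefix) and doc_id.removeprefix(prefix) in known
    ]
    # Files the search did not return go last, in their original order
    ordered += [f_id for f_id in file_ids if f_id not in ordered]
    return ordered

assert order_files_by_search_rank(
    ["USER_FILE_CONNECTOR__b", "USER_FILE_CONNECTOR__c"], ["a", "b", "c"]
) == ["b", "c", "a"]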
if not isinstance(msg, ChatMessage): files = msg.files + content = build_content_with_imgs( msg.message, files, message_type=msg.message_type, exclude_images=exclude_images ) diff --git a/backend/onyx/chat/tool_handling/tool_response_handler.py b/backend/onyx/chat/tool_handling/tool_response_handler.py index 21f4830aab..cab5d9e08f 100644 --- a/backend/onyx/chat/tool_handling/tool_response_handler.py +++ b/backend/onyx/chat/tool_handling/tool_response_handler.py @@ -180,6 +180,10 @@ def get_tool_call_for_non_tool_calling_llm_impl( if tool_args is None: raise RuntimeError(f"Tool '{tool.name}' did not return args") + # If we have override_kwargs, add them to the tool_args + if force_use_tool.override_kwargs is not None: + tool_args["override_kwargs"] = force_use_tool.override_kwargs + return (tool, tool_args) else: tool_options = check_which_tools_should_run_for_non_tool_calling_llm( diff --git a/backend/onyx/configs/app_configs.py b/backend/onyx/configs/app_configs.py index bb975eef5b..a293df6248 100644 --- a/backend/onyx/configs/app_configs.py +++ b/backend/onyx/configs/app_configs.py @@ -170,7 +170,7 @@ POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" POSTGRES_PASSWORD = urllib.parse.quote_plus( os.environ.get("POSTGRES_PASSWORD") or "password" ) -POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "localhost" +POSTGRES_HOST = os.environ.get("POSTGRES_HOST") or "127.0.0.1" POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" AWS_REGION_NAME = os.environ.get("AWS_REGION_NAME") or "us-east-2" diff --git a/backend/onyx/configs/chat_configs.py b/backend/onyx/configs/chat_configs.py index d99261294b..5e42455458 100644 --- a/backend/onyx/configs/chat_configs.py +++ b/backend/onyx/configs/chat_configs.py @@ -3,7 +3,7 @@ import os INPUT_PROMPT_YAML = "./onyx/seeding/input_prompts.yaml" PROMPTS_YAML = "./onyx/seeding/prompts.yaml" PERSONAS_YAML = "./onyx/seeding/personas.yaml" - +USER_FOLDERS_YAML = "./onyx/seeding/user_folders.yaml" NUM_RETURNED_HITS = 50 # Used for LLM filtering and reranking # We want this to be approximately the number of results we want to show on the first page diff --git a/backend/onyx/configs/constants.py b/backend/onyx/configs/constants.py index 24f8248ef4..e6a81de6ab 100644 --- a/backend/onyx/configs/constants.py +++ b/backend/onyx/configs/constants.py @@ -102,6 +102,8 @@ CELERY_GENERIC_BEAT_LOCK_TIMEOUT = 120 CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 120 +CELERY_USER_FILE_FOLDER_SYNC_BEAT_LOCK_TIMEOUT = 120 + CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120 @@ -269,6 +271,7 @@ class FileOrigin(str, Enum): CONNECTOR = "connector" GENERATED_REPORT = "generated_report" INDEXING_CHECKPOINT = "indexing_checkpoint" + PLAINTEXT_CACHE = "plaintext_cache" OTHER = "other" @@ -309,6 +312,7 @@ class OnyxCeleryQueues: # Indexing queue CONNECTOR_INDEXING = "connector_indexing" + USER_FILES_INDEXING = "user_files_indexing" # Monitoring queue MONITORING = "monitoring" @@ -327,6 +331,7 @@ class OnyxRedisLocks: CHECK_CONNECTOR_EXTERNAL_GROUP_SYNC_BEAT_LOCK = ( "da_lock:check_connector_external_group_sync_beat" ) + CHECK_USER_FILE_FOLDER_SYNC_BEAT_LOCK = "da_lock:check_user_file_folder_sync_beat" MONITOR_BACKGROUND_PROCESSES_LOCK = "da_lock:monitor_background_processes" CHECK_AVAILABLE_TENANTS_LOCK = "da_lock:check_available_tenants" PRE_PROVISION_TENANT_LOCK = "da_lock:pre_provision_tenant" @@ -397,6 +402,7 @@ class OnyxCeleryTask: # Tenant pre-provisioning PRE_PROVISION_TENANT = 
f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_pre_provision_tenant" + UPDATE_USER_FILE_FOLDER_METADATA = "update_user_file_folder_metadata" CHECK_FOR_CONNECTOR_DELETION = "check_for_connector_deletion_task" CHECK_FOR_VESPA_SYNC_TASK = "check_for_vespa_sync_task" @@ -405,6 +411,7 @@ class OnyxCeleryTask: CHECK_FOR_DOC_PERMISSIONS_SYNC = "check_for_doc_permissions_sync" CHECK_FOR_EXTERNAL_GROUP_SYNC = "check_for_external_group_sync" CHECK_FOR_LLM_MODEL_UPDATE = "check_for_llm_model_update" + CHECK_FOR_USER_FILE_FOLDER_SYNC = "check_for_user_file_folder_sync" # Connector checkpoint cleanup CHECK_FOR_CHECKPOINT_CLEANUP = "check_for_checkpoint_cleanup" diff --git a/backend/onyx/connectors/github/connector.py b/backend/onyx/connectors/github/connector.py index 757d3d04a0..2193bb80df 100644 --- a/backend/onyx/connectors/github/connector.py +++ b/backend/onyx/connectors/github/connector.py @@ -276,7 +276,26 @@ class GithubConnector(CheckpointConnector[GithubConnectorCheckpoint]): return checkpoint assert checkpoint.cached_repo is not None, "No repo saved in checkpoint" - repo = checkpoint.cached_repo.to_Repository(self.github_client.requester) + + # Try to access the requester - different PyGithub versions may use different attribute names + try: + # Try direct access to a known attribute name first + if hasattr(self.github_client, "_requester"): + requester = self.github_client._requester + elif hasattr(self.github_client, "_Github__requester"): + requester = self.github_client._Github__requester + else: + # If we can't find the requester attribute, we need to fall back to recreating the repo + raise AttributeError("Could not find requester attribute") + + repo = checkpoint.cached_repo.to_Repository(requester) + except Exception as e: + # If all else fails, re-fetch the repo directly + logger.warning( + f"Failed to deserialize repository: {e}. Attempting to re-fetch." + ) + repo_id = checkpoint.cached_repo.id + repo = self.github_client.get_repo(repo_id) if self.include_prs and checkpoint.stage == GithubConnectorStage.PRS: logger.info(f"Fetching PRs for repo: {repo.name}") diff --git a/backend/onyx/context/search/models.py b/backend/onyx/context/search/models.py index 3ce3dacae8..fd7a7af29d 100644 --- a/backend/onyx/context/search/models.py +++ b/backend/onyx/context/search/models.py @@ -105,6 +105,8 @@ class BaseFilters(BaseModel): document_set: list[str] | None = None time_cutoff: datetime | None = None tags: list[Tag] | None = None + user_file_ids: list[int] | None = None + user_folder_ids: list[int] | None = None class IndexFilters(BaseFilters): diff --git a/backend/onyx/context/search/pipeline.py b/backend/onyx/context/search/pipeline.py index 3c7043994e..f387642d80 100644 --- a/backend/onyx/context/search/pipeline.py +++ b/backend/onyx/context/search/pipeline.py @@ -158,6 +158,47 @@ class SearchPipeline: return cast(list[InferenceChunk], self._retrieved_chunks) + def get_ordering_only_chunks( + self, + query: str, + user_file_ids: list[int] | None = None, + user_folder_ids: list[int] | None = None, + ) -> list[InferenceChunk]: + """Optimized method that only retrieves chunks for ordering purposes. + Skips all extra processing and uses minimal configuration to speed up retrieval. 
+ """ + logger.info("Fast path: Using optimized chunk retrieval for ordering-only mode") + + # Create minimal filters with just user file/folder IDs + filters = IndexFilters( + user_file_ids=user_file_ids or [], + user_folder_ids=user_folder_ids or [], + access_control_list=None, + ) + + # Use a simplified query that skips all unnecessary processing + minimal_query = SearchQuery( + query=query, + search_type=SearchType.SEMANTIC, + filters=filters, + # Set minimal options needed for retrieval + evaluation_type=LLMEvaluationType.SKIP, + recency_bias_multiplier=1.0, + chunks_above=0, # No need for surrounding context + chunks_below=0, # No need for surrounding context + processed_keywords=[], # Empty list instead of None + rerank_settings=None, + hybrid_alpha=0.0, + max_llm_filter_sections=0, + ) + + # Retrieve chunks using the minimal configuration + return retrieve_chunks( + query=minimal_query, + document_index=self.document_index, + db_session=self.db_session, + ) + @log_function_time(print_only=True) def _get_sections(self) -> list[InferenceSection]: """Returns an expanded section from each of the chunks. @@ -391,6 +432,10 @@ class SearchPipeline: self.search_query.evaluation_type == LLMEvaluationType.SKIP or DISABLE_LLM_DOC_RELEVANCE ): + if self.search_query.evaluation_type == LLMEvaluationType.SKIP: + logger.info( + "Fast path: Skipping section relevance evaluation for ordering-only mode" + ) return None if self.search_query.evaluation_type == LLMEvaluationType.UNSPECIFIED: diff --git a/backend/onyx/context/search/postprocessing/postprocessing.py b/backend/onyx/context/search/postprocessing/postprocessing.py index cfa07ef020..7043161f7b 100644 --- a/backend/onyx/context/search/postprocessing/postprocessing.py +++ b/backend/onyx/context/search/postprocessing/postprocessing.py @@ -11,6 +11,7 @@ from langchain_core.messages import SystemMessage from onyx.chat.models import SectionRelevancePiece from onyx.configs.app_configs import BLURB_SIZE from onyx.configs.app_configs import IMAGE_ANALYSIS_SYSTEM_PROMPT +from onyx.configs.chat_configs import DISABLE_LLM_DOC_RELEVANCE from onyx.configs.constants import RETURN_SEPARATOR from onyx.configs.llm_configs import get_search_time_image_analysis_enabled from onyx.configs.model_configs import CROSS_ENCODER_RANGE_MAX @@ -366,6 +367,21 @@ def filter_sections( Returns a list of the unique chunk IDs that were marked as relevant """ + # Log evaluation type to help with debugging + logger.info(f"filter_sections called with evaluation_type={query.evaluation_type}") + + # Fast path: immediately return empty list for SKIP evaluation type (ordering-only mode) + if query.evaluation_type == LLMEvaluationType.SKIP: + return [] + + # Additional safeguard: Log a warning if this function is ever called with SKIP evaluation type + # This should never happen if our fast paths are working correctly + if query.evaluation_type == LLMEvaluationType.SKIP: + logger.warning( + "WARNING: filter_sections called with SKIP evaluation_type. This should never happen!" 
     sections_to_filter = sections_to_filter[: query.max_llm_filter_sections]

     contents = [
@@ -398,6 +414,16 @@ def search_postprocessing(
     llm: LLM,
     rerank_metrics_callback: Callable[[RerankMetricsContainer], None] | None = None,
 ) -> Iterator[list[InferenceSection] | list[SectionRelevancePiece]]:
+    # Fast path for ordering-only mode, signaled by evaluation_type == SKIP
+    if search_query.evaluation_type == LLMEvaluationType.SKIP:
+        logger.info(
+            "Fast path: Detected ordering-only mode, bypassing all post-processing"
+        )
+        # Immediately yield the sections without any processing and an empty relevance list
+        yield retrieved_sections
+        yield cast(list[SectionRelevancePiece], [])
+        return
+
     post_processing_tasks: list[FunctionCall] = []

     if not retrieved_sections:
@@ -434,10 +460,14 @@ def search_postprocessing(
         sections_yielded = True

     llm_filter_task_id = None
-    if search_query.evaluation_type in [
-        LLMEvaluationType.BASIC,
-        LLMEvaluationType.UNSPECIFIED,
-    ]:
+    # Only add LLM filtering when relevance evaluation is requested and not globally
+    # disabled (the SKIP fast path above has already returned by this point)
+    if not DISABLE_LLM_DOC_RELEVANCE and search_query.evaluation_type in [
+        LLMEvaluationType.BASIC,
+        LLMEvaluationType.UNSPECIFIED,
+    ]:
+        logger.info("Adding LLM filtering task for document relevance evaluation")
         post_processing_tasks.append(
             FunctionCall(
                 filter_sections,
@@ -449,6 +479,10 @@ def search_postprocessing(
             )
         )
         llm_filter_task_id = post_processing_tasks[-1].result_id
+    elif DISABLE_LLM_DOC_RELEVANCE:
+        logger.info("Skipping LLM filtering task because LLM doc relevance is disabled")

     post_processing_results = (
         run_functions_in_parallel(post_processing_tasks)
diff --git a/backend/onyx/context/search/preprocessing/preprocessing.py b/backend/onyx/context/search/preprocessing/preprocessing.py
index 814579e588..59c28db170 100644
--- a/backend/onyx/context/search/preprocessing/preprocessing.py
+++ b/backend/onyx/context/search/preprocessing/preprocessing.py
@@ -165,7 +165,18 @@ def retrieval_preprocessing(
     user_acl_filters = (
         None if bypass_acl else build_access_filters_for_user(user, db_session)
     )
+    user_file_ids = preset_filters.user_file_ids or []
+    user_folder_ids = preset_filters.user_folder_ids or []
+    if persona and persona.user_files:
+        user_file_ids = user_file_ids + [
+            file.id
+            for file in persona.user_files
+            if file.id not in (preset_filters.user_file_ids or [])
+        ]
+
     final_filters = IndexFilters(
+        user_file_ids=user_file_ids,
+        user_folder_ids=user_folder_ids,
         source_type=preset_filters.source_type or predicted_source_filters,
         document_set=preset_filters.document_set,
         time_cutoff=time_filter or predicted_time_cutoff,
diff --git a/backend/onyx/db/chat.py b/backend/onyx/db/chat.py
index 931d5eef5a..cdaa1adb59 100644
--- a/backend/onyx/db/chat.py
+++ b/backend/onyx/db/chat.py
@@ -26,6 +26,7 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
 from onyx.auth.schemas import UserRole
 from onyx.chat.models import DocumentRelevance
 from onyx.configs.chat_configs import HARD_DELETE_CHATS
+from onyx.configs.constants import DocumentSource
 from onyx.configs.constants import MessageType
 from onyx.context.search.models import InferenceSection
 from onyx.context.search.models import RetrievalDocs
@@ -44,9 +45,11 @@ from onyx.db.models import SearchDoc
 from onyx.db.models import SearchDoc as
DBSearchDoc from onyx.db.models import ToolCall from onyx.db.models import User +from onyx.db.models import UserFile from onyx.db.persona import get_best_persona_id_for_user from onyx.db.pg_file_store import delete_lobj_by_name from onyx.file_store.models import FileDescriptor +from onyx.file_store.models import InMemoryChatFile from onyx.llm.override_models import LLMOverride from onyx.llm.override_models import PromptOverride from onyx.server.query_and_chat.models import ChatMessageDetail @@ -854,6 +857,87 @@ def get_db_search_doc_by_id(doc_id: int, db_session: Session) -> DBSearchDoc | N return search_doc +def create_search_doc_from_user_file( + db_user_file: UserFile, associated_chat_file: InMemoryChatFile, db_session: Session +) -> SearchDoc: + """Create a SearchDoc in the database from a UserFile and return it. + This ensures proper ID generation by SQLAlchemy and prevents duplicate key errors. + """ + blurb = "" + if associated_chat_file and associated_chat_file.content: + try: + # Try to decode as UTF-8, but handle errors gracefully + content_sample = associated_chat_file.content[:100] + # Remove null bytes which can cause SQL errors + content_sample = content_sample.replace(b"\x00", b"") + blurb = content_sample.decode("utf-8", errors="replace") + except Exception: + # If decoding fails completely, provide a generic description + blurb = f"[Binary file: {db_user_file.name}]" + + db_search_doc = SearchDoc( + document_id=db_user_file.document_id, + chunk_ind=0, # Default to 0 for user files + semantic_id=db_user_file.name, + link=db_user_file.link_url, + blurb=blurb, + source_type=DocumentSource.FILE, # Assuming internal source for user files + boost=0, # Default boost + hidden=False, # Default visibility + doc_metadata={}, # Empty metadata + score=0.0, # Default score of 0.0 instead of None + is_relevant=None, # No relevance initially + relevance_explanation=None, # No explanation initially + match_highlights=[], # No highlights initially + updated_at=db_user_file.created_at, # Use created_at as updated_at + primary_owners=[], # Empty list instead of None + secondary_owners=[], # Empty list instead of None + is_internet=False, # Not from internet + ) + + db_session.add(db_search_doc) + db_session.flush() # Get the ID but don't commit yet + + return db_search_doc + + +def translate_db_user_file_to_search_doc( + db_user_file: UserFile, associated_chat_file: InMemoryChatFile +) -> SearchDoc: + blurb = "" + if associated_chat_file and associated_chat_file.content: + try: + # Try to decode as UTF-8, but handle errors gracefully + content_sample = associated_chat_file.content[:100] + # Remove null bytes which can cause SQL errors + content_sample = content_sample.replace(b"\x00", b"") + blurb = content_sample.decode("utf-8", errors="replace") + except Exception: + # If decoding fails completely, provide a generic description + blurb = f"[Binary file: {db_user_file.name}]" + + return SearchDoc( + # Don't set ID - let SQLAlchemy auto-generate it + document_id=db_user_file.document_id, + chunk_ind=0, # Default to 0 for user files + semantic_id=db_user_file.name, + link=db_user_file.link_url, + blurb=blurb, + source_type=DocumentSource.FILE, # Assuming internal source for user files + boost=0, # Default boost + hidden=False, # Default visibility + doc_metadata={}, # Empty metadata + score=0.0, # Default score of 0.0 instead of None + is_relevant=None, # No relevance initially + relevance_explanation=None, # No explanation initially + match_highlights=[], # No highlights initially + 
updated_at=db_user_file.created_at, # Use created_at as updated_at + primary_owners=[], # Empty list instead of None + secondary_owners=[], # Empty list instead of None + is_internet=False, # Not from internet + ) + + def translate_db_search_doc_to_server_search_doc( db_search_doc: SearchDoc, remove_doc_content: bool = False, diff --git a/backend/onyx/db/connector_credential_pair.py b/backend/onyx/db/connector_credential_pair.py index 078a09a256..320383cb76 100644 --- a/backend/onyx/db/connector_credential_pair.py +++ b/backend/onyx/db/connector_credential_pair.py @@ -27,6 +27,7 @@ from onyx.db.models import IndexModelStatus from onyx.db.models import SearchSettings from onyx.db.models import User from onyx.db.models import User__UserGroup +from onyx.db.models import UserFile from onyx.db.models import UserGroup__ConnectorCredentialPair from onyx.db.models import UserRole from onyx.server.models import StatusResponse @@ -106,11 +107,13 @@ def get_connector_credential_pairs_for_user( eager_load_connector: bool = False, eager_load_credential: bool = False, eager_load_user: bool = False, + include_user_files: bool = False, ) -> list[ConnectorCredentialPair]: if eager_load_user: assert ( eager_load_credential ), "eager_load_credential must be True if eager_load_user is True" + stmt = select(ConnectorCredentialPair).distinct() if eager_load_connector: @@ -126,6 +129,9 @@ def get_connector_credential_pairs_for_user( if ids: stmt = stmt.where(ConnectorCredentialPair.id.in_(ids)) + if not include_user_files: + stmt = stmt.where(ConnectorCredentialPair.is_user_file != True) # noqa: E712 + return list(db_session.scalars(stmt).unique().all()) @@ -153,14 +159,16 @@ def get_connector_credential_pairs_for_user_parallel( def get_connector_credential_pairs( - db_session: Session, - ids: list[int] | None = None, + db_session: Session, ids: list[int] | None = None, include_user_files: bool = False ) -> list[ConnectorCredentialPair]: stmt = select(ConnectorCredentialPair).distinct() if ids: stmt = stmt.where(ConnectorCredentialPair.id.in_(ids)) + if not include_user_files: + stmt = stmt.where(ConnectorCredentialPair.is_user_file != True) # noqa: E712 + return list(db_session.scalars(stmt).all()) @@ -207,12 +215,15 @@ def get_connector_credential_pair_for_user( connector_id: int, credential_id: int, user: User | None, + include_user_files: bool = False, get_editable: bool = True, ) -> ConnectorCredentialPair | None: stmt = select(ConnectorCredentialPair) stmt = _add_user_filters(stmt, user, get_editable) stmt = stmt.where(ConnectorCredentialPair.connector_id == connector_id) stmt = stmt.where(ConnectorCredentialPair.credential_id == credential_id) + if not include_user_files: + stmt = stmt.where(ConnectorCredentialPair.is_user_file != True) # noqa: E712 result = db_session.execute(stmt) return result.scalar_one_or_none() @@ -321,6 +332,9 @@ def _update_connector_credential_pair( cc_pair.total_docs_indexed += net_docs if status is not None: cc_pair.status = status + if cc_pair.is_user_file: + cc_pair.status = ConnectorCredentialPairStatus.PAUSED + db_session.commit() @@ -446,6 +460,7 @@ def add_credential_to_connector( initial_status: ConnectorCredentialPairStatus = ConnectorCredentialPairStatus.ACTIVE, last_successful_index_time: datetime | None = None, seeding_flow: bool = False, + is_user_file: bool = False, ) -> StatusResponse: connector = fetch_connector_by_id(connector_id, db_session) @@ -511,6 +526,7 @@ def add_credential_to_connector( access_type=access_type, auto_sync_options=auto_sync_options, 
last_successful_index_time=last_successful_index_time, + is_user_file=is_user_file, ) db_session.add(association) db_session.flush() # make sure the association has an id @@ -587,8 +603,12 @@ def remove_credential_from_connector( def fetch_connector_credential_pairs( db_session: Session, + include_user_files: bool = False, ) -> list[ConnectorCredentialPair]: - return db_session.query(ConnectorCredentialPair).all() + stmt = select(ConnectorCredentialPair) + if not include_user_files: + stmt = stmt.where(ConnectorCredentialPair.is_user_file != True) # noqa: E712 + return list(db_session.scalars(stmt).unique().all()) def resync_cc_pair( @@ -634,3 +654,23 @@ def resync_cc_pair( ) db_session.commit() + + +def get_connector_credential_pairs_with_user_files( + db_session: Session, +) -> list[ConnectorCredentialPair]: + """ + Get all connector credential pairs that have associated user files. + + Args: + db_session: Database session + + Returns: + List of ConnectorCredentialPair objects that have user files + """ + return ( + db_session.query(ConnectorCredentialPair) + .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id) + .distinct() + .all() + ) diff --git a/backend/onyx/db/document_set.py b/backend/onyx/db/document_set.py index 0f91cbc71b..025e48876c 100644 --- a/backend/onyx/db/document_set.py +++ b/backend/onyx/db/document_set.py @@ -605,7 +605,6 @@ def fetch_document_sets_for_document( result = fetch_document_sets_for_documents([document_id], db_session) if not result: return [] - return result[0][1] diff --git a/backend/onyx/db/models.py b/backend/onyx/db/models.py index 951e2b760e..31a6c10594 100644 --- a/backend/onyx/db/models.py +++ b/backend/onyx/db/models.py @@ -212,6 +212,10 @@ class User(SQLAlchemyBaseUserTableUUID, Base): back_populates="creator", primaryjoin="User.id == foreign(ConnectorCredentialPair.creator_id)", ) + folders: Mapped[list["UserFolder"]] = relationship( + "UserFolder", back_populates="user" + ) + files: Mapped[list["UserFile"]] = relationship("UserFile", back_populates="user") @validates("email") def validate_email(self, key: str, value: str) -> str: @@ -419,6 +423,7 @@ class ConnectorCredentialPair(Base): """ __tablename__ = "connector_credential_pair" + is_user_file: Mapped[bool] = mapped_column(Boolean, default=False) # NOTE: this `id` column has to use `Sequence` instead of `autoincrement=True` # due to some SQLAlchemy quirks + this not being a primary key column id: Mapped[int] = mapped_column( @@ -505,6 +510,10 @@ class ConnectorCredentialPair(Base): primaryjoin="foreign(ConnectorCredentialPair.creator_id) == remote(User.id)", ) + user_file: Mapped["UserFile"] = relationship( + "UserFile", back_populates="cc_pair", uselist=False + ) + background_errors: Mapped[list["BackgroundError"]] = relationship( "BackgroundError", back_populates="cc_pair", cascade="all, delete-orphan" ) @@ -1808,6 +1817,17 @@ class Persona(Base): secondary="persona__user_group", viewonly=True, ) + # Relationship to UserFile + user_files: Mapped[list["UserFile"]] = relationship( + "UserFile", + secondary="persona__user_file", + back_populates="assistants", + ) + user_folders: Mapped[list["UserFolder"]] = relationship( + "UserFolder", + secondary="persona__user_folder", + back_populates="assistants", + ) labels: Mapped[list["PersonaLabel"]] = relationship( "PersonaLabel", secondary=Persona__PersonaLabel.__table__, @@ -1824,6 +1844,24 @@ class Persona(Base): ) +class Persona__UserFolder(Base): + __tablename__ = "persona__user_folder" + + persona_id: Mapped[int] = 
mapped_column(ForeignKey("persona.id"), primary_key=True) + user_folder_id: Mapped[int] = mapped_column( + ForeignKey("user_folder.id"), primary_key=True + ) + + +class Persona__UserFile(Base): + __tablename__ = "persona__user_file" + + persona_id: Mapped[int] = mapped_column(ForeignKey("persona.id"), primary_key=True) + user_file_id: Mapped[int] = mapped_column( + ForeignKey("user_file.id"), primary_key=True + ) + + class PersonaLabel(Base): __tablename__ = "persona_label" @@ -2346,6 +2384,64 @@ class InputPrompt__User(Base): disabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) +class UserFolder(Base): + __tablename__ = "user_folder" + + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False) + name: Mapped[str] = mapped_column(nullable=False) + description: Mapped[str] = mapped_column(nullable=False) + created_at: Mapped[datetime.datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now() + ) + user: Mapped["User"] = relationship(back_populates="folders") + files: Mapped[list["UserFile"]] = relationship(back_populates="folder") + assistants: Mapped[list["Persona"]] = relationship( + "Persona", + secondary=Persona__UserFolder.__table__, + back_populates="user_folders", + ) + + +class UserDocument(str, Enum): + CHAT = "chat" + RECENT = "recent" + FILE = "file" + + +class UserFile(Base): + __tablename__ = "user_file" + + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + user_id: Mapped[UUID | None] = mapped_column(ForeignKey("user.id"), nullable=False) + assistants: Mapped[list["Persona"]] = relationship( + "Persona", + secondary=Persona__UserFile.__table__, + back_populates="user_files", + ) + folder_id: Mapped[int | None] = mapped_column( + ForeignKey("user_folder.id"), nullable=True + ) + + file_id: Mapped[str] = mapped_column(nullable=False) + document_id: Mapped[str] = mapped_column(nullable=False) + name: Mapped[str] = mapped_column(nullable=False) + created_at: Mapped[datetime.datetime] = mapped_column( + default=datetime.datetime.utcnow + ) + user: Mapped["User"] = relationship(back_populates="files") + folder: Mapped["UserFolder"] = relationship(back_populates="files") + token_count: Mapped[int | None] = mapped_column(Integer, nullable=True) + + cc_pair_id: Mapped[int | None] = mapped_column( + ForeignKey("connector_credential_pair.id"), nullable=True, unique=True + ) + cc_pair: Mapped["ConnectorCredentialPair"] = relationship( + "ConnectorCredentialPair", back_populates="user_file" + ) + link_url: Mapped[str | None] = mapped_column(String, nullable=True) + + """ Multi-tenancy related tables """ diff --git a/backend/onyx/db/persona.py b/backend/onyx/db/persona.py index ae37b3f50a..43dc9245e2 100644 --- a/backend/onyx/db/persona.py +++ b/backend/onyx/db/persona.py @@ -33,6 +33,8 @@ from onyx.db.models import StarterMessage from onyx.db.models import Tool from onyx.db.models import User from onyx.db.models import User__UserGroup +from onyx.db.models import UserFile +from onyx.db.models import UserFolder from onyx.db.models import UserGroup from onyx.db.notification import create_notification from onyx.server.features.persona.models import PersonaSharedNotificationData @@ -209,7 +211,6 @@ def create_update_persona( if not all_prompt_ids: raise ValueError("No prompt IDs provided") - is_default_persona: bool | None = create_persona_request.is_default_persona # Default persona validation if 
create_persona_request.is_default_persona: if not create_persona_request.is_public: @@ -221,7 +222,7 @@ def create_update_persona( user.role == UserRole.CURATOR or user.role == UserRole.GLOBAL_CURATOR ): - is_default_persona = None + pass elif user.role != UserRole.ADMIN: raise ValueError("Only admins can make a default persona") @@ -249,7 +250,9 @@ def create_update_persona( num_chunks=create_persona_request.num_chunks, llm_relevance_filter=create_persona_request.llm_relevance_filter, llm_filter_extraction=create_persona_request.llm_filter_extraction, - is_default_persona=is_default_persona, + is_default_persona=create_persona_request.is_default_persona, + user_file_ids=create_persona_request.user_file_ids, + user_folder_ids=create_persona_request.user_folder_ids, ) versioned_make_persona_private = fetch_versioned_implementation( @@ -344,6 +347,8 @@ def get_personas_for_user( selectinload(Persona.groups), selectinload(Persona.users), selectinload(Persona.labels), + selectinload(Persona.user_files), + selectinload(Persona.user_folders), ) results = db_session.execute(stmt).scalars().all() @@ -438,6 +443,8 @@ def upsert_persona( builtin_persona: bool = False, is_default_persona: bool | None = None, label_ids: list[int] | None = None, + user_file_ids: list[int] | None = None, + user_folder_ids: list[int] | None = None, chunks_above: int = CONTEXT_CHUNKS_ABOVE, chunks_below: int = CONTEXT_CHUNKS_BELOW, ) -> Persona: @@ -463,6 +470,7 @@ def upsert_persona( user=user, get_editable=True, ) + # Fetch and attach tools by IDs tools = None if tool_ids is not None: @@ -481,6 +489,26 @@ def upsert_persona( if not document_sets and document_set_ids: raise ValueError("document_sets not found") + # Fetch and attach user_files by IDs + user_files = None + if user_file_ids is not None: + user_files = ( + db_session.query(UserFile).filter(UserFile.id.in_(user_file_ids)).all() + ) + if not user_files and user_file_ids: + raise ValueError("user_files not found") + + # Fetch and attach user_folders by IDs + user_folders = None + if user_folder_ids is not None: + user_folders = ( + db_session.query(UserFolder) + .filter(UserFolder.id.in_(user_folder_ids)) + .all() + ) + if not user_folders and user_folder_ids: + raise ValueError("user_folders not found") + # Fetch and attach prompts by IDs prompts = None if prompt_ids is not None: @@ -549,6 +577,14 @@ def upsert_persona( if tools is not None: existing_persona.tools = tools or [] + if user_file_ids is not None: + existing_persona.user_files.clear() + existing_persona.user_files = user_files or [] + + if user_folder_ids is not None: + existing_persona.user_folders.clear() + existing_persona.user_folders = user_folders or [] + # We should only update display priority if it is not already set if existing_persona.display_priority is None: existing_persona.display_priority = display_priority @@ -590,6 +626,8 @@ def upsert_persona( is_default_persona=is_default_persona if is_default_persona is not None else False, + user_folders=user_folders or [], + user_files=user_files or [], labels=labels or [], ) db_session.add(new_persona) diff --git a/backend/onyx/db/user_documents.py b/backend/onyx/db/user_documents.py new file mode 100644 index 0000000000..9d98bd3840 --- /dev/null +++ b/backend/onyx/db/user_documents.py @@ -0,0 +1,466 @@ +import datetime +import time +from typing import List +from uuid import UUID + +from fastapi import UploadFile +from sqlalchemy import and_ +from sqlalchemy import func +from sqlalchemy.orm import joinedload +from sqlalchemy.orm import 
Session + +from onyx.auth.users import get_current_tenant_id +from onyx.configs.constants import DocumentSource +from onyx.connectors.models import InputType +from onyx.db.connector import create_connector +from onyx.db.connector_credential_pair import add_credential_to_connector +from onyx.db.credentials import create_credential +from onyx.db.enums import AccessType +from onyx.db.models import ConnectorCredentialPair +from onyx.db.models import Document +from onyx.db.models import DocumentByConnectorCredentialPair +from onyx.db.models import Persona +from onyx.db.models import Persona__UserFile +from onyx.db.models import User +from onyx.db.models import UserFile +from onyx.db.models import UserFolder +from onyx.server.documents.connector import trigger_indexing_for_cc_pair +from onyx.server.documents.connector import upload_files +from onyx.server.documents.models import ConnectorBase +from onyx.server.documents.models import CredentialBase +from onyx.server.models import StatusResponse + +USER_FILE_CONSTANT = "USER_FILE_CONNECTOR" + + +def create_user_files( + files: List[UploadFile], + folder_id: int | None, + user: User | None, + db_session: Session, + link_url: str | None = None, +) -> list[UserFile]: + upload_response = upload_files(files, db_session) + user_files = [] + + for file_path, file in zip(upload_response.file_paths, files): + new_file = UserFile( + user_id=user.id if user else None, + folder_id=folder_id, + file_id=file_path, + document_id="USER_FILE_CONNECTOR__" + file_path, + name=file.filename, + token_count=None, + link_url=link_url, + ) + db_session.add(new_file) + user_files.append(new_file) + db_session.commit() + return user_files + + +def create_user_file_with_indexing( + files: List[UploadFile], + folder_id: int | None, + user: User, + db_session: Session, + trigger_index: bool = True, +) -> list[UserFile]: + """Create user files and trigger immediate indexing""" + # Create the user files first + user_files = create_user_files(files, folder_id, user, db_session) + + # Create connector and credential for each file + for user_file in user_files: + cc_pair = create_file_connector_credential(user_file, user, db_session) + user_file.cc_pair_id = cc_pair.data + + db_session.commit() + + # Trigger immediate high-priority indexing for all created files + if trigger_index: + tenant_id = get_current_tenant_id() + for user_file in user_files: + # Use the existing trigger_indexing_for_cc_pair function but with highest priority + if user_file.cc_pair_id: + trigger_indexing_for_cc_pair( + [], + user_file.cc_pair.connector_id, + False, + tenant_id, + db_session, + is_user_file=True, + ) + + return user_files + + +def create_file_connector_credential( + user_file: UserFile, user: User, db_session: Session +) -> StatusResponse: + """Create connector and credential for a user file""" + connector_base = ConnectorBase( + name=f"UserFile-{user_file.file_id}-{int(time.time())}", + source=DocumentSource.FILE, + input_type=InputType.LOAD_STATE, + connector_specific_config={ + "file_locations": [user_file.file_id], + }, + refresh_freq=None, + prune_freq=None, + indexing_start=None, + ) + + connector = create_connector(db_session=db_session, connector_data=connector_base) + + credential_info = CredentialBase( + credential_json={}, + admin_public=True, + source=DocumentSource.FILE, + curator_public=True, + groups=[], + name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}", + is_user_file=True, + ) + + credential = create_credential(credential_info, user, db_session) + + return 
add_credential_to_connector( + db_session=db_session, + user=user, + connector_id=connector.id, + credential_id=credential.id, + cc_pair_name=f"UserFileCCPair-{user_file.file_id}-{int(time.time())}", + access_type=AccessType.PRIVATE, + auto_sync_options=None, + groups=[], + is_user_file=True, + ) + + +def get_user_file_indexing_status( + file_ids: list[int], db_session: Session +) -> dict[int, bool]: + """Get indexing status for multiple user files""" + status_dict = {} + + # Query UserFile with cc_pair join + files_with_pairs = ( + db_session.query(UserFile) + .filter(UserFile.id.in_(file_ids)) + .options(joinedload(UserFile.cc_pair)) + .all() + ) + + for file in files_with_pairs: + if file.cc_pair and file.cc_pair.last_successful_index_time: + status_dict[file.id] = True + else: + status_dict[file.id] = False + + return status_dict + + +def calculate_user_files_token_count( + file_ids: list[int], folder_ids: list[int], db_session: Session +) -> int: + """Calculate total token count for specified files and folders""" + total_tokens = 0 + + # Get tokens from individual files + if file_ids: + file_tokens = ( + db_session.query(func.sum(UserFile.token_count)) + .filter(UserFile.id.in_(file_ids)) + .scalar() + or 0 + ) + total_tokens += file_tokens + + # Get tokens from folders + if folder_ids: + folder_files_tokens = ( + db_session.query(func.sum(UserFile.token_count)) + .filter(UserFile.folder_id.in_(folder_ids)) + .scalar() + or 0 + ) + total_tokens += folder_files_tokens + + return total_tokens + + +def load_all_user_files( + file_ids: list[int], folder_ids: list[int], db_session: Session +) -> list[UserFile]: + """Load all user files from specified file IDs and folder IDs""" + result = [] + + # Get individual files + if file_ids: + files = db_session.query(UserFile).filter(UserFile.id.in_(file_ids)).all() + result.extend(files) + + # Get files from folders + if folder_ids: + folder_files = ( + db_session.query(UserFile).filter(UserFile.folder_id.in_(folder_ids)).all() + ) + result.extend(folder_files) + + return result + + +def get_user_files_from_folder(folder_id: int, db_session: Session) -> list[UserFile]: + return db_session.query(UserFile).filter(UserFile.folder_id == folder_id).all() + + +def share_file_with_assistant( + file_id: int, assistant_id: int, db_session: Session +) -> None: + file = db_session.query(UserFile).filter(UserFile.id == file_id).first() + assistant = db_session.query(Persona).filter(Persona.id == assistant_id).first() + + if file and assistant: + file.assistants.append(assistant) + db_session.commit() + + +def unshare_file_with_assistant( + file_id: int, assistant_id: int, db_session: Session +) -> None: + db_session.query(Persona__UserFile).filter( + and_( + Persona__UserFile.user_file_id == file_id, + Persona__UserFile.persona_id == assistant_id, + ) + ).delete() + db_session.commit() + + +def share_folder_with_assistant( + folder_id: int, assistant_id: int, db_session: Session +) -> None: + folder = db_session.query(UserFolder).filter(UserFolder.id == folder_id).first() + assistant = db_session.query(Persona).filter(Persona.id == assistant_id).first() + + if folder and assistant: + for file in folder.files: + share_file_with_assistant(file.id, assistant_id, db_session) + + +def unshare_folder_with_assistant( + folder_id: int, assistant_id: int, db_session: Session +) -> None: + folder = db_session.query(UserFolder).filter(UserFolder.id == folder_id).first() + + if folder: + for file in folder.files: + unshare_file_with_assistant(file.id, assistant_id, 
db_session) + + +def fetch_user_files_for_documents( + document_ids: list[str], + db_session: Session, +) -> dict[str, int | None]: + """ + Fetches user file IDs for the given document IDs. + + Args: + document_ids: List of document IDs to fetch user files for + db_session: Database session + + Returns: + Dictionary mapping document IDs to user file IDs (or None if no user file exists) + """ + # First, get the document to cc_pair mapping + doc_cc_pairs = ( + db_session.query(Document.id, ConnectorCredentialPair.id) + .join( + DocumentByConnectorCredentialPair, + Document.id == DocumentByConnectorCredentialPair.id, + ) + .join( + ConnectorCredentialPair, + and_( + DocumentByConnectorCredentialPair.connector_id + == ConnectorCredentialPair.connector_id, + DocumentByConnectorCredentialPair.credential_id + == ConnectorCredentialPair.credential_id, + ), + ) + .filter(Document.id.in_(document_ids)) + .all() + ) + + # Get cc_pair to user_file mapping + cc_pair_to_user_file = ( + db_session.query(ConnectorCredentialPair.id, UserFile.id) + .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id) + .filter( + ConnectorCredentialPair.id.in_( + [cc_pair_id for _, cc_pair_id in doc_cc_pairs] + ) + ) + .all() + ) + + # Create mapping from cc_pair_id to user_file_id + cc_pair_to_user_file_dict = { + cc_pair_id: user_file_id for cc_pair_id, user_file_id in cc_pair_to_user_file + } + + # Create the final result mapping document_id to user_file_id + result: dict[str, int | None] = {doc_id: None for doc_id in document_ids} + for doc_id, cc_pair_id in doc_cc_pairs: + if cc_pair_id in cc_pair_to_user_file_dict: + result[doc_id] = cc_pair_to_user_file_dict[cc_pair_id] + + return result + + +def fetch_user_folders_for_documents( + document_ids: list[str], + db_session: Session, +) -> dict[str, int | None]: + """ + Fetches user folder IDs for the given document IDs. + + For each document, returns the folder ID that the document's associated user file belongs to. 
+ + Args: + document_ids: List of document IDs to fetch user folders for + db_session: Database session + + Returns: + Dictionary mapping document IDs to user folder IDs (or None if no user folder exists) + """ + # First, get the document to cc_pair mapping + doc_cc_pairs = ( + db_session.query(Document.id, ConnectorCredentialPair.id) + .join( + DocumentByConnectorCredentialPair, + Document.id == DocumentByConnectorCredentialPair.id, + ) + .join( + ConnectorCredentialPair, + and_( + DocumentByConnectorCredentialPair.connector_id + == ConnectorCredentialPair.connector_id, + DocumentByConnectorCredentialPair.credential_id + == ConnectorCredentialPair.credential_id, + ), + ) + .filter(Document.id.in_(document_ids)) + .all() + ) + + # Get cc_pair to user_file and folder mapping + cc_pair_to_folder = ( + db_session.query(ConnectorCredentialPair.id, UserFile.folder_id) + .join(UserFile, UserFile.cc_pair_id == ConnectorCredentialPair.id) + .filter( + ConnectorCredentialPair.id.in_( + [cc_pair_id for _, cc_pair_id in doc_cc_pairs] + ) + ) + .all() + ) + + # Create mapping from cc_pair_id to folder_id + cc_pair_to_folder_dict = { + cc_pair_id: folder_id for cc_pair_id, folder_id in cc_pair_to_folder + } + + # Create the final result mapping document_id to folder_id + result: dict[str, int | None] = {doc_id: None for doc_id in document_ids} + for doc_id, cc_pair_id in doc_cc_pairs: + if cc_pair_id in cc_pair_to_folder_dict: + result[doc_id] = cc_pair_to_folder_dict[cc_pair_id] + + return result + + +def get_user_file_from_id(db_session: Session, user_file_id: int) -> UserFile | None: + return db_session.query(UserFile).filter(UserFile.id == user_file_id).first() + + +# def fetch_user_files_for_documents( +# # document_ids: list[str], +# # db_session: Session, +# # ) -> dict[str, int | None]: +# # # Query UserFile objects for the given document_ids +# # user_files = ( +# # db_session.query(UserFile).filter(UserFile.document_id.in_(document_ids)).all() +# # ) + +# # # Create a dictionary mapping document_ids to UserFile objects +# # result: dict[str, int | None] = {doc_id: None for doc_id in document_ids} +# # for user_file in user_files: +# # result[user_file.document_id] = user_file.id + +# # return result + + +def upsert_user_folder( + db_session: Session, + id: int | None = None, + user_id: UUID | None = None, + name: str | None = None, + description: str | None = None, + created_at: datetime.datetime | None = None, + user: User | None = None, + files: list[UserFile] | None = None, + assistants: list[Persona] | None = None, +) -> UserFolder: + if id is not None: + user_folder = db_session.query(UserFolder).filter_by(id=id).first() + else: + user_folder = ( + db_session.query(UserFolder).filter_by(name=name, user_id=user_id).first() + ) + + if user_folder: + if user_id is not None: + user_folder.user_id = user_id + if name is not None: + user_folder.name = name + if description is not None: + user_folder.description = description + if created_at is not None: + user_folder.created_at = created_at + if user is not None: + user_folder.user = user + if files is not None: + user_folder.files = files + if assistants is not None: + user_folder.assistants = assistants + else: + user_folder = UserFolder( + id=id, + user_id=user_id, + name=name, + description=description, + created_at=created_at or datetime.datetime.utcnow(), + user=user, + files=files or [], + assistants=assistants or [], + ) + db_session.add(user_folder) + + db_session.flush() + return user_folder + + +def 
get_user_folder_by_name(db_session: Session, name: str) -> UserFolder | None: + return db_session.query(UserFolder).filter(UserFolder.name == name).first() + + +def update_user_file_token_count__no_commit( + user_file_id_to_token_count: dict[int, int | None], + db_session: Session, +) -> None: + for user_file_id, token_count in user_file_id_to_token_count.items(): + db_session.query(UserFile).filter(UserFile.id == user_file_id).update( + {UserFile.token_count: token_count} + ) diff --git a/backend/onyx/document_index/interfaces.py b/backend/onyx/document_index/interfaces.py index e34cbc9eb7..66912a971c 100644 --- a/backend/onyx/document_index/interfaces.py +++ b/backend/onyx/document_index/interfaces.py @@ -104,6 +104,16 @@ class VespaDocumentFields: aggregated_chunk_boost_factor: float | None = None +@dataclass +class VespaDocumentUserFields: + """ + Fields that are specific to the user who is indexing the document. + """ + + user_file_id: str | None = None + user_folder_id: str | None = None + + @dataclass class UpdateRequest: """ @@ -258,7 +268,8 @@ class Updatable(abc.ABC): *, tenant_id: str, chunk_count: int | None, - fields: VespaDocumentFields, + fields: VespaDocumentFields | None, + user_fields: VespaDocumentUserFields | None, ) -> int: """ Updates all chunks for a document with the specified fields. diff --git a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd index 4b7c7c1e01..d3fcf73a76 100644 --- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd +++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd @@ -120,12 +120,22 @@ schema DANSWER_CHUNK_NAME { indexing: summary | attribute rank: filter attribute: fast-search - } + } field document_sets type weightedset { indexing: summary | attribute rank: filter attribute: fast-search } + field user_file type int { + indexing: summary | attribute + rank: filter + attribute: fast-search + } + field user_folder type int { + indexing: summary | attribute + rank: filter + attribute: fast-search + } } # If using different tokenization settings, the fieldset has to be removed, and the field must diff --git a/backend/onyx/document_index/vespa/index.py b/backend/onyx/document_index/vespa/index.py index b60eaa322f..3f6443de2e 100644 --- a/backend/onyx/document_index/vespa/index.py +++ b/backend/onyx/document_index/vespa/index.py @@ -36,6 +36,7 @@ from onyx.document_index.interfaces import MinimalDocumentIndexingInfo from onyx.document_index.interfaces import UpdateRequest from onyx.document_index.interfaces import VespaChunkRequest from onyx.document_index.interfaces import VespaDocumentFields +from onyx.document_index.interfaces import VespaDocumentUserFields from onyx.document_index.vespa.chunk_retrieval import batch_search_api_retrieval from onyx.document_index.vespa.chunk_retrieval import ( parallel_visit_api_retrieval, @@ -70,6 +71,8 @@ from onyx.document_index.vespa_constants import NUM_THREADS from onyx.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT from onyx.document_index.vespa_constants import TENANT_ID_PAT from onyx.document_index.vespa_constants import TENANT_ID_REPLACEMENT +from onyx.document_index.vespa_constants import USER_FILE +from onyx.document_index.vespa_constants import USER_FOLDER from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT from onyx.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT from onyx.document_index.vespa_constants import 
VESPA_TIMEOUT @@ -592,7 +595,8 @@ class VespaIndex(DocumentIndex): self, doc_chunk_id: UUID, index_name: str, - fields: VespaDocumentFields, + fields: VespaDocumentFields | None, + user_fields: VespaDocumentUserFields | None, doc_id: str, http_client: httpx.Client, ) -> None: @@ -603,21 +607,31 @@ class VespaIndex(DocumentIndex): update_dict: dict[str, dict] = {"fields": {}} - if fields.boost is not None: - update_dict["fields"][BOOST] = {"assign": fields.boost} + if fields is not None: + if fields.boost is not None: + update_dict["fields"][BOOST] = {"assign": fields.boost} - if fields.document_sets is not None: - update_dict["fields"][DOCUMENT_SETS] = { - "assign": {document_set: 1 for document_set in fields.document_sets} - } + if fields.document_sets is not None: + update_dict["fields"][DOCUMENT_SETS] = { + "assign": {document_set: 1 for document_set in fields.document_sets} + } - if fields.access is not None: - update_dict["fields"][ACCESS_CONTROL_LIST] = { - "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()} - } + if fields.access is not None: + update_dict["fields"][ACCESS_CONTROL_LIST] = { + "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()} + } - if fields.hidden is not None: - update_dict["fields"][HIDDEN] = {"assign": fields.hidden} + if fields.hidden is not None: + update_dict["fields"][HIDDEN] = {"assign": fields.hidden} + + if user_fields is not None: + if user_fields.user_file_id is not None: + update_dict["fields"][USER_FILE] = {"assign": user_fields.user_file_id} + + if user_fields.user_folder_id is not None: + update_dict["fields"][USER_FOLDER] = { + "assign": user_fields.user_folder_id + } if not update_dict["fields"]: logger.error("Update request received but nothing to update.") @@ -649,7 +663,8 @@ class VespaIndex(DocumentIndex): *, chunk_count: int | None, tenant_id: str, - fields: VespaDocumentFields, + fields: VespaDocumentFields | None, + user_fields: VespaDocumentUserFields | None, ) -> int: """Note: if the document id does not exist, the update will be a no-op and the function will complete with no errors or exceptions. 
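# --- Editor's note: illustrative sketch, not part of the patch. _update_single_chunk
# above assembles a Vespa partial-update payload in which every present field becomes an
# "assign" operation and absent fields are omitted entirely; that omission is what lets
# the fields/user_fields split update one group without touching the other. Field names
# follow the constants this patch introduces.
def build_update_payload(
    user_file_id: str | None, user_folder_id: str | None, boost: float | None
) -> dict:
    fields: dict[str, dict] = {}
    if boost is not None:
        fields["boost"] = {"assign": boost}
    if user_file_id is not None:
        fields["user_file"] = {"assign": user_file_id}
    if user_folder_id is not None:
        fields["user_folder"] = {"assign": user_folder_id}
    return {"fields": fields}

# Only user_file is assigned; boost and user_folder are left untouched on the chunk
assert build_update_payload("42", None, None) == {"fields": {"user_file": {"assign": "42"}}}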
@@ -682,7 +697,12 @@ class VespaIndex(DocumentIndex): for doc_chunk_id in doc_chunk_ids: self._update_single_chunk( - doc_chunk_id, index_name, fields, doc_id, httpx_client + doc_chunk_id, + index_name, + fields, + user_fields, + doc_id, + httpx_client, ) return doc_chunk_count @@ -723,6 +743,7 @@ class VespaIndex(DocumentIndex): tenant_id=tenant_id, large_chunks_enabled=large_chunks_enabled, ) + for doc_chunk_ids_batch in batch_generator( chunks_to_delete, BATCH_SIZE ): diff --git a/backend/onyx/document_index/vespa/indexing_utils.py b/backend/onyx/document_index/vespa/indexing_utils.py index 9145ce63c0..2772d45a21 100644 --- a/backend/onyx/document_index/vespa/indexing_utils.py +++ b/backend/onyx/document_index/vespa/indexing_utils.py @@ -51,6 +51,8 @@ from onyx.document_index.vespa_constants import SOURCE_TYPE from onyx.document_index.vespa_constants import TENANT_ID from onyx.document_index.vespa_constants import TITLE from onyx.document_index.vespa_constants import TITLE_EMBEDDING +from onyx.document_index.vespa_constants import USER_FILE +from onyx.document_index.vespa_constants import USER_FOLDER from onyx.indexing.models import DocMetadataAwareIndexChunk from onyx.utils.logger import setup_logger @@ -205,6 +207,8 @@ def _index_vespa_chunk( ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()}, DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets}, IMAGE_FILE_NAME: chunk.image_file_name, + USER_FILE: chunk.user_file if chunk.user_file is not None else None, + USER_FOLDER: chunk.user_folder if chunk.user_folder is not None else None, BOOST: chunk.boost, AGGREGATED_CHUNK_BOOST_FACTOR: chunk.aggregated_chunk_boost_factor, } diff --git a/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py b/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py index cc11a42d38..afd71a4e50 100644 --- a/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py +++ b/backend/onyx/document_index/vespa/shared_utils/vespa_request_builders.py @@ -14,6 +14,8 @@ from onyx.document_index.vespa_constants import HIDDEN from onyx.document_index.vespa_constants import METADATA_LIST from onyx.document_index.vespa_constants import SOURCE_TYPE from onyx.document_index.vespa_constants import TENANT_ID +from onyx.document_index.vespa_constants import USER_FILE +from onyx.document_index.vespa_constants import USER_FOLDER from onyx.utils.logger import setup_logger from shared_configs.configs import MULTI_TENANT @@ -27,14 +29,26 @@ def build_vespa_filters( remove_trailing_and: bool = False, # Set to True when using as a complete Vespa query ) -> str: def _build_or_filters(key: str, vals: list[str] | None) -> str: - if vals is None: + """For string-based 'contains' filters, e.g. WSET fields or array fields.""" + if not key or not vals: + return "" + eq_elems = [f'{key} contains "{val}"' for val in vals if val] + if not eq_elems: + return "" + or_clause = " or ".join(eq_elems) + return f"({or_clause}) and " + + def _build_int_or_filters(key: str, vals: list[int] | None) -> str: + """ + For an integer field filter. + If vals is not None, we want *only* docs whose key matches one of vals. 
+        otherwise the filter is skipped entirely.
+        """
+        if not vals:
             return ""
-        valid_vals = [val for val in vals if val]
-        if not key or not valid_vals:
-            return ""
-
-        eq_elems = [f'{key} contains "{elem}"' for elem in valid_vals]
+        # Build the OR filter over the integer values
+        eq_elems = [f"{key} = {val}" for val in vals]
         or_clause = " or ".join(eq_elems)
         result = f"({or_clause}) and "

@@ -42,53 +56,59 @@ def build_vespa_filters(
     def _build_time_filter(
         cutoff: datetime | None,
-        # Slightly over 3 Months, approximately 1 fiscal quarter
         untimed_doc_cutoff: timedelta = timedelta(days=92),
     ) -> str:
         if not cutoff:
             return ""
-
-        # For Documents that don't have an updated at, filter them out for queries asking for
-        # very recent documents (3 months) default. Documents that don't have an updated at
-        # time are assigned 3 months for time decay value
         include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff
         cutoff_secs = int(cutoff.timestamp())

         if include_untimed:
-            # Documents without updated_at are assigned -1 as their date
             return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and "
-
         return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and "

+    # Start building the filter string
     filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""

-    # If running in multi-tenant mode, we may want to filter by tenant_id
+    # If running in multi-tenant mode
     if filters.tenant_id and MULTI_TENANT:
         filter_str += f'({TENANT_ID} contains "{filters.tenant_id}") and '

-    # CAREFUL touching this one, currently there is no second ACL double-check post retrieval
+    # ACL filters
     if filters.access_control_list is not None:
         filter_str += _build_or_filters(
             ACCESS_CONTROL_LIST, filters.access_control_list
         )

+    # Source type filters
     source_strs = (
         [s.value for s in filters.source_type] if filters.source_type else None
     )
     filter_str += _build_or_filters(SOURCE_TYPE, source_strs)

+    # Tag filters
     tag_attributes = None
-    tags = filters.tags
-    if tags:
-        tag_attributes = [tag.tag_key + INDEX_SEPARATOR + tag.tag_value for tag in tags]
+    if filters.tags:
+        # build e.g.
"tag_key|tag_value" + tag_attributes = [ + f"{tag.tag_key}{INDEX_SEPARATOR}{tag.tag_value}" for tag in filters.tags + ] filter_str += _build_or_filters(METADATA_LIST, tag_attributes) + # Document sets filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set) + # New: user_file_ids as integer filters + filter_str += _build_int_or_filters(USER_FILE, filters.user_file_ids) + + filter_str += _build_int_or_filters(USER_FOLDER, filters.user_folder_ids) + + # Time filter filter_str += _build_time_filter(filters.time_cutoff) + # Trim trailing " and " if remove_trailing_and and filter_str.endswith(" and "): - filter_str = filter_str[:-5] # We remove the trailing " and " + filter_str = filter_str[:-5] return filter_str diff --git a/backend/onyx/document_index/vespa_constants.py b/backend/onyx/document_index/vespa_constants.py index 66a7fd99df..2b8f72c357 100644 --- a/backend/onyx/document_index/vespa_constants.py +++ b/backend/onyx/document_index/vespa_constants.py @@ -67,6 +67,8 @@ EMBEDDINGS = "embeddings" TITLE_EMBEDDING = "title_embedding" ACCESS_CONTROL_LIST = "access_control_list" DOCUMENT_SETS = "document_sets" +USER_FILE = "user_file" +USER_FOLDER = "user_folder" LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids" METADATA = "metadata" METADATA_LIST = "metadata_list" diff --git a/backend/onyx/file_processing/unstructured.py b/backend/onyx/file_processing/unstructured.py index 3827c68914..7cbabadfd6 100644 --- a/backend/onyx/file_processing/unstructured.py +++ b/backend/onyx/file_processing/unstructured.py @@ -37,6 +37,7 @@ def delete_unstructured_api_key() -> None: def _sdk_partition_request( file: IO[Any], file_name: str, **kwargs: Any ) -> operations.PartitionRequest: + file.seek(0, 0) try: request = operations.PartitionRequest( partition_parameters=shared.PartitionParameters( diff --git a/backend/onyx/file_store/file_store.py b/backend/onyx/file_store/file_store.py index b042c86800..b7057546c0 100644 --- a/backend/onyx/file_store/file_store.py +++ b/backend/onyx/file_store/file_store.py @@ -31,6 +31,7 @@ class FileStore(ABC): file_origin: FileOrigin, file_type: str, file_metadata: dict | None = None, + commit: bool = True, ) -> None: """ Save a file to the blob store @@ -42,6 +43,8 @@ class FileStore(ABC): - display_name: Display name of the file - file_origin: Origin of the file - file_type: Type of the file + - file_metadata: Additional metadata for the file + - commit: Whether to commit the transaction after saving the file """ raise NotImplementedError @@ -90,6 +93,7 @@ class PostgresBackedFileStore(FileStore): file_origin: FileOrigin, file_type: str, file_metadata: dict | None = None, + commit: bool = True, ) -> None: try: # The large objects in postgres are saved as special objects can be listed with @@ -104,7 +108,8 @@ class PostgresBackedFileStore(FileStore): db_session=self.db_session, file_metadata=file_metadata, ) - self.db_session.commit() + if commit: + self.db_session.commit() except Exception: self.db_session.rollback() raise diff --git a/backend/onyx/file_store/models.py b/backend/onyx/file_store/models.py index 5bf964287e..86e48d7ed4 100644 --- a/backend/onyx/file_store/models.py +++ b/backend/onyx/file_store/models.py @@ -14,6 +14,7 @@ class ChatFileType(str, Enum): # Plain text only contain the text PLAIN_TEXT = "plain_text" CSV = "csv" + USER_KNOWLEDGE = "user_knowledge" class FileDescriptor(TypedDict): diff --git a/backend/onyx/file_store/utils.py b/backend/onyx/file_store/utils.py index 91198790a1..87095d62b1 100644 --- 
a/backend/onyx/file_store/utils.py +++ b/backend/onyx/file_store/utils.py @@ -10,12 +10,62 @@ from sqlalchemy.orm import Session from onyx.configs.constants import FileOrigin from onyx.db.engine import get_session_with_current_tenant from onyx.db.models import ChatMessage +from onyx.db.models import UserFile +from onyx.db.models import UserFolder from onyx.file_store.file_store import get_default_file_store +from onyx.file_store.models import ChatFileType from onyx.file_store.models import FileDescriptor from onyx.file_store.models import InMemoryChatFile from onyx.utils.b64 import get_image_type +from onyx.utils.logger import setup_logger from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel +logger = setup_logger() + + +def user_file_id_to_plaintext_file_name(user_file_id: int) -> str: + """Generate a consistent file name for storing plaintext content of a user file.""" + return f"plaintext_{user_file_id}" + + +def store_user_file_plaintext( + user_file_id: int, plaintext_content: str, db_session: Session +) -> bool: + """ + Store plaintext content for a user file in the file store. + + Args: + user_file_id: The ID of the user file + plaintext_content: The plaintext content to store + db_session: The database session + + Returns: + bool: True if storage was successful, False otherwise + """ + # Skip empty content + if not plaintext_content: + return False + + # Get plaintext file name + plaintext_file_name = user_file_id_to_plaintext_file_name(user_file_id) + + # Store the plaintext in the file store + file_store = get_default_file_store(db_session) + file_content = BytesIO(plaintext_content.encode("utf-8")) + try: + file_store.save_file( + file_name=plaintext_file_name, + content=file_content, + display_name=f"Plaintext for user file {user_file_id}", + file_origin=FileOrigin.PLAINTEXT_CACHE, + file_type="text/plain", + commit=False, + ) + return True + except Exception as e: + logger.warning(f"Failed to store plaintext for user file {user_file_id}: {e}") + return False + def load_chat_file( file_descriptor: FileDescriptor, db_session: Session @@ -53,6 +103,83 @@ def load_all_chat_files( return files +def load_user_folder(folder_id: int, db_session: Session) -> list[InMemoryChatFile]: + user_files = ( + db_session.query(UserFile).filter(UserFile.folder_id == folder_id).all() + ) + return [load_user_file(file.id, db_session) for file in user_files] + + +def load_user_file(file_id: int, db_session: Session) -> InMemoryChatFile: + user_file = db_session.query(UserFile).filter(UserFile.id == file_id).first() + if not user_file: + raise ValueError(f"User file with id {file_id} not found") + + # Try to load plaintext version first + file_store = get_default_file_store(db_session) + plaintext_file_name = user_file_id_to_plaintext_file_name(file_id) + + try: + file_io = file_store.read_file(plaintext_file_name, mode="b") + return InMemoryChatFile( + file_id=str(user_file.file_id), + content=file_io.read(), + file_type=ChatFileType.USER_KNOWLEDGE, + filename=user_file.name, + ) + except Exception as e: + logger.warning( + f"Failed to load plaintext file {plaintext_file_name}, defaulting to original file: {e}" + ) + # Fall back to original file if plaintext not available + file_io = file_store.read_file(user_file.file_id, mode="b") + return InMemoryChatFile( + file_id=str(user_file.file_id), + content=file_io.read(), + file_type=ChatFileType.USER_KNOWLEDGE, + filename=user_file.name, + ) + + +def load_all_user_files( + user_file_ids: list[int], + user_folder_ids: 
list[int], + db_session: Session, +) -> list[InMemoryChatFile]: + return cast( + list[InMemoryChatFile], + run_functions_tuples_in_parallel( + [(load_user_file, (file_id, db_session)) for file_id in user_file_ids] + ) + + [ + file + for folder_id in user_folder_ids + for file in load_user_folder(folder_id, db_session) + ], + ) + + +def load_all_user_file_files( + user_file_ids: list[int], + user_folder_ids: list[int], + db_session: Session, +) -> list[UserFile]: + user_files: list[UserFile] = [] + for user_file_id in user_file_ids: + user_file = ( + db_session.query(UserFile).filter(UserFile.id == user_file_id).first() + ) + if user_file is not None: + user_files.append(user_file) + for user_folder_id in user_folder_ids: + user_files.extend( + db_session.query(UserFile) + .filter(UserFile.folder_id == user_folder_id) + .all() + ) + return user_files + + def save_file_from_url(url: str) -> str: """NOTE: using multiple sessions here, since this is often called using multithreading. In practice, sharing a session has resulted in @@ -71,6 +198,7 @@ def save_file_from_url(url: str) -> str: display_name="GeneratedImage", file_origin=FileOrigin.CHAT_IMAGE_GEN, file_type="image/png;base64", + commit=True, ) return unique_id @@ -85,6 +213,7 @@ def save_file_from_base64(base64_string: str) -> str: display_name="GeneratedImage", file_origin=FileOrigin.CHAT_IMAGE_GEN, file_type=get_image_type(base64_string), + commit=True, ) return unique_id @@ -128,3 +257,39 @@ def save_files(urls: list[str], base64_files: list[str]) -> list[str]: ] return run_functions_tuples_in_parallel(funcs) + + +def load_all_persona_files_for_chat( + persona_id: int, db_session: Session +) -> tuple[list[InMemoryChatFile], list[int]]: + from onyx.db.models import Persona + from sqlalchemy.orm import joinedload + + persona = ( + db_session.query(Persona) + .filter(Persona.id == persona_id) + .options( + joinedload(Persona.user_files), + joinedload(Persona.user_folders).joinedload(UserFolder.files), + ) + .one() + ) + + persona_file_calls = [ + (load_user_file, (user_file.id, db_session)) for user_file in persona.user_files + ] + persona_loaded_files = run_functions_tuples_in_parallel(persona_file_calls) + + persona_folder_files = [] + persona_folder_file_ids = [] + for user_folder in persona.user_folders: + folder_files = load_user_folder(user_folder.id, db_session) + persona_folder_files.extend(folder_files) + persona_folder_file_ids.extend([file.id for file in user_folder.files]) + + persona_files = list(persona_loaded_files) + persona_folder_files + persona_file_ids = [ + file.id for file in persona.user_files + ] + persona_folder_file_ids + + return persona_files, persona_file_ids diff --git a/backend/onyx/indexing/indexing_pipeline.py b/backend/onyx/indexing/indexing_pipeline.py index 5dc6d4b252..7af0b3999f 100644 --- a/backend/onyx/indexing/indexing_pipeline.py +++ b/backend/onyx/indexing/indexing_pipeline.py @@ -49,6 +49,9 @@ from onyx.db.pg_file_store import read_lobj from onyx.db.search_settings import get_active_search_settings from onyx.db.tag import create_or_add_document_tag from onyx.db.tag import create_or_add_document_tag_list +from onyx.db.user_documents import fetch_user_files_for_documents +from onyx.db.user_documents import fetch_user_folders_for_documents +from onyx.db.user_documents import update_user_file_token_count__no_commit from onyx.document_index.document_index_utils import ( get_multipass_config, ) @@ -56,6 +59,7 @@ from onyx.document_index.interfaces import DocumentIndex from 
onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
 from onyx.file_processing.image_summarization import summarize_image_with_error_handling
+from onyx.file_store.utils import store_user_file_plaintext
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import embed_chunks_with_failure_handling
 from onyx.indexing.embedder import IndexingEmbedder
@@ -67,6 +71,7 @@ from onyx.indexing.models import UpdatableChunkData
 from onyx.indexing.vector_db_insertion import write_chunks_to_vector_db_with_backoff
 from onyx.llm.chat_llm import LLMRateLimitError
 from onyx.llm.factory import get_default_llm_with_vision
+from onyx.llm.factory import get_default_llms
 from onyx.llm.factory import get_llm_for_contextual_rag
 from onyx.llm.interfaces import LLM
 from onyx.llm.utils import get_max_input_tokens
@@ -769,6 +774,7 @@ def index_doc_batch(
     # NOTE: no special handling for failures here, since the chunker is not
     # a common source of failure for the indexing pipeline
     chunks: list[DocAwareChunk] = chunker.chunk(ctx.indexable_docs)
+    llm_tokenizer: BaseTokenizer | None = None

     # contextual RAG
     if enable_contextual_rag:
@@ -826,6 +832,15 @@ def index_doc_batch(
         )
     }

+    doc_id_to_user_file_id: dict[str, int | None] = fetch_user_files_for_documents(
+        document_ids=updatable_ids, db_session=db_session
+    )
+    doc_id_to_user_folder_id: dict[
+        str, int | None
+    ] = fetch_user_folders_for_documents(
+        document_ids=updatable_ids, db_session=db_session
+    )
+
     doc_id_to_previous_chunk_cnt: dict[str, int | None] = {
         document_id: chunk_count
         for document_id, chunk_count in fetch_chunk_counts_for_documents(
@@ -845,6 +860,48 @@ def index_doc_batch(
         for document_id in updatable_ids
     }

+    try:
+        llm, _ = get_default_llms()
+
+        llm_tokenizer = get_tokenizer(
+            model_name=llm.config.model_name,
+            provider_type=llm.config.model_provider,
+        )
+    except Exception as e:
+        logger.error(f"Error getting tokenizer: {e}")
+        llm_tokenizer = None
+
+    # Calculate token counts for each document by combining all its chunks' content
+    user_file_id_to_token_count: dict[int, int | None] = {}
+    user_file_id_to_raw_text: dict[int, str] = {}
+    for document_id in updatable_ids:
+        # Only calculate token counts for documents that have a user file ID
+        user_file_id = doc_id_to_user_file_id.get(document_id)
+        if not user_file_id:
+            continue
+        document_chunks = [
+            chunk
+            for chunk in chunks_with_embeddings
+            if chunk.source_document.id == document_id
+        ]
+        if document_chunks:
+            combined_content = " ".join(
+                [chunk.content for chunk in document_chunks]
+            )
+            token_count = (
+                len(llm_tokenizer.encode(combined_content))
+                if llm_tokenizer
+                else 0
+            )
+            user_file_id_to_token_count[user_file_id] = token_count
+            user_file_id_to_raw_text[user_file_id] = combined_content
+        else:
+            user_file_id_to_token_count[user_file_id] = None
+
     # we're concerned about race conditions where multiple simultaneous indexings might result
     # in one set of metadata overwriting another one in vespa.
# we still write data here for the immediate and most likely correct sync, but
@@ -857,6 +914,10 @@
             document_sets=set(
                 doc_id_to_document_set.get(chunk.source_document.id, [])
             ),
+            user_file=doc_id_to_user_file_id.get(chunk.source_document.id, None),
+            user_folder=doc_id_to_user_folder_id.get(
+                chunk.source_document.id, None
+            ),
             boost=(
                 ctx.id_to_db_doc_map[chunk.source_document.id].boost
                 if chunk.source_document.id in ctx.id_to_db_doc_map
@@ -938,6 +999,11 @@
             db_session=db_session,
         )

+        update_user_file_token_count__no_commit(
+            user_file_id_to_token_count=user_file_id_to_token_count,
+            db_session=db_session,
+        )
+
         # these documents can now be counted as part of the CC Pairs
         # document count, so we need to mark them as indexed
         # NOTE: even documents we skipped since they were already up
@@ -949,12 +1015,22 @@
             document_ids=[doc.id for doc in filtered_documents],
             db_session=db_session,
         )
+        # Store the plaintext in the file store for faster retrieval
+        for user_file_id, raw_text in user_file_id_to_raw_text.items():
+            store_user_file_plaintext(
+                user_file_id=user_file_id,
+                plaintext_content=raw_text,
+                db_session=db_session,
+            )

         # save the chunk boost components to postgres
         update_chunk_boost_components__no_commit(
             chunk_data=updatable_chunk_data, db_session=db_session
         )

         db_session.commit()

     result = IndexingPipelineResult(
diff --git a/backend/onyx/indexing/models.py b/backend/onyx/indexing/models.py
index d6283ee213..e2abd66ea6 100644
--- a/backend/onyx/indexing/models.py
+++ b/backend/onyx/indexing/models.py
@@ -100,6 +100,8 @@ class DocMetadataAwareIndexChunk(IndexChunk):
     tenant_id: str
     access: "DocumentAccess"
     document_sets: set[str]
+    user_file: int | None
+    user_folder: int | None
     boost: int
     aggregated_chunk_boost_factor: float
@@ -109,6 +111,8 @@ class DocMetadataAwareIndexChunk(IndexChunk):
         index_chunk: IndexChunk,
         access: "DocumentAccess",
         document_sets: set[str],
+        user_file: int | None,
+        user_folder: int | None,
         boost: int,
         aggregated_chunk_boost_factor: float,
         tenant_id: str,
@@ -118,6 +122,8 @@ class DocMetadataAwareIndexChunk(IndexChunk):
             **index_chunk_data,
             access=access,
             document_sets=document_sets,
+            user_file=user_file,
+            user_folder=user_folder,
             boost=boost,
             aggregated_chunk_boost_factor=aggregated_chunk_boost_factor,
             tenant_id=tenant_id,
diff --git a/backend/onyx/llm/utils.py b/backend/onyx/llm/utils.py
index 9f257b0408..68fb833d9e 100644
--- a/backend/onyx/llm/utils.py
+++ b/backend/onyx/llm/utils.py
@@ -1,4 +1,5 @@
 import copy
+import io
 import json
 from collections.abc import Callable
 from collections.abc import Iterator
@@ -37,6 +38,7 @@ from onyx.configs.model_configs import DOC_EMBEDDING_CONTEXT_SIZE
 from onyx.configs.model_configs import GEN_AI_MAX_TOKENS
 from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS
 from onyx.configs.model_configs import GEN_AI_NUM_RESERVED_OUTPUT_TOKENS
+from onyx.file_processing.extract_file_text import read_pdf_file
 from onyx.file_store.models import ChatFileType
 from onyx.file_store.models import InMemoryChatFile
 from onyx.llm.interfaces import LLM
@@ -129,7 +131,12 @@ def _build_content(
     text_files = [
         file
         for file in files
-        if file.file_type in (ChatFileType.PLAIN_TEXT, ChatFileType.CSV)
+        if file.file_type
+        in (
+            ChatFileType.PLAIN_TEXT,
+            ChatFileType.CSV,
+            ChatFileType.USER_KNOWLEDGE,
+        )
     ]

     if not text_files:
@@ -137,7 +144,18 @@
 final_message_with_files = "FILES:\n\n"
     for file in text_files:
-        file_content = file.content.decode("utf-8")
+        try:
+            file_content = file.content.decode("utf-8")
+        except UnicodeDecodeError:
+            # Try to decode as binary
+            try:
+                file_content, _, _ = read_pdf_file(io.BytesIO(file.content))
+            except Exception:
+                file_content = f"[Binary file content - {file.file_type} format]"
+                logger.exception(
+                    f"Could not decode binary file content for file type: {file.file_type}"
+                )
         file_name_section = f"DOCUMENT: {file.filename}\n" if file.filename else ""
         final_message_with_files += (
             f"{file_name_section}{CODE_BLOCK_PAT.format(file_content.strip())}\n\n\n"
@@ -165,7 +183,6 @@ def build_content_with_imgs(
     img_urls = img_urls or []
     b64_imgs = b64_imgs or []
-
     message_main_content = _build_content(message, files)

     if exclude_images or (not img_files and not img_urls):
@@ -413,14 +430,12 @@ def _find_model_obj(model_map: dict, provider: str, model_name: str) -> dict | None:
     for model_name in filtered_model_names:
         model_obj = model_map.get(f"{provider}/{model_name}")
         if model_obj:
-            logger.debug(f"Using model object for {provider}/{model_name}")
             return model_obj

     # Then try all model names without provider prefix
     for model_name in filtered_model_names:
         model_obj = model_map.get(model_name)
         if model_obj:
-            logger.debug(f"Using model object for {model_name}")
             return model_obj

     return None
@@ -516,14 +531,10 @@
     if "max_input_tokens" in model_obj:
         max_tokens = model_obj["max_input_tokens"]
-        logger.debug(
-            f"Max tokens for {model_name}: {max_tokens} (from max_input_tokens)"
-        )
         return max_tokens

     if "max_tokens" in model_obj:
         max_tokens = model_obj["max_tokens"]
-        logger.debug(f"Max tokens for {model_name}: {max_tokens} (from max_tokens)")
         return max_tokens

     logger.error(f"No max tokens found for LLM: {model_name}")
@@ -545,21 +556,16 @@
     model_obj = model_map.get(f"{model_provider}/{model_name}")
     if not model_obj:
         model_obj = model_map[model_name]
-        logger.debug(f"Using model object for {model_name}")
-    else:
-        logger.debug(f"Using model object for {model_provider}/{model_name}")

     if "max_output_tokens" in model_obj:
         max_output_tokens = model_obj["max_output_tokens"]
-        logger.info(f"Max output tokens for {model_name}: {max_output_tokens}")
         return max_output_tokens

     # Fallback to a fraction of max_tokens if max_output_tokens is not specified
     if "max_tokens" in model_obj:
         max_output_tokens = int(model_obj["max_tokens"] * 0.1)
-        logger.info(
-            f"Fallback max output tokens for {model_name}: {max_output_tokens} (10% of max_tokens)"
-        )
         return max_output_tokens

     logger.error(f"No max output tokens found for LLM: {model_name}")
diff --git a/backend/onyx/main.py b/backend/onyx/main.py
index f436392182..d76728ac76 100644
--- a/backend/onyx/main.py
+++ b/backend/onyx/main.py
@@ -97,6 +97,7 @@ from onyx.server.settings.api import basic_router as settings_router
 from onyx.server.token_rate_limits.api import (
     router as token_rate_limit_settings_router,
 )
+from onyx.server.user_documents.api import router as user_documents_router
 from onyx.server.utils import BasicAuthenticationError
 from onyx.setup import setup_multitenant_onyx
 from onyx.setup import setup_onyx
@@ -297,6 +298,7 @@ def get_application() -> FastAPI:
     include_router_with_global_prefix_prepended(application, input_prompt_router)
     include_router_with_global_prefix_prepended(application, admin_input_prompt_router)
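+    # Reviewer note: user_documents_router (registered below) serves the new
+    # /user/folder and /user/file endpoints defined in onyx/server/user_documents/api.py.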
include_router_with_global_prefix_prepended(application, cc_pair_router) + include_router_with_global_prefix_prepended(application, user_documents_router) include_router_with_global_prefix_prepended(application, folder_router) include_router_with_global_prefix_prepended(application, document_set_router) include_router_with_global_prefix_prepended(application, search_settings_router) diff --git a/backend/onyx/onyxbot/slack/listener.py b/backend/onyx/onyxbot/slack/listener.py index 20041d1b5c..518a81083d 100644 --- a/backend/onyx/onyxbot/slack/listener.py +++ b/backend/onyx/onyxbot/slack/listener.py @@ -594,7 +594,7 @@ def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) - bot_tag_id = get_onyx_bot_slack_bot_id(client.web_client) if event_type == "message": is_dm = event.get("channel_type") == "im" - is_tagged = bot_tag_id and bot_tag_id in msg + is_tagged = bot_tag_id and f"<@{bot_tag_id}>" in msg is_onyx_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "") # OnyxBot should never respond to itself @@ -727,7 +727,11 @@ def build_request_details( event = cast(dict[str, Any], req.payload["event"]) msg = cast(str, event["text"]) channel = cast(str, event["channel"]) - tagged = event.get("type") == "app_mention" + # Check for both app_mention events and messages containing bot tag + bot_tag_id = get_onyx_bot_slack_bot_id(client.web_client) + tagged = (event.get("type") == "app_mention") or ( + event.get("type") == "message" and bot_tag_id and f"<@{bot_tag_id}>" in msg + ) message_ts = event.get("ts") thread_ts = event.get("thread_ts") sender_id = event.get("user") or None diff --git a/backend/onyx/onyxbot/slack/utils.py b/backend/onyx/onyxbot/slack/utils.py index ad7dbe2796..e9cec043d4 100644 --- a/backend/onyx/onyxbot/slack/utils.py +++ b/backend/onyx/onyxbot/slack/utils.py @@ -145,7 +145,7 @@ def update_emote_react( def remove_onyx_bot_tag(message_str: str, client: WebClient) -> str: bot_tag_id = get_onyx_bot_slack_bot_id(web_client=client) - return re.sub(rf"<@{bot_tag_id}>\s", "", message_str) + return re.sub(rf"<@{bot_tag_id}>\s*", "", message_str) def _check_for_url_in_block(block: Block) -> bool: diff --git a/backend/onyx/seeding/load_docs.py b/backend/onyx/seeding/load_docs.py index ab99f2b930..f8a9c81815 100644 --- a/backend/onyx/seeding/load_docs.py +++ b/backend/onyx/seeding/load_docs.py @@ -98,6 +98,8 @@ def _create_indexable_chunks( tenant_id=tenant_id if MULTI_TENANT else POSTGRES_DEFAULT_SCHEMA, access=default_public_access, document_sets=set(), + user_file=None, + user_folder=None, boost=DEFAULT_BOOST, large_chunk_id=None, image_file_name=None, diff --git a/backend/onyx/seeding/load_yamls.py b/backend/onyx/seeding/load_yamls.py index 204b7180aa..cfad371a1b 100644 --- a/backend/onyx/seeding/load_yamls.py +++ b/backend/onyx/seeding/load_yamls.py @@ -5,6 +5,7 @@ from onyx.configs.chat_configs import INPUT_PROMPT_YAML from onyx.configs.chat_configs import MAX_CHUNKS_FED_TO_CHAT from onyx.configs.chat_configs import PERSONAS_YAML from onyx.configs.chat_configs import PROMPTS_YAML +from onyx.configs.chat_configs import USER_FOLDERS_YAML from onyx.context.search.enums import RecencyBiasSetting from onyx.db.document_set import get_or_create_document_set_by_name from onyx.db.input_prompt import insert_input_prompt_if_not_exists @@ -15,6 +16,29 @@ from onyx.db.models import Tool as ToolDBModel from onyx.db.persona import upsert_persona from onyx.db.prompts import get_prompt_by_name from onyx.db.prompts import upsert_prompt +from onyx.db.user_documents 
import upsert_user_folder + + +def load_user_folders_from_yaml( + db_session: Session, + user_folders_yaml: str = USER_FOLDERS_YAML, +) -> None: + with open(user_folders_yaml, "r") as file: + data = yaml.safe_load(file) + + all_user_folders = data.get("user_folders", []) + for user_folder in all_user_folders: + upsert_user_folder( + db_session=db_session, + id=user_folder.get("id"), + name=user_folder.get("name"), + description=user_folder.get("description"), + created_at=user_folder.get("created_at"), + user=user_folder.get("user"), + files=user_folder.get("files"), + assistants=user_folder.get("assistants"), + ) + db_session.flush() def load_prompts_from_yaml( @@ -179,3 +203,4 @@ def load_chat_yamls( load_prompts_from_yaml(db_session, prompt_yaml) load_personas_from_yaml(db_session, personas_yaml) load_input_prompts_from_yaml(db_session, input_prompts_yaml) + load_user_folders_from_yaml(db_session) diff --git a/backend/onyx/seeding/user_folders.yaml b/backend/onyx/seeding/user_folders.yaml new file mode 100644 index 0000000000..e68b414c60 --- /dev/null +++ b/backend/onyx/seeding/user_folders.yaml @@ -0,0 +1,6 @@ +user_folders: + - id: -1 + name: "Recent Documents" + description: "Documents uploaded by the user" + files: [] + assistants: [] diff --git a/backend/onyx/server/documents/connector.py b/backend/onyx/server/documents/connector.py index 920c02b18c..f9791eaab6 100644 --- a/backend/onyx/server/documents/connector.py +++ b/backend/onyx/server/documents/connector.py @@ -389,12 +389,7 @@ def check_drive_tokens( return AuthStatus(authenticated=True) -@router.post("/admin/connector/file/upload") -def upload_files( - files: list[UploadFile], - _: User = Depends(current_curator_or_admin_user), - db_session: Session = Depends(get_session), -) -> FileUploadResponse: +def upload_files(files: list[UploadFile], db_session: Session) -> FileUploadResponse: for file in files: if not file.filename: raise HTTPException(status_code=400, detail="File name cannot be empty") @@ -455,6 +450,15 @@ def upload_files( return FileUploadResponse(file_paths=deduped_file_paths) +@router.post("/admin/connector/file/upload") +def upload_files_api( + files: list[UploadFile], + _: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> FileUploadResponse: + return upload_files(files, db_session) + + @router.get("/admin/connector") def get_connectors_by_credential( _: User = Depends(current_curator_or_admin_user), @@ -758,6 +762,16 @@ def get_connector_indexing_status( (connector.id, credential.id) ) + # Safely get the owner email, handling detached instances + owner_email = "" + try: + if credential.user: + owner_email = credential.user.email + except Exception: + # If there's any error accessing the user (like DetachedInstanceError), + # we'll just use an empty string for the owner email + pass + indexing_statuses.append( ConnectorIndexingStatus( cc_pair_id=cc_pair.id, @@ -769,7 +783,7 @@ def get_connector_indexing_status( ), credential=CredentialSnapshot.from_credential_db_model(credential), access_type=cc_pair.access_type, - owner=credential.user.email if credential.user else "", + owner=owner_email, groups=group_cc_pair_relationships_dict.get(cc_pair.id, []), last_finished_status=( latest_finished_attempt.status if latest_finished_attempt else None @@ -1042,55 +1056,16 @@ def connector_run_once( status_code=400, detail="Connector has no valid credentials, cannot create index attempts.", ) - - # Prevents index attempts for cc pairs that already have an index attempt 
currently running - skipped_credentials = [ - credential_id - for credential_id in credential_ids - if get_index_attempts_for_cc_pair( - cc_pair_identifier=ConnectorCredentialPairIdentifier( - connector_id=run_info.connector_id, - credential_id=credential_id, - ), - only_current=True, - db_session=db_session, - disinclude_finished=True, + try: + num_triggers = trigger_indexing_for_cc_pair( + credential_ids, + connector_id, + run_info.from_beginning, + tenant_id, + db_session, ) - ] - - connector_credential_pairs = [ - get_connector_credential_pair( - db_session=db_session, - connector_id=connector_id, - credential_id=credential_id, - ) - for credential_id in credential_ids - if credential_id not in skipped_credentials - ] - - num_triggers = 0 - for cc_pair in connector_credential_pairs: - if cc_pair is not None: - indexing_mode = IndexingMode.UPDATE - if run_info.from_beginning: - indexing_mode = IndexingMode.REINDEX - - mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session) - num_triggers += 1 - - logger.info( - f"connector_run_once - marking cc_pair with indexing trigger: " - f"connector={run_info.connector_id} " - f"cc_pair={cc_pair.id} " - f"indexing_trigger={indexing_mode}" - ) - - # run the beat task to pick up the triggers immediately - primary_app.send_task( - OnyxCeleryTask.CHECK_FOR_INDEXING, - priority=OnyxCeleryPriority.HIGH, - kwargs={"tenant_id": tenant_id}, - ) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) logger.info("connector_run_once - running check_for_indexing") @@ -1264,3 +1239,85 @@ def get_basic_connector_indexing_status( for cc_pair in cc_pairs if cc_pair.connector.source != DocumentSource.INGESTION_API ] + + +def trigger_indexing_for_cc_pair( + specified_credential_ids: list[int], + connector_id: int, + from_beginning: bool, + tenant_id: str, + db_session: Session, + is_user_file: bool = False, +) -> int: + try: + possible_credential_ids = get_connector_credential_ids(connector_id, db_session) + except ValueError as e: + raise ValueError(f"Connector by id {connector_id} does not exist: {str(e)}") + + if not specified_credential_ids: + credential_ids = possible_credential_ids + else: + if set(specified_credential_ids).issubset(set(possible_credential_ids)): + credential_ids = specified_credential_ids + else: + raise ValueError( + "Not all specified credentials are associated with connector" + ) + + if not credential_ids: + raise ValueError( + "Connector has no valid credentials, cannot create index attempts." 
+ ) + + # Prevents index attempts for cc pairs that already have an index attempt currently running + skipped_credentials = [ + credential_id + for credential_id in credential_ids + if get_index_attempts_for_cc_pair( + cc_pair_identifier=ConnectorCredentialPairIdentifier( + connector_id=connector_id, + credential_id=credential_id, + ), + only_current=True, + db_session=db_session, + disinclude_finished=True, + ) + ] + + connector_credential_pairs = [ + get_connector_credential_pair( + db_session=db_session, + connector_id=connector_id, + credential_id=credential_id, + ) + for credential_id in credential_ids + if credential_id not in skipped_credentials + ] + + num_triggers = 0 + for cc_pair in connector_credential_pairs: + if cc_pair is not None: + indexing_mode = IndexingMode.UPDATE + if from_beginning: + indexing_mode = IndexingMode.REINDEX + + mark_ccpair_with_indexing_trigger(cc_pair.id, indexing_mode, db_session) + num_triggers += 1 + + logger.info( + f"connector_run_once - marking cc_pair with indexing trigger: " + f"connector={connector_id} " + f"cc_pair={cc_pair.id} " + f"indexing_trigger={indexing_mode}" + ) + + # run the beat task to pick up the triggers immediately + priority = OnyxCeleryPriority.HIGHEST if is_user_file else OnyxCeleryPriority.HIGH + logger.info(f"Sending indexing check task with priority {priority}") + primary_app.send_task( + OnyxCeleryTask.CHECK_FOR_INDEXING, + priority=priority, + kwargs={"tenant_id": tenant_id}, + ) + + return num_triggers diff --git a/backend/onyx/server/documents/models.py b/backend/onyx/server/documents/models.py index 9cf97a1887..c557cae3ce 100644 --- a/backend/onyx/server/documents/models.py +++ b/backend/onyx/server/documents/models.py @@ -122,6 +122,7 @@ class CredentialBase(BaseModel): name: str | None = None curator_public: bool = False groups: list[int] = Field(default_factory=list) + is_user_file: bool = False class CredentialSnapshot(CredentialBase): @@ -392,7 +393,7 @@ class FileUploadResponse(BaseModel): class ObjectCreationIdResponse(BaseModel): - id: int | str + id: int credential: CredentialSnapshot | None = None diff --git a/backend/onyx/server/features/folder/api.py b/backend/onyx/server/features/folder/api.py index 813c5eabdc..cf06c2c7cc 100644 --- a/backend/onyx/server/features/folder/api.py +++ b/backend/onyx/server/features/folder/api.py @@ -18,9 +18,9 @@ from onyx.db.models import User from onyx.server.features.folder.models import DeleteFolderOptions from onyx.server.features.folder.models import FolderChatSessionRequest from onyx.server.features.folder.models import FolderCreationRequest -from onyx.server.features.folder.models import FolderResponse from onyx.server.features.folder.models import FolderUpdateRequest from onyx.server.features.folder.models import GetUserFoldersResponse +from onyx.server.features.folder.models import UserFolderSnapshot from onyx.server.models import DisplayPriorityRequest from onyx.server.query_and_chat.models import ChatSessionDetails @@ -39,7 +39,7 @@ def get_folders( folders.sort() return GetUserFoldersResponse( folders=[ - FolderResponse( + UserFolderSnapshot( folder_id=folder.id, folder_name=folder.name, display_priority=folder.display_priority, diff --git a/backend/onyx/server/features/folder/models.py b/backend/onyx/server/features/folder/models.py index acb3fa415a..f63921d30d 100644 --- a/backend/onyx/server/features/folder/models.py +++ b/backend/onyx/server/features/folder/models.py @@ -5,7 +5,7 @@ from pydantic import BaseModel from onyx.server.query_and_chat.models import 
ChatSessionDetails -class FolderResponse(BaseModel): +class UserFolderSnapshot(BaseModel): folder_id: int folder_name: str | None display_priority: int @@ -13,7 +13,7 @@ class FolderResponse(BaseModel): class GetUserFoldersResponse(BaseModel): - folders: list[FolderResponse] + folders: list[UserFolderSnapshot] class FolderCreationRequest(BaseModel): diff --git a/backend/onyx/server/features/persona/api.py b/backend/onyx/server/features/persona/api.py index 8d6c9b014b..a58ab1b677 100644 --- a/backend/onyx/server/features/persona/api.py +++ b/backend/onyx/server/features/persona/api.py @@ -59,7 +59,6 @@ from shared_configs.contextvars import get_current_tenant_id logger = setup_logger() - admin_router = APIRouter(prefix="/admin/persona") basic_router = APIRouter(prefix="/persona") @@ -210,6 +209,7 @@ def create_persona( and len(persona_upsert_request.prompt_ids) > 0 else None ) + prompt = upsert_prompt( db_session=db_session, user=user, diff --git a/backend/onyx/server/features/persona/models.py b/backend/onyx/server/features/persona/models.py index 7ed43f47a2..c9ea8bad29 100644 --- a/backend/onyx/server/features/persona/models.py +++ b/backend/onyx/server/features/persona/models.py @@ -85,6 +85,8 @@ class PersonaUpsertRequest(BaseModel): label_ids: list[int] | None = None is_default_persona: bool = False display_priority: int | None = None + user_file_ids: list[int] | None = None + user_folder_ids: list[int] | None = None class PersonaSnapshot(BaseModel): @@ -113,6 +115,8 @@ class PersonaSnapshot(BaseModel): is_default_persona: bool search_start_date: datetime | None = None labels: list["PersonaLabelSnapshot"] = [] + user_file_ids: list[int] | None = None + user_folder_ids: list[int] | None = None @classmethod def from_model( @@ -161,6 +165,8 @@ class PersonaSnapshot(BaseModel): uploaded_image_id=persona.uploaded_image_id, search_start_date=persona.search_start_date, labels=[PersonaLabelSnapshot.from_model(label) for label in persona.labels], + user_file_ids=[file.id for file in persona.user_files], + user_folder_ids=[folder.id for folder in persona.user_folders], ) diff --git a/backend/onyx/server/manage/llm/api.py b/backend/onyx/server/manage/llm/api.py index 0e6a6ea03c..66a3d992ac 100644 --- a/backend/onyx/server/manage/llm/api.py +++ b/backend/onyx/server/manage/llm/api.py @@ -1,4 +1,6 @@ from collections.abc import Callable +from datetime import datetime +from datetime import timezone from fastapi import APIRouter from fastapi import Depends @@ -138,15 +140,29 @@ def list_llm_providers( _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> list[LLMProviderView]: + start_time = datetime.now(timezone.utc) + logger.debug("Starting to fetch LLM providers") + llm_provider_list: list[LLMProviderView] = [] for llm_provider_model in fetch_existing_llm_providers(db_session): + from_model_start = datetime.now(timezone.utc) full_llm_provider = LLMProviderView.from_model(llm_provider_model) + from_model_end = datetime.now(timezone.utc) + from_model_duration = (from_model_end - from_model_start).total_seconds() + logger.debug( + f"LLMProviderView.from_model took {from_model_duration:.2f} seconds" + ) + if full_llm_provider.api_key: full_llm_provider.api_key = ( full_llm_provider.api_key[:4] + "****" + full_llm_provider.api_key[-4:] ) llm_provider_list.append(full_llm_provider) + end_time = datetime.now(timezone.utc) + duration = (end_time - start_time).total_seconds() + logger.debug(f"Completed fetching LLM providers in {duration:.2f} seconds") + return 
llm_provider_list
@@ -282,12 +298,25 @@
 def list_llm_provider_basics(
     user: User | None = Depends(current_chat_accessible_user),
     db_session: Session = Depends(get_session),
 ) -> list[LLMProviderDescriptor]:
-    return [
-        LLMProviderDescriptor.from_model(llm_provider_model)
-        for llm_provider_model in fetch_existing_llm_providers_for_user(
-            db_session, user
-        )
-    ]
+    start_time = datetime.now(timezone.utc)
+    logger.debug("Starting to fetch basic LLM providers for user")
+
+    llm_provider_list: list[LLMProviderDescriptor] = []
+    for llm_provider_model in fetch_existing_llm_providers_for_user(db_session, user):
+        from_model_start = datetime.now(timezone.utc)
+        full_llm_provider = LLMProviderDescriptor.from_model(llm_provider_model)
+        from_model_end = datetime.now(timezone.utc)
+        from_model_duration = (from_model_end - from_model_start).total_seconds()
+        logger.debug(
+            f"LLMProviderDescriptor.from_model took {from_model_duration:.2f} seconds"
+        )
+        llm_provider_list.append(full_llm_provider)
+
+    end_time = datetime.now(timezone.utc)
+    duration = (end_time - start_time).total_seconds()
+    logger.debug(f"Completed fetching basic LLM providers in {duration:.2f} seconds")
+
+    return llm_provider_list


@admin_router.get("/provider-contextual-cost")
diff --git a/backend/onyx/server/manage/llm/models.py b/backend/onyx/server/manage/llm/models.py
index 7b9c7bc588..1d72ac82f4 100644
--- a/backend/onyx/server/manage/llm/models.py
+++ b/backend/onyx/server/manage/llm/models.py
@@ -4,6 +4,7 @@ from pydantic import BaseModel
 from pydantic import Field

 from onyx.llm.llm_provider_options import fetch_models_for_provider
+from onyx.llm.utils import get_max_input_tokens


 if TYPE_CHECKING:
@@ -38,24 +39,50 @@ class LLMProviderDescriptor(BaseModel):
     is_default_vision_provider: bool | None
     default_vision_model: str | None
     display_model_names: list[str] | None
+    model_token_limits: dict[str, int] | None = None

     @classmethod
     def from_model(
         cls, llm_provider_model: "LLMProviderModel"
     ) -> "LLMProviderDescriptor":
+        model_names = (
+            llm_provider_model.model_names
+            or fetch_models_for_provider(llm_provider_model.provider)
+            or [llm_provider_model.default_model_name]
+        )
+
+        model_token_limits = {
+            model_name: get_max_input_tokens(model_name, llm_provider_model.provider)
+            for model_name in model_names
+        }
+
         return cls(
             name=llm_provider_model.name,
             provider=llm_provider_model.provider,
             default_model_name=llm_provider_model.default_model_name,
             fast_default_model_name=llm_provider_model.fast_default_model_name,
             is_default_provider=llm_provider_model.is_default_provider,
+            model_names=model_names,
+            model_token_limits=model_token_limits,
             is_default_vision_provider=llm_provider_model.is_default_vision_provider,
             default_vision_model=llm_provider_model.default_vision_model,
-            model_names=llm_provider_model.model_names
-            or fetch_models_for_provider(llm_provider_model.provider),
             display_model_names=llm_provider_model.display_model_names,
         )


 class LLMProvider(BaseModel):
     name: str
@@ -87,6 +114,7 @@ class LLMProviderView(LLMProvider):
     is_default_provider: bool | None = None
     is_default_vision_provider: bool | None = None
     model_names: list[str]
+    model_token_limits: dict[str, int] | None = None

     @classmethod
     def from_model(cls, llm_provider_model: "LLMProviderModel") -> "LLMProviderView":
@@ -109,6 +137,14 @@ class LLMProviderView(LLMProvider):
             or
fetch_models_for_provider(llm_provider_model.provider) or [llm_provider_model.default_model_name] ), + model_token_limits={ + model_name: get_max_input_tokens( + model_name, llm_provider_model.provider + ) + for model_name in llm_provider_model.model_names + } + if llm_provider_model.model_names is not None + else None, is_public=llm_provider_model.is_public, groups=[group.id for group in llm_provider_model.groups], deployment_name=llm_provider_model.deployment_name, diff --git a/backend/onyx/server/query_and_chat/chat_backend.py b/backend/onyx/server/query_and_chat/chat_backend.py index ad3a3a18b1..55ac428e59 100644 --- a/backend/onyx/server/query_and_chat/chat_backend.py +++ b/backend/onyx/server/query_and_chat/chat_backend.py @@ -3,6 +3,7 @@ import datetime import io import json import os +import time import uuid from collections.abc import Callable from collections.abc import Generator @@ -29,10 +30,12 @@ from onyx.chat.prompt_builder.citations_prompt import ( compute_max_document_tokens_for_persona, ) from onyx.configs.app_configs import WEB_DOMAIN +from onyx.configs.constants import DocumentSource from onyx.configs.constants import FileOrigin from onyx.configs.constants import MessageType from onyx.configs.constants import MilestoneRecordType from onyx.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS +from onyx.connectors.models import InputType from onyx.db.chat import add_chats_to_session_from_slack_thread from onyx.db.chat import create_chat_session from onyx.db.chat import create_new_chat_message @@ -48,12 +51,17 @@ from onyx.db.chat import set_as_latest_chat_message from onyx.db.chat import translate_db_message_to_chat_message_detail from onyx.db.chat import update_chat_session from onyx.db.chat_search import search_chat_sessions +from onyx.db.connector import create_connector +from onyx.db.connector_credential_pair import add_credential_to_connector +from onyx.db.credentials import create_credential from onyx.db.engine import get_session from onyx.db.engine import get_session_with_tenant +from onyx.db.enums import AccessType from onyx.db.feedback import create_chat_message_feedback from onyx.db.feedback import create_doc_retrieval_feedback from onyx.db.models import User from onyx.db.persona import get_persona_by_id +from onyx.db.user_documents import create_user_files from onyx.file_processing.extract_file_text import docx_to_txt_filename from onyx.file_processing.extract_file_text import extract_file_text from onyx.file_store.file_store import get_default_file_store @@ -66,6 +74,8 @@ from onyx.natural_language_processing.utils import get_tokenizer from onyx.secondary_llm_flows.chat_session_naming import ( get_renamed_conversation_name, ) +from onyx.server.documents.models import ConnectorBase +from onyx.server.documents.models import CredentialBase from onyx.server.query_and_chat.models import ChatFeedbackRequest from onyx.server.query_and_chat.models import ChatMessageIdentifier from onyx.server.query_and_chat.models import ChatRenameRequest @@ -91,6 +101,7 @@ from onyx.utils.logger import setup_logger from onyx.utils.telemetry import create_milestone_and_report from shared_configs.contextvars import get_current_tenant_id +RECENT_DOCS_FOLDER_ID = -1 logger = setup_logger() @@ -648,7 +659,7 @@ def seed_chat_from_slack( def upload_files_for_chat( files: list[UploadFile], db_session: Session = Depends(get_session), - _: User | None = Depends(current_user), + user: User | None = Depends(current_user), ) -> dict[str, list[FileDescriptor]]: image_content_types = 
{"image/jpeg", "image/png", "image/webp"} csv_content_types = {"text/csv"} @@ -686,17 +697,11 @@ def upload_files_for_chat( if file.content_type in image_content_types: error_detail = "Unsupported image file type. Supported image types include .jpg, .jpeg, .png, .webp." elif file.content_type in text_content_types: - error_detail = "Unsupported text file type. Supported text types include .txt, .csv, .md, .mdx, .conf, " - ".log, .tsv." + error_detail = "Unsupported text file type." elif file.content_type in csv_content_types: - error_detail = ( - "Unsupported CSV file type. Supported CSV types include .csv." - ) + error_detail = "Unsupported CSV file type." else: - error_detail = ( - "Unsupported document file type. Supported document types include .pdf, .docx, .pptx, .xlsx, " - ".json, .xml, .yml, .yaml, .eml, .epub." - ) + error_detail = "Unsupported document file type." raise HTTPException(status_code=400, detail=error_detail) if ( @@ -744,11 +749,12 @@ def upload_files_for_chat( file_type=new_content_type or file_type.value, ) - # if the file is a doc, extract text and store that so we don't need - # to re-extract it every time we send a message + # 4) If the file is a doc, extract text and store that separately if file_type == ChatFileType.DOC: + # Re-wrap bytes in a fresh BytesIO so we start at position 0 + extracted_text_io = io.BytesIO(file_content) extracted_text = extract_file_text( - file=file_content_io, # use the bytes we already read + file=extracted_text_io, # use the bytes we already read file_name=file.filename or "", ) text_file_id = str(uuid.uuid4()) @@ -760,13 +766,57 @@ def upload_files_for_chat( file_origin=FileOrigin.CHAT_UPLOAD, file_type="text/plain", ) - # for DOC type, just return this for the FileDescriptor - # as we would always use this as the ID to attach to the - # message + # Return the text file as the "main" file descriptor for doc types file_info.append((text_file_id, file.filename, ChatFileType.PLAIN_TEXT)) else: file_info.append((file_id, file.filename, file_type)) + # 5) Create a user file for each uploaded file + user_files = create_user_files([file], RECENT_DOCS_FOLDER_ID, user, db_session) + for user_file in user_files: + # 6) Create connector + connector_base = ConnectorBase( + name=f"UserFile-{int(time.time())}", + source=DocumentSource.FILE, + input_type=InputType.LOAD_STATE, + connector_specific_config={ + "file_locations": [user_file.file_id], + }, + refresh_freq=None, + prune_freq=None, + indexing_start=None, + ) + connector = create_connector( + db_session=db_session, + connector_data=connector_base, + ) + + # 7) Create credential + credential_info = CredentialBase( + credential_json={}, + admin_public=True, + source=DocumentSource.FILE, + curator_public=True, + groups=[], + name=f"UserFileCredential-{int(time.time())}", + is_user_file=True, + ) + credential = create_credential(credential_info, user, db_session) + + # 8) Create connector credential pair + cc_pair = add_credential_to_connector( + db_session=db_session, + user=user, + connector_id=connector.id, + credential_id=credential.id, + cc_pair_name=f"UserFileCCPair-{int(time.time())}", + access_type=AccessType.PRIVATE, + auto_sync_options=None, + groups=[], + ) + user_file.cc_pair_id = cc_pair.data + db_session.commit() + return { "files": [ {"id": file_id, "type": file_type, "name": file_name} diff --git a/backend/onyx/server/query_and_chat/models.py b/backend/onyx/server/query_and_chat/models.py index 132be33caf..2bc3b4eb75 100644 --- a/backend/onyx/server/query_and_chat/models.py 
+++ b/backend/onyx/server/query_and_chat/models.py @@ -92,6 +92,8 @@ class CreateChatMessageRequest(ChunkContext): message: str # Files that we should attach to this message file_descriptors: list[FileDescriptor] + user_file_ids: list[int] = [] + user_folder_ids: list[int] = [] # If no prompt provided, uses the largest prompt of the chat session # but really this should be explicitly specified, only in the simplified APIs is this inferred @@ -118,7 +120,7 @@ class CreateChatMessageRequest(ChunkContext): # this does persist in the chat thread details temperature_override: float | None = None - # allow user to specify an alternate assistnat + # allow user to specify an alternate assistant alternate_assistant_id: int | None = None # This takes the priority over the prompt_override @@ -135,6 +137,8 @@ class CreateChatMessageRequest(ChunkContext): # https://platform.openai.com/docs/guides/structured-outputs/introduction structured_response_format: dict | None = None + force_user_file_search: bool = False + # If true, ignores most of the search options and uses pro search instead. # TODO: decide how many of the above options we want to pass through to pro search use_agentic_search: bool = False diff --git a/backend/onyx/server/user_documents/api.py b/backend/onyx/server/user_documents/api.py new file mode 100644 index 0000000000..9e01198eff --- /dev/null +++ b/backend/onyx/server/user_documents/api.py @@ -0,0 +1,567 @@ +import io +import time +from datetime import datetime +from datetime import timedelta +from typing import List + +import requests +import sqlalchemy.exc +from bs4 import BeautifulSoup +from fastapi import APIRouter +from fastapi import Depends +from fastapi import File +from fastapi import Form +from fastapi import HTTPException +from fastapi import Query +from fastapi import UploadFile +from pydantic import BaseModel +from sqlalchemy.orm import Session + +from onyx.auth.users import current_user +from onyx.configs.constants import DocumentSource +from onyx.connectors.models import InputType +from onyx.db.connector import create_connector +from onyx.db.connector_credential_pair import add_credential_to_connector +from onyx.db.credentials import create_credential +from onyx.db.engine import get_session +from onyx.db.enums import AccessType +from onyx.db.enums import ConnectorCredentialPairStatus +from onyx.db.models import ConnectorCredentialPair +from onyx.db.models import User +from onyx.db.models import UserFile +from onyx.db.models import UserFolder +from onyx.db.user_documents import calculate_user_files_token_count +from onyx.db.user_documents import create_user_file_with_indexing +from onyx.db.user_documents import create_user_files +from onyx.db.user_documents import get_user_file_indexing_status +from onyx.db.user_documents import share_file_with_assistant +from onyx.db.user_documents import share_folder_with_assistant +from onyx.db.user_documents import unshare_file_with_assistant +from onyx.db.user_documents import unshare_folder_with_assistant +from onyx.file_processing.html_utils import web_html_cleanup +from onyx.server.documents.connector import trigger_indexing_for_cc_pair +from onyx.server.documents.models import ConnectorBase +from onyx.server.documents.models import CredentialBase +from onyx.server.user_documents.models import MessageResponse +from onyx.server.user_documents.models import UserFileSnapshot +from onyx.server.user_documents.models import UserFolderSnapshot +from onyx.setup import setup_logger +from shared_configs.contextvars import 
get_current_tenant_id + +logger = setup_logger() + +router = APIRouter() + + +class FolderCreationRequest(BaseModel): + name: str + description: str + + +@router.post("/user/folder") +def create_folder( + request: FolderCreationRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserFolderSnapshot: + try: + new_folder = UserFolder( + user_id=user.id if user else None, + name=request.name, + description=request.description, + ) + db_session.add(new_folder) + db_session.commit() + return UserFolderSnapshot.from_model(new_folder) + except sqlalchemy.exc.DataError as e: + if "StringDataRightTruncation" in str(e): + raise HTTPException( + status_code=400, + detail="Folder name or description is too long. Please use a shorter name or description.", + ) + raise + + +@router.get( + "/user/folder", +) +def get_folders( + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[UserFolderSnapshot]: + user_id = user.id if user else None + folders = db_session.query(UserFolder).filter(UserFolder.user_id == user_id).all() + return [UserFolderSnapshot.from_model(folder) for folder in folders] + + +@router.get("/user/folder/{folder_id}") +def get_folder( + folder_id: int, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserFolderSnapshot: + user_id = user.id if user else None + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == folder_id, UserFolder.user_id == user_id) + .first() + ) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + return UserFolderSnapshot.from_model(folder) + + +RECENT_DOCS_FOLDER_ID = -1 + + +@router.post("/user/file/upload") +def upload_user_files( + files: List[UploadFile] = File(...), + folder_id: int | None = Form(None), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[UserFileSnapshot]: + if folder_id == 0: + folder_id = None + + try: + # Use our consolidated function that handles indexing properly + user_files = create_user_file_with_indexing( + files, folder_id or -1, user, db_session + ) + + return [UserFileSnapshot.from_model(user_file) for user_file in user_files] + + except Exception as e: + logger.error(f"Error uploading files: {str(e)}") + raise HTTPException(status_code=500, detail=f"Failed to upload files: {str(e)}") + + +class FolderUpdateRequest(BaseModel): + name: str | None = None + description: str | None = None + + +@router.put("/user/folder/{folder_id}") +def update_folder( + folder_id: int, + request: FolderUpdateRequest, + user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserFolderSnapshot: + user_id = user.id if user else None + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == folder_id, UserFolder.user_id == user_id) + .first() + ) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + if request.name: + folder.name = request.name + if request.description: + folder.description = request.description + db_session.commit() + + return UserFolderSnapshot.from_model(folder) + + +@router.delete("/user/folder/{folder_id}") +def delete_folder( + folder_id: int, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == folder_id, UserFolder.user_id == user_id) + .first() + ) + if not 
folder: + raise HTTPException(status_code=404, detail="Folder not found") + db_session.delete(folder) + db_session.commit() + return MessageResponse(message="Folder deleted successfully") + + +@router.delete("/user/file/{file_id}") +def delete_file( + file_id: int, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + file = ( + db_session.query(UserFile) + .filter(UserFile.id == file_id, UserFile.user_id == user_id) + .first() + ) + if not file: + raise HTTPException(status_code=404, detail="File not found") + db_session.delete(file) + db_session.commit() + return MessageResponse(message="File deleted successfully") + + +class FileMoveRequest(BaseModel): + new_folder_id: int | None + + +@router.put("/user/file/{file_id}/move") +def move_file( + file_id: int, + request: FileMoveRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserFileSnapshot: + user_id = user.id if user else None + file = ( + db_session.query(UserFile) + .filter(UserFile.id == file_id, UserFile.user_id == user_id) + .first() + ) + if not file: + raise HTTPException(status_code=404, detail="File not found") + file.folder_id = request.new_folder_id + db_session.commit() + return UserFileSnapshot.from_model(file) + + +@router.get("/user/file-system") +def get_file_system( + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[UserFolderSnapshot]: + user_id = user.id if user else None + folders = db_session.query(UserFolder).filter(UserFolder.user_id == user_id).all() + return [UserFolderSnapshot.from_model(folder) for folder in folders] + + +@router.put("/user/file/{file_id}/rename") +def rename_file( + file_id: int, + name: str, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> UserFileSnapshot: + user_id = user.id if user else None + file = ( + db_session.query(UserFile) + .filter(UserFile.id == file_id, UserFile.user_id == user_id) + .first() + ) + if not file: + raise HTTPException(status_code=404, detail="File not found") + file.name = name + db_session.commit() + return UserFileSnapshot.from_model(file) + + +class ShareRequest(BaseModel): + assistant_id: int + + +@router.post("/user/file/{file_id}/share") +def share_file( + file_id: int, + request: ShareRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + file = ( + db_session.query(UserFile) + .filter(UserFile.id == file_id, UserFile.user_id == user_id) + .first() + ) + if not file: + raise HTTPException(status_code=404, detail="File not found") + + share_file_with_assistant(file_id, request.assistant_id, db_session) + return MessageResponse(message="File shared successfully with the assistant") + + +@router.post("/user/file/{file_id}/unshare") +def unshare_file( + file_id: int, + request: ShareRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + file = ( + db_session.query(UserFile) + .filter(UserFile.id == file_id, UserFile.user_id == user_id) + .first() + ) + if not file: + raise HTTPException(status_code=404, detail="File not found") + + unshare_file_with_assistant(file_id, request.assistant_id, db_session) + return MessageResponse(message="File unshared successfully from the assistant") + + +@router.post("/user/folder/{folder_id}/share") +def 
share_folder( + folder_id: int, + request: ShareRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == folder_id, UserFolder.user_id == user_id) + .first() + ) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + share_folder_with_assistant(folder_id, request.assistant_id, db_session) + return MessageResponse( + message="Folder and its files shared successfully with the assistant" + ) + + +@router.post("/user/folder/{folder_id}/unshare") +def unshare_folder( + folder_id: int, + request: ShareRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == folder_id, UserFolder.user_id == user_id) + .first() + ) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + unshare_folder_with_assistant(folder_id, request.assistant_id, db_session) + return MessageResponse( + message="Folder and its files unshared successfully from the assistant" + ) + + +class CreateFileFromLinkRequest(BaseModel): + url: str + folder_id: int | None + + +@router.post("/user/file/create-from-link") +def create_file_from_link( + request: CreateFileFromLinkRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[UserFileSnapshot]: + try: + response = requests.get(request.url) + response.raise_for_status() + content = response.text + soup = BeautifulSoup(content, "html.parser") + parsed_html = web_html_cleanup(soup, mintlify_cleanup_enabled=False) + + file_name = f"{parsed_html.title or 'Untitled'}.txt" + file_content = parsed_html.cleaned_text.encode() + + file = UploadFile(filename=file_name, file=io.BytesIO(file_content)) + user_files = create_user_files( + [file], request.folder_id or -1, user, db_session, link_url=request.url + ) + + # Create connector and credential (same as in upload_user_files) + for user_file in user_files: + connector_base = ConnectorBase( + name=f"UserFile-{user_file.file_id}-{int(time.time())}", + source=DocumentSource.FILE, + input_type=InputType.LOAD_STATE, + connector_specific_config={ + "file_locations": [user_file.file_id], + }, + refresh_freq=None, + prune_freq=None, + indexing_start=None, + ) + + connector = create_connector( + db_session=db_session, + connector_data=connector_base, + ) + + credential_info = CredentialBase( + credential_json={}, + admin_public=True, + source=DocumentSource.FILE, + curator_public=True, + groups=[], + name=f"UserFileCredential-{user_file.file_id}-{int(time.time())}", + ) + credential = create_credential(credential_info, user, db_session) + + cc_pair = add_credential_to_connector( + db_session=db_session, + user=user, + connector_id=connector.id, + credential_id=credential.id, + cc_pair_name=f"UserFileCCPair-{int(time.time())}", + access_type=AccessType.PRIVATE, + auto_sync_options=None, + groups=[], + is_user_file=True, + ) + user_file.cc_pair_id = cc_pair.data + db_session.commit() + + # Trigger immediate indexing with highest priority + tenant_id = get_current_tenant_id() + trigger_indexing_for_cc_pair( + [], connector.id, False, tenant_id, db_session, is_user_file=True + ) + + db_session.commit() + return [UserFileSnapshot.from_model(user_file) for user_file in user_files] + except requests.RequestException as e: + 
raise HTTPException(status_code=400, detail=f"Failed to fetch URL: {str(e)}") + + +@router.get("/user/file/indexing-status") +def get_files_indexing_status( + file_ids: list[int] = Query(...), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> dict[int, bool]: + """Get indexing status for multiple files""" + return get_user_file_indexing_status(file_ids, db_session) + + +@router.get("/user/file/token-estimate") +def get_files_token_estimate( + file_ids: list[int] = Query([]), + folder_ids: list[int] = Query([]), + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> dict: + """Get token estimate for files and folders""" + total_tokens = calculate_user_files_token_count(file_ids, folder_ids, db_session) + return {"total_tokens": total_tokens} + + +class ReindexFileRequest(BaseModel): + file_id: int + + +@router.post("/user/file/reindex") +def reindex_file( + request: ReindexFileRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + user_id = user.id if user else None + user_file_to_reindex = ( + db_session.query(UserFile) + .filter(UserFile.id == request.file_id, UserFile.user_id == user_id) + .first() + ) + + if not user_file_to_reindex: + raise HTTPException(status_code=404, detail="File not found") + + if not user_file_to_reindex.cc_pair_id: + raise HTTPException( + status_code=400, + detail="File does not have an associated connector-credential pair", + ) + + # Get the connector id from the cc_pair + cc_pair = ( + db_session.query(ConnectorCredentialPair) + .filter_by(id=user_file_to_reindex.cc_pair_id) + .first() + ) + if not cc_pair: + raise HTTPException( + status_code=404, detail="Associated connector-credential pair not found" + ) + + # Trigger immediate reindexing with highest priority + tenant_id = get_current_tenant_id() + # Update the cc_pair status to ACTIVE to ensure it's processed + cc_pair.status = ConnectorCredentialPairStatus.ACTIVE + db_session.commit() + try: + trigger_indexing_for_cc_pair( + [], cc_pair.connector_id, True, tenant_id, db_session, is_user_file=True + ) + return MessageResponse( + message="File reindexing has been triggered successfully" + ) + except Exception as e: + logger.error( + f"Error triggering reindexing for file {request.file_id}: {str(e)}" + ) + raise HTTPException( + status_code=500, detail=f"Failed to trigger reindexing: {str(e)}" + ) + + +class BulkCleanupRequest(BaseModel): + folder_id: int + days_older_than: int | None = None + + +@router.post("/user/file/bulk-cleanup") +def bulk_cleanup_files( + request: BulkCleanupRequest, + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> MessageResponse: + """Bulk delete files older than specified days in a folder""" + user_id = user.id if user else None + + logger.info( + f"Bulk cleanup request: folder_id={request.folder_id}, days_older_than={request.days_older_than}" + ) + + # Check if folder exists + if request.folder_id != RECENT_DOCS_FOLDER_ID: + folder = ( + db_session.query(UserFolder) + .filter(UserFolder.id == request.folder_id, UserFolder.user_id == user_id) + .first() + ) + if not folder: + raise HTTPException(status_code=404, detail="Folder not found") + + filter_criteria = [UserFile.user_id == user_id] + + # Filter by folder + if request.folder_id != -2: # -2 means all folders + filter_criteria.append(UserFile.folder_id == request.folder_id) + + # Filter by date if days_older_than is provided + if 
request.days_older_than is not None: + cutoff_date = datetime.utcnow() - timedelta(days=request.days_older_than) + logger.info(f"Filtering files older than {cutoff_date} (UTC)") + filter_criteria.append(UserFile.created_at < cutoff_date) + + # Get all files matching the criteria + files_to_delete = db_session.query(UserFile).filter(*filter_criteria).all() + + logger.info(f"Found {len(files_to_delete)} files to delete") + + # Delete files + delete_count = 0 + for file in files_to_delete: + logger.debug( + f"Deleting file: id={file.id}, name={file.name}, created_at={file.created_at}" + ) + db_session.delete(file) + delete_count += 1 + + db_session.commit() + + return MessageResponse(message=f"Successfully deleted {delete_count} files") diff --git a/backend/onyx/server/user_documents/models.py b/backend/onyx/server/user_documents/models.py new file mode 100644 index 0000000000..4a43c2e53e --- /dev/null +++ b/backend/onyx/server/user_documents/models.py @@ -0,0 +1,101 @@ +from datetime import datetime +from enum import Enum as PyEnum +from typing import List +from uuid import UUID + +from pydantic import BaseModel + +from onyx.db.enums import ConnectorCredentialPairStatus +from onyx.db.models import UserFile +from onyx.db.models import UserFolder + + +class UserFileStatus(str, PyEnum): + FAILED = "FAILED" + INDEXING = "INDEXING" + INDEXED = "INDEXED" + REINDEXING = "REINDEXING" + + +class UserFileSnapshot(BaseModel): + id: int + name: str + document_id: str + folder_id: int | None = None + user_id: UUID | None + file_id: str + created_at: datetime + assistant_ids: List[int] = [] # List of assistant IDs + token_count: int | None + indexed: bool + link_url: str | None + status: UserFileStatus + + @classmethod + def from_model(cls, model: UserFile) -> "UserFileSnapshot": + return cls( + id=model.id, + name=model.name[:-4] + if model.link_url and model.name.endswith(".txt") + else model.name, + folder_id=model.folder_id, + document_id=model.document_id, + user_id=model.user_id, + file_id=model.file_id, + created_at=model.created_at, + assistant_ids=[assistant.id for assistant in model.assistants], + token_count=model.token_count, + status=( + UserFileStatus.FAILED + if model.cc_pair + and len(model.cc_pair.index_attempts) > 0 + and model.cc_pair.last_successful_index_time is None + and model.cc_pair.status == ConnectorCredentialPairStatus.PAUSED + else UserFileStatus.INDEXED + if model.cc_pair + and model.cc_pair.last_successful_index_time is not None + else UserFileStatus.REINDEXING + if model.cc_pair + and len(model.cc_pair.index_attempts) > 1 + and model.cc_pair.last_successful_index_time is None + and model.cc_pair.status != ConnectorCredentialPairStatus.PAUSED + else UserFileStatus.INDEXING + ), + indexed=model.cc_pair.last_successful_index_time is not None + if model.cc_pair + else False, + link_url=model.link_url, + ) + + +class UserFolderSnapshot(BaseModel): + id: int + name: str + description: str + files: List[UserFileSnapshot] + created_at: datetime + user_id: UUID | None + assistant_ids: List[int] = [] # List of assistant IDs + token_count: int | None + + @classmethod + def from_model(cls, model: UserFolder) -> "UserFolderSnapshot": + return cls( + id=model.id, + name=model.name, + description=model.description, + files=[UserFileSnapshot.from_model(file) for file in model.files], + created_at=model.created_at, + user_id=model.user_id, + assistant_ids=[assistant.id for assistant in model.assistants], + token_count=sum(file.token_count or 0 for file in model.files) or None, + ) + + 
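The chained conditional that computes status in UserFileSnapshot.from_model above is dense. As a reading aid only (a sketch using the names from this diff, not code in the PR), the same decision written as explicit branches:

def derive_user_file_status(cc_pair) -> UserFileStatus:
    # Mirrors the branch order of the chained conditional above.
    if (
        cc_pair
        and len(cc_pair.index_attempts) > 0
        and cc_pair.last_successful_index_time is None
        and cc_pair.status == ConnectorCredentialPairStatus.PAUSED
    ):
        return UserFileStatus.FAILED  # attempts exist, none succeeded, pair paused
    if cc_pair and cc_pair.last_successful_index_time is not None:
        return UserFileStatus.INDEXED  # at least one successful index run
    if (
        cc_pair
        and len(cc_pair.index_attempts) > 1
        and cc_pair.last_successful_index_time is None
        and cc_pair.status != ConnectorCredentialPairStatus.PAUSED
    ):
        return UserFileStatus.REINDEXING  # retrying after a failed earlier attempt
    return UserFileStatus.INDEXING  # no cc_pair yet, or first attempt in flight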
+class MessageResponse(BaseModel): + message: str + + +class FileSystemResponse(BaseModel): + folders: list[UserFolderSnapshot] + files: list[UserFileSnapshot] diff --git a/backend/onyx/tools/force.py b/backend/onyx/tools/force.py index 25747747e3..322558ed3d 100644 --- a/backend/onyx/tools/force.py +++ b/backend/onyx/tools/force.py @@ -12,6 +12,7 @@ class ForceUseTool(BaseModel): force_use: bool tool_name: str args: dict[str, Any] | None = None + override_kwargs: Any = None # This will hold tool-specific override kwargs def build_openai_tool_choice_dict(self) -> dict[str, Any]: """Build dict in the format that OpenAI expects which tells them to use this tool.""" diff --git a/backend/onyx/tools/models.py b/backend/onyx/tools/models.py index c26e0b9426..414a7f406b 100644 --- a/backend/onyx/tools/models.py +++ b/backend/onyx/tools/models.py @@ -70,6 +70,11 @@ class SearchToolOverrideKwargs(BaseModel): precomputed_query_embedding: Embedding | None = None precomputed_is_keyword: bool | None = None precomputed_keywords: list[str] | None = None + user_file_ids: list[int] | None = None + user_folder_ids: list[int] | None = None + ordering_only: bool | None = ( + None # Flag for fast path when search is only needed for ordering + ) class Config: arbitrary_types_allowed = True diff --git a/backend/onyx/tools/tool_constructor.py b/backend/onyx/tools/tool_constructor.py index 4650dd3b0e..66e1fe5961 100644 --- a/backend/onyx/tools/tool_constructor.py +++ b/backend/onyx/tools/tool_constructor.py @@ -138,10 +138,12 @@ def construct_tools( user: User | None, llm: LLM, fast_llm: LLM, + use_file_search: bool, search_tool_config: SearchToolConfig | None = None, internet_search_tool_config: InternetSearchToolConfig | None = None, image_generation_tool_config: ImageGenerationToolConfig | None = None, custom_tool_config: CustomToolConfig | None = None, + user_knowledge_present: bool = False, ) -> dict[int, list[Tool]]: """Constructs tools based on persona configuration and available APIs""" tool_dict: dict[int, list[Tool]] = {} @@ -158,7 +160,7 @@ def construct_tools( ) # Handle Search Tool - if tool_cls.__name__ == SearchTool.__name__: + if tool_cls.__name__ == SearchTool.__name__ and not user_knowledge_present: if not search_tool_config: search_tool_config = SearchToolConfig() @@ -251,6 +253,33 @@ def construct_tools( for tool_list in tool_dict.values(): tools.extend(tool_list) + if use_file_search: + search_tool_config = SearchToolConfig() + + search_tool = SearchTool( + db_session=db_session, + user=user, + persona=persona, + retrieval_options=search_tool_config.retrieval_options, + prompt_config=prompt_config, + llm=llm, + fast_llm=fast_llm, + pruning_config=search_tool_config.document_pruning_config, + answer_style_config=search_tool_config.answer_style_config, + selected_sections=search_tool_config.selected_sections, + chunks_above=search_tool_config.chunks_above, + chunks_below=search_tool_config.chunks_below, + full_doc=search_tool_config.full_doc, + evaluation_type=( + LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP + ), + rerank_settings=search_tool_config.rerank_settings, + bypass_acl=search_tool_config.bypass_acl, + ) + tool_dict[1] = [search_tool] + # factor in tool definition size when pruning if search_tool_config: search_tool_config.document_pruning_config.tool_num_tokens = ( diff --git a/backend/onyx/tools/tool_implementations/custom/custom_tool.py b/backend/onyx/tools/tool_implementations/custom/custom_tool.py index d38e0accbd..cf11d137f7 100644 --- 
a/backend/onyx/tools/tool_implementations/custom/custom_tool.py +++ b/backend/onyx/tools/tool_implementations/custom/custom_tool.py @@ -64,7 +64,7 @@ logger = setup_logger() CUSTOM_TOOL_RESPONSE_ID = "custom_tool_response" -class CustomToolFileResponse(BaseModel): +class CustomToolUserFileSnapshot(BaseModel): file_ids: List[str] # References to saved images or CSVs @@ -131,7 +131,7 @@ class CustomTool(BaseTool): response = cast(CustomToolCallSummary, args[0].response) if response.response_type == "image" or response.response_type == "csv": - image_response = cast(CustomToolFileResponse, response.tool_result) + image_response = cast(CustomToolUserFileSnapshot, response.tool_result) return json.dumps({"file_ids": image_response.file_ids}) # For JSON or other responses, return as-is @@ -267,14 +267,14 @@ class CustomTool(BaseTool): file_ids = self._save_and_get_file_references( response.content, content_type ) - tool_result = CustomToolFileResponse(file_ids=file_ids) + tool_result = CustomToolUserFileSnapshot(file_ids=file_ids) response_type = "csv" elif "image/" in content_type: file_ids = self._save_and_get_file_references( response.content, content_type ) - tool_result = CustomToolFileResponse(file_ids=file_ids) + tool_result = CustomToolUserFileSnapshot(file_ids=file_ids) response_type = "image" else: @@ -358,7 +358,7 @@ class CustomTool(BaseTool): def final_result(self, *args: ToolResponse) -> JSON_ro: response = cast(CustomToolCallSummary, args[0].response) - if isinstance(response.tool_result, CustomToolFileResponse): + if isinstance(response.tool_result, CustomToolUserFileSnapshot): return response.tool_result.model_dump() return response.tool_result diff --git a/backend/onyx/tools/tool_implementations/search/search_tool.py b/backend/onyx/tools/tool_implementations/search/search_tool.py index 00a4907d68..379b2dd2a9 100644 --- a/backend/onyx/tools/tool_implementations/search/search_tool.py +++ b/backend/onyx/tools/tool_implementations/search/search_tool.py @@ -1,4 +1,5 @@ import json +import time from collections.abc import Callable from collections.abc import Generator from typing import Any @@ -23,6 +24,8 @@ from onyx.configs.chat_configs import CONTEXT_CHUNKS_BELOW from onyx.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS from onyx.context.search.enums import LLMEvaluationType from onyx.context.search.enums import QueryFlow +from onyx.context.search.enums import SearchType +from onyx.context.search.models import BaseFilters from onyx.context.search.models import IndexFilters from onyx.context.search.models import InferenceSection from onyx.context.search.models import RerankingDetails @@ -286,6 +289,9 @@ class SearchTool(Tool[SearchToolOverrideKwargs]): alternate_db_session = None retrieved_sections_callback = None skip_query_analysis = False + user_file_ids = None + user_folder_ids = None + ordering_only = False if override_kwargs: force_no_rerank = use_alt_not_None(override_kwargs.force_no_rerank, False) alternate_db_session = override_kwargs.alternate_db_session @@ -293,13 +299,41 @@ class SearchTool(Tool[SearchToolOverrideKwargs]): skip_query_analysis = use_alt_not_None( override_kwargs.skip_query_analysis, False ) - precomputed_query_embedding = override_kwargs.precomputed_query_embedding - precomputed_is_keyword = override_kwargs.precomputed_is_keyword - precomputed_keywords = override_kwargs.precomputed_keywords + user_file_ids = override_kwargs.user_file_ids + user_folder_ids = override_kwargs.user_folder_ids + ordering_only = 
use_alt_not_None(override_kwargs.ordering_only, False) + + # Fast path for ordering-only search + if ordering_only: + yield from self._run_ordering_only_search( + query, user_file_ids, user_folder_ids + ) + return + if self.selected_sections: yield from self._build_response_for_specified_sections(query) return + # Create a copy of the retrieval options with user_file_ids if provided + retrieval_options = self.retrieval_options + if (user_file_ids or user_folder_ids) and retrieval_options: + # Create a copy to avoid modifying the original + filters = ( + retrieval_options.filters.model_copy() + if retrieval_options.filters + else BaseFilters() + ) + filters.user_file_ids = user_file_ids + retrieval_options = retrieval_options.model_copy( + update={"filters": filters} + ) + elif user_file_ids or user_folder_ids: + # Create new retrieval options with user_file_ids + filters = BaseFilters( + user_file_ids=user_file_ids, user_folder_ids=user_folder_ids + ) + retrieval_options = RetrievalDetails(filters=filters) + search_pipeline = SearchPipeline( search_request=SearchRequest( query=query, @@ -307,13 +341,11 @@ class SearchTool(Tool[SearchToolOverrideKwargs]): if force_no_rerank else self.evaluation_type, human_selected_filters=( - self.retrieval_options.filters if self.retrieval_options else None + retrieval_options.filters if retrieval_options else None ), persona=self.persona, - offset=( - self.retrieval_options.offset if self.retrieval_options else None - ), - limit=self.retrieval_options.limit if self.retrieval_options else None, + offset=(retrieval_options.offset if retrieval_options else None), + limit=retrieval_options.limit if retrieval_options else None, rerank_settings=RerankingDetails( rerank_model_name=None, rerank_api_url=None, @@ -328,8 +360,8 @@ class SearchTool(Tool[SearchToolOverrideKwargs]): chunks_below=self.chunks_below, full_doc=self.full_doc, enable_auto_detect_filters=( - self.retrieval_options.enable_auto_detect_filters - if self.retrieval_options + retrieval_options.enable_auto_detect_filters + if retrieval_options else None ), precomputed_query_embedding=precomputed_query_embedding, @@ -387,6 +419,104 @@ class SearchTool(Tool[SearchToolOverrideKwargs]): prompt_config=self.prompt_config, ) + def _run_ordering_only_search( + self, + query: str, + user_file_ids: list[int] | None, + user_folder_ids: list[int] | None, + ) -> Generator[ToolResponse, None, None]: + """Optimized search that only retrieves document order with minimal processing.""" + start_time = time.time() + + logger.info("Fast path: Starting optimized ordering-only search") + + # Create temporary search pipeline for optimized retrieval + search_pipeline = SearchPipeline( + search_request=SearchRequest( + query=query, + evaluation_type=LLMEvaluationType.SKIP, # Force skip evaluation + persona=self.persona, + # Minimal configuration needed + chunks_above=0, + chunks_below=0, + ), + user=self.user, + llm=self.llm, + fast_llm=self.fast_llm, + skip_query_analysis=True, # Skip unnecessary analysis + db_session=self.db_session, + bypass_acl=self.bypass_acl, + prompt_config=self.prompt_config, + ) + + # Log what we're doing + logger.info( + f"Fast path: Using {len(user_file_ids or [])} files and {len(user_folder_ids or [])} folders" + ) + + # Get chunks using the optimized method in SearchPipeline + retrieval_start = time.time() + retrieved_chunks = search_pipeline.get_ordering_only_chunks( + query=query, user_file_ids=user_file_ids, user_folder_ids=user_folder_ids + ) + retrieval_time = time.time() - 
retrieval_start + + logger.info( + f"Fast path: Retrieved {len(retrieved_chunks)} chunks in {retrieval_time:.2f}s" + ) + + # Convert chunks to minimal sections (we don't need full content) + minimal_sections = [] + for chunk in retrieved_chunks: + # Create a minimal section with just center_chunk + minimal_section = InferenceSection( + center_chunk=chunk, + chunks=[chunk], + combined_content=chunk.content, # Use the chunk content as combined content + ) + minimal_sections.append(minimal_section) + + # Log document IDs found for debugging + doc_ids = [chunk.document_id for chunk in retrieved_chunks] + logger.info( + f"Fast path: Document IDs in order: {doc_ids[:5]}{'...' if len(doc_ids) > 5 else ''}" + ) + + # Yield just the required responses for document ordering + yield ToolResponse( + id=SEARCH_RESPONSE_SUMMARY_ID, + response=SearchResponseSummary( + rephrased_query=query, + top_sections=minimal_sections, + predicted_flow=QueryFlow.QUESTION_ANSWER, + predicted_search=SearchType.SEMANTIC, + final_filters=IndexFilters( + user_file_ids=user_file_ids or [], + user_folder_ids=user_folder_ids or [], + access_control_list=None, + ), + recency_bias_multiplier=1.0, + ), + ) + + # For fast path, don't trigger any LLM evaluation for relevance + logger.info( + "Fast path: Skipping section relevance evaluation to optimize performance" + ) + yield ToolResponse( + id=SECTION_RELEVANCE_LIST_ID, + response=None, + ) + + # We need to yield this for the caller to extract document order + minimal_docs = [ + llm_doc_from_inference_section(section) for section in minimal_sections + ] + yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=minimal_docs) + + total_time = time.time() - start_time + logger.info(f"Fast path: Completed ordering-only search in {total_time:.2f}s") + # Allows yielding the same responses as a SearchTool without being a SearchTool. # SearchTool passed in to allow for access to SearchTool properties. 
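The ordering-only fast path above is opted into entirely through SearchToolOverrideKwargs. A minimal sketch of a caller (the run() argument wiring is an assumption for illustration; the exact signature may differ):

# Sketch only: requesting the ordering-only fast path.
override_kwargs = SearchToolOverrideKwargs(
    user_file_ids=[12, 34],
    user_folder_ids=[5],
    ordering_only=True,  # skip reranking, relevance evaluation, and pruning
)
for packet in search_tool.run(query="q3 roadmap", override_kwargs=override_kwargs):
    if packet.id == FINAL_CONTEXT_DOCUMENTS_ID:
        # Documents arrive in retrieval order; only the ordering is meaningful here.
        ordered_doc_ids = [doc.document_id for doc in packet.response]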
@@ -405,6 +535,10 @@ def yield_search_responses( get_section_relevance: Callable[[], list[SectionRelevancePiece] | None], search_tool: SearchTool, ) -> Generator[ToolResponse, None, None]: + # Get the search query to check if we're in ordering-only mode + # We can infer this from the reranked_sections not containing any relevance scoring + is_ordering_only = search_tool.evaluation_type == LLMEvaluationType.SKIP + yield ToolResponse( id=SEARCH_RESPONSE_SUMMARY_ID, response=SearchResponseSummary( @@ -417,25 +551,48 @@ def yield_search_responses( ), ) - section_relevance = get_section_relevance() - yield ToolResponse( - id=SECTION_RELEVANCE_LIST_ID, - response=section_relevance, - ) + section_relevance: list[SectionRelevancePiece] | None = None + + # Skip section relevance in ordering-only mode + if is_ordering_only: + logger.info( + "Fast path: Skipping section relevance evaluation in yield_search_responses" + ) + yield ToolResponse( + id=SECTION_RELEVANCE_LIST_ID, + response=None, + ) + else: + section_relevance = get_section_relevance() + yield ToolResponse( + id=SECTION_RELEVANCE_LIST_ID, + response=section_relevance, + ) final_context_sections = get_final_context_sections() - pruned_sections = prune_sections( - sections=final_context_sections, - section_relevance_list=section_relevance_list_impl( - section_relevance, final_context_sections - ), - prompt_config=search_tool.prompt_config, - llm_config=search_tool.llm.config, - question=query, - contextual_pruning_config=search_tool.contextual_pruning_config, - ) - llm_docs = [llm_doc_from_inference_section(section) for section in pruned_sections] + # Skip pruning sections in ordering-only mode + if is_ordering_only: + logger.info("Fast path: Skipping section pruning in ordering-only mode") + llm_docs = [ + llm_doc_from_inference_section(section) + for section in final_context_sections + ] + else: + # Use the section_relevance we already computed above + pruned_sections = prune_sections( + sections=final_context_sections, + section_relevance_list=section_relevance_list_impl( + section_relevance, final_context_sections + ), + prompt_config=search_tool.prompt_config, + llm_config=search_tool.llm.config, + question=query, + contextual_pruning_config=search_tool.contextual_pruning_config, + ) + llm_docs = [ + llm_doc_from_inference_section(section) for section in pruned_sections + ] yield ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=llm_docs) diff --git a/backend/scripts/debugging/onyx_vespa.py b/backend/scripts/debugging/onyx_vespa.py index f1c7eab4a8..7948251f52 100644 --- a/backend/scripts/debugging/onyx_vespa.py +++ b/backend/scripts/debugging/onyx_vespa.py @@ -5,17 +5,19 @@ Usage: python vespa_debug_tool.py --action [options] Actions: - config : Print Vespa configuration - connect : Check Vespa connectivity - list_docs : List documents - search : Search documents - update : Update a document - delete : Delete a document - get_acls : Get document ACLs + config : Print Vespa configuration + connect : Check Vespa connectivity + list_docs : List documents + list_connector : List documents for a specific connector-credential pair + search : Search documents + update : Update a document + delete : Delete a document + get_acls : Get document ACLs Options: --tenant-id : Tenant ID --connector-id : Connector ID + --cc-pair-id : Connector-Credential Pair ID --n : Number of documents (default 10) --query : Search query --doc-id : Document ID @@ -23,6 +25,7 @@ Options: Example: python vespa_debug_tool.py --action list_docs --tenant-id my_tenant 
--connector-id 1 --n 5 + python vespa_debug_tool.py --action list_connector --tenant-id my_tenant --cc-pair-id 1 --n 5 """ import argparse import json @@ -59,7 +62,6 @@ from onyx.document_index.vespa_constants import HIDDEN from onyx.document_index.vespa_constants import METADATA_LIST from onyx.document_index.vespa_constants import SEARCH_ENDPOINT from onyx.document_index.vespa_constants import SOURCE_TYPE -from onyx.document_index.vespa_constants import TENANT_ID from onyx.document_index.vespa_constants import VESPA_APP_CONTAINER_URL from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT from onyx.utils.logger import setup_logger @@ -108,8 +110,8 @@ def build_vespa_filters( if not include_hidden: filter_str += f"AND !({HIDDEN}=true) " - if filters.tenant_id and MULTI_TENANT: - filter_str += f'AND ({TENANT_ID} contains "{filters.tenant_id}") ' + # if filters.tenant_id and MULTI_TENANT: + # filter_str += f'AND ({TENANT_ID} contains "{filters.tenant_id}") ' if filters.access_control_list is not None: acl_str = _build_or_filters(ACCESS_CONTROL_LIST, filters.access_control_list) @@ -269,8 +271,8 @@ def search_for_document( if document_id is not None: conditions.append(f'document_id contains "{document_id}"') - if tenant_id is not None: - conditions.append(f'tenant_id contains "{tenant_id}"') + # if tenant_id is not None: + # conditions.append(f'tenant_id contains "{tenant_id}"') if conditions: yql_query += " where " + " and ".join(conditions) @@ -336,8 +338,8 @@ def list_documents(n: int = 10, tenant_id: Optional[str] = None) -> None: # List documents from any source, filtered by tenant if provided. logger.info(f"Listing up to {n} documents for tenant={tenant_id or 'ALL'}") yql = "select * from sources * where true" - if tenant_id: - yql += f" and tenant_id contains '{tenant_id}'" + # if tenant_id: + # yql += f" and tenant_id contains '{tenant_id}'" documents = query_vespa(yql, tenant_id=tenant_id, limit=n) print(f"Total documents found: {len(documents)}") logger.info(f"Total documents found: {len(documents)}") @@ -444,12 +446,15 @@ def get_document_acls( response = vespa_client.get(document_url) if response.status_code == 200: fields = response.json().get("fields", {}) + document_id = fields.get("document_id") or fields.get( "documentid", "Unknown" ) acls = fields.get("access_control_list", {}) title = fields.get("title", "") source_type = fields.get("source_type", "") + doc_sets = fields.get("document_sets", []) + user_file = fields.get("user_file", None) source_links_raw = fields.get("source_links", "{}") try: source_links = json.loads(source_links_raw) @@ -462,6 +467,8 @@ def get_document_acls( print(f"Source Links: {source_links}") print(f"Title: {title}") print(f"Source Type: {source_type}") + print(f"Document Sets: {doc_sets}") + print(f"User File: {user_file}") if MULTI_TENANT: print(f"Tenant ID: {fields.get('tenant_id', 'N/A')}") print("-" * 80) @@ -576,6 +583,90 @@ class VespaDebugging: # List documents for a tenant. 
list_documents(n, self.tenant_id) + def list_connector(self, cc_pair_id: int, n: int = 10) -> None: + # List documents for a specific connector-credential pair in the tenant + logger.info( + f"Listing documents for tenant={self.tenant_id}, cc_pair_id={cc_pair_id}" + ) + + # Get document IDs for this connector-credential pair + with get_session_with_tenant(tenant_id=self.tenant_id) as session: + # First get the connector_id from the cc_pair_id + cc_pair = ( + session.query(ConnectorCredentialPair) + .filter(ConnectorCredentialPair.id == cc_pair_id) + .first() + ) + + if not cc_pair: + print(f"No connector-credential pair found with ID {cc_pair_id}") + return + + connector_id = cc_pair.connector_id + + # Now get document IDs for this connector + doc_ids_data = ( + session.query(DocumentByConnectorCredentialPair.id) + .filter(DocumentByConnectorCredentialPair.connector_id == connector_id) + .distinct() + .all() + ) + + doc_ids = [doc_id[0] for doc_id in doc_ids_data] + + if not doc_ids: + print(f"No documents found for connector-credential pair ID {cc_pair_id}") + return + + print( + f"Found {len(doc_ids)} documents for connector-credential pair ID {cc_pair_id}" + ) + + # Limit to the first n document IDs + target_doc_ids = doc_ids[:n] + print(f"Retrieving details for first {len(target_doc_ids)} documents") + # Search for each document in Vespa + for doc_id in target_doc_ids: + docs = search_for_document(self.index_name, doc_id, self.tenant_id) + if not docs: + print(f"No chunks found in Vespa for document ID: {doc_id}") + continue + + print(f"Document ID: {doc_id}") + print(f"Found {len(docs)} chunks in Vespa") + + # Print each chunk with all fields except embeddings + for i, doc in enumerate(docs): + print(f" Chunk {i+1}:") + fields = doc.get("fields", {}) + + # Print all fields except embeddings + for field_name, field_value in sorted(fields.items()): + # Skip embedding fields + if "embedding" in field_name: + continue + + # Format the output based on field type + if isinstance(field_value, dict) or isinstance(field_value, list): + # Truncate dictionaries and lists + truncated = ( + str(field_value)[:50] + "..." + if len(str(field_value)) > 50 + else str(field_value) + ) + print(f" {field_name}: {truncated}") + else: + # Truncate strings and other values + str_value = str(field_value) + truncated = ( + str_value[:50] + "..." 
if len(str_value) > 50 else str_value + ) + print(f" {field_name}: {truncated}") + + print("-" * 40) # Separator between chunks + + print("=" * 80) # Separator between documents + def compare_chunk_count(self, document_id: str) -> tuple[int, int]: docs = search_for_document(self.index_name, document_id, max_hits=None) number_of_chunks_we_think_exist = get_number_of_chunks_we_think_exist( @@ -770,6 +861,7 @@ def main() -> None: "config", "connect", "list_docs", + "list_connector", "search", "update", "delete", @@ -781,6 +873,7 @@ def main() -> None: ) parser.add_argument("--tenant-id", help="Tenant ID") parser.add_argument("--connector-id", type=int, help="Connector ID") + parser.add_argument("--cc-pair-id", type=int, help="Connector-Credential Pair ID") parser.add_argument( "--n", type=int, default=10, help="Number of documents to retrieve" ) @@ -809,6 +902,10 @@ def main() -> None: vespa_debug.check_connectivity() elif args.action == "list_docs": vespa_debug.list_documents(args.n) + elif args.action == "list_connector": + if args.cc_pair_id is None: + parser.error("--cc-pair-id is required for list_connector action") + vespa_debug.list_connector(args.cc_pair_id, args.n) elif args.action == "search": if not args.query or args.connector_id is None: parser.error("--query and --connector-id are required for search action") @@ -825,9 +922,9 @@ def main() -> None: parser.error("--doc-id and --connector-id are required for delete action") vespa_debug.delete_document(args.connector_id, args.doc_id) elif args.action == "get_acls": - if args.connector_id is None: - parser.error("--connector-id is required for get_acls action") - vespa_debug.acls(args.connector_id, args.n) + if args.cc_pair_id is None: + parser.error("--cc-pair-id is required for get_acls action") + vespa_debug.acls(args.cc_pair_id, args.n) if __name__ == "__main__": diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py index ef638aebae..15b298a5d5 100644 --- a/backend/scripts/dev_run_background_jobs.py +++ b/backend/scripts/dev_run_background_jobs.py @@ -72,6 +72,19 @@ def run_jobs() -> None: "--queues=connector_indexing", ] + cmd_worker_user_files_indexing = [ + "celery", + "-A", + "onyx.background.celery.versioned_apps.indexing", + "worker", + "--pool=threads", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=user_files_indexing@%n", + "--queues=user_files_indexing", + ] + cmd_worker_monitoring = [ "celery", "-A", @@ -110,6 +123,13 @@ def run_jobs() -> None: cmd_worker_indexing, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) + worker_user_files_indexing_process = subprocess.Popen( + cmd_worker_user_files_indexing, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + worker_monitoring_process = subprocess.Popen( cmd_worker_monitoring, stdout=subprocess.PIPE, @@ -134,6 +154,10 @@ def run_jobs() -> None: worker_indexing_thread = threading.Thread( target=monitor_process, args=("INDEX", worker_indexing_process) ) + worker_user_files_indexing_thread = threading.Thread( + target=monitor_process, + args=("USER_FILES_INDEX", worker_user_files_indexing_process), + ) worker_monitoring_thread = threading.Thread( target=monitor_process, args=("MONITORING", worker_monitoring_process) ) @@ -143,6 +167,7 @@ def run_jobs() -> None: worker_light_thread.start() worker_heavy_thread.start() worker_indexing_thread.start() + worker_user_files_indexing_thread.start() worker_monitoring_thread.start() beat_thread.start() @@ -150,6 +175,7 
@@ def run_jobs() -> None: worker_light_thread.join() worker_heavy_thread.join() worker_indexing_thread.join() + worker_user_files_indexing_thread.join() worker_monitoring_thread.join() beat_thread.join() diff --git a/backend/scripts/query_time_check/seed_dummy_docs.py b/backend/scripts/query_time_check/seed_dummy_docs.py index 29c668f8d6..e40d20092a 100644 --- a/backend/scripts/query_time_check/seed_dummy_docs.py +++ b/backend/scripts/query_time_check/seed_dummy_docs.py @@ -93,6 +93,8 @@ def generate_dummy_chunk( return DocMetadataAwareIndexChunk.from_index_chunk( index_chunk=chunk, + user_file=None, + user_folder=None, access=DocumentAccess.build( user_emails=user_emails, user_groups=user_groups, diff --git a/backend/supervisord.conf b/backend/supervisord.conf index c8f0e4f445..b78d7b49f0 100644 --- a/backend/supervisord.conf +++ b/backend/supervisord.conf @@ -65,6 +65,18 @@ autorestart=true startsecs=10 stopasgroup=true +[program:celery_worker_user_files_indexing] +command=celery -A onyx.background.celery.versioned_apps.indexing worker + --loglevel=INFO + --hostname=user_files_indexing@%%n + -Q user_files_indexing +stdout_logfile=/var/log/celery_worker_user_files_indexing.log +stdout_logfile_maxbytes=16MB +redirect_stderr=true +autorestart=true +startsecs=10 +stopasgroup=true + [program:celery_worker_monitoring] command=celery -A onyx.background.celery.versioned_apps.monitoring worker --loglevel=INFO @@ -108,6 +120,7 @@ command=tail -qF /var/log/celery_worker_light.log /var/log/celery_worker_heavy.log /var/log/celery_worker_indexing.log + /var/log/celery_worker_user_files_indexing.log /var/log/celery_worker_monitoring.log /var/log/slack_bot.log stdout_logfile=/dev/stdout diff --git a/backend/tests/integration/common_utils/constants.py b/backend/tests/integration/common_utils/constants.py index 2a5f338b36..5054e4963d 100644 --- a/backend/tests/integration/common_utils/constants.py +++ b/backend/tests/integration/common_utils/constants.py @@ -3,7 +3,7 @@ import os ADMIN_USER_NAME = "admin_user" API_SERVER_PROTOCOL = os.getenv("API_SERVER_PROTOCOL") or "http" -API_SERVER_HOST = os.getenv("API_SERVER_HOST") or "localhost" +API_SERVER_HOST = os.getenv("API_SERVER_HOST") or "127.0.0.1" API_SERVER_PORT = os.getenv("API_SERVER_PORT") or "8080" API_SERVER_URL = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}" MAX_DELAY = 60 diff --git a/backend/tests/integration/common_utils/managers/document.py b/backend/tests/integration/common_utils/managers/document.py index 29c4bfd221..9f4c46fd7e 100644 --- a/backend/tests/integration/common_utils/managers/document.py +++ b/backend/tests/integration/common_utils/managers/document.py @@ -166,18 +166,24 @@ class DocumentManager: } # Left this here for debugging purposes. 
- # import json - # for doc in retrieved_docs.values(): - # printable_doc = doc.copy() - # print(printable_doc.keys()) - # printable_doc.pop("embeddings") - # printable_doc.pop("title_embedding") - # print(json.dumps(printable_doc, indent=2)) + import json + + print("DEBUGGING DOCUMENTS") + print(retrieved_docs) + for doc in retrieved_docs.values(): + printable_doc = doc.copy() + print(printable_doc.keys()) + printable_doc.pop("embeddings") + printable_doc.pop("title_embedding") + print(json.dumps(printable_doc, indent=2)) for document in cc_pair.documents: retrieved_doc = retrieved_docs.get(document.id) if not retrieved_doc: if not verify_deleted: + print(f"Document not found: {document.id}") + print(retrieved_docs.keys()) + print(retrieved_docs.values()) raise ValueError(f"Document not found: {document.id}") continue if verify_deleted: diff --git a/backend/tests/integration/common_utils/managers/document_set.py b/backend/tests/integration/common_utils/managers/document_set.py index 7670f42fa3..774cbc9c9e 100644 --- a/backend/tests/integration/common_utils/managers/document_set.py +++ b/backend/tests/integration/common_utils/managers/document_set.py @@ -139,11 +139,23 @@ class DocumentSetManager: break if time.time() - start > MAX_DELAY: + not_synced_doc_sets = [ + doc_set for doc_set in doc_sets if not doc_set.is_up_to_date + ] raise TimeoutError( - f"Document sets were not synced within the {MAX_DELAY} seconds" + f"Document sets were not synced within the {MAX_DELAY} seconds. " + f"Remaining unsynced document sets: {len(not_synced_doc_sets)}. " + f"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}" ) else: - print("Document sets were not synced yet, waiting...") + not_synced_doc_sets = [ + doc_set for doc_set in doc_sets if not doc_set.is_up_to_date + ] + print( + f"Document sets were not synced yet, waiting... " + f"{len(not_synced_doc_sets)}/{len(doc_sets)} document sets still syncing. 
" + f"IDs: {[doc_set.id for doc_set in not_synced_doc_sets]}" + ) time.sleep(2) diff --git a/web/Dockerfile b/web/Dockerfile index 557ec3957d..26e50f2068 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -176,4 +176,3 @@ ENV NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY=${NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY} # expose via cli CMD ["node", "server.js"] - diff --git a/web/next.config.js b/web/next.config.js index 519c313e25..a16d396434 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -37,6 +37,7 @@ const nextConfig = { pathname: "/s2/favicons/**", }, ], + unoptimized: true, // Disable image optimization to avoid requiring Sharp }, async headers() { return [ diff --git a/web/public/Amazon.png b/web/public/Amazon.png new file mode 100644 index 0000000000..83a8cdeb5f Binary files /dev/null and b/web/public/Amazon.png differ diff --git a/web/public/Amazon.webp b/web/public/Amazon.webp deleted file mode 100644 index cd8574fa59..0000000000 Binary files a/web/public/Amazon.webp and /dev/null differ diff --git a/web/public/Google.png b/web/public/Google.png new file mode 100644 index 0000000000..3d6653a6f3 Binary files /dev/null and b/web/public/Google.png differ diff --git a/web/public/Google.webp b/web/public/Google.webp deleted file mode 100644 index 7b903159b0..0000000000 Binary files a/web/public/Google.webp and /dev/null differ diff --git a/web/public/Productboard.png b/web/public/Productboard.png new file mode 100644 index 0000000000..29931d2656 Binary files /dev/null and b/web/public/Productboard.png differ diff --git a/web/public/Productboard.webp b/web/public/Productboard.webp deleted file mode 100644 index 2f19fdd766..0000000000 Binary files a/web/public/Productboard.webp and /dev/null differ diff --git a/web/public/discord.png b/web/public/discord.png new file mode 100644 index 0000000000..f0a344a744 Binary files /dev/null and b/web/public/discord.png differ diff --git a/web/public/discord.webp b/web/public/discord.webp deleted file mode 100644 index 365ad8153e..0000000000 Binary files a/web/public/discord.webp and /dev/null differ diff --git a/web/src/app/admin/assistants/AssistantEditor.tsx b/web/src/app/admin/assistants/AssistantEditor.tsx index 2ca71df92f..c47573416e 100644 --- a/web/src/app/admin/assistants/AssistantEditor.tsx +++ b/web/src/app/admin/assistants/AssistantEditor.tsx @@ -64,10 +64,10 @@ import { debounce } from "lodash"; import { LLMProviderView } from "../configuration/llm/interfaces"; import StarterMessagesList from "./StarterMessageList"; -import { Switch, SwitchField } from "@/components/ui/switch"; +import { SwitchField } from "@/components/ui/switch"; import { generateIdenticon } from "@/components/assistants/AssistantIcon"; import { BackButton } from "@/components/BackButton"; -import { Checkbox, CheckboxField } from "@/components/ui/checkbox"; +import { Checkbox } from "@/components/ui/checkbox"; import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; import { MinimalUserSnapshot } from "@/lib/types"; import { useUserGroups } from "@/lib/hooks"; @@ -76,13 +76,31 @@ import { Option as DropdownOption, } from "@/components/Dropdown"; import { SourceChip } from "@/app/chat/input/ChatInputBar"; -import { TagIcon, UserIcon, XIcon, InfoIcon } from "lucide-react"; +import { + TagIcon, + UserIcon, + FileIcon, + FolderIcon, + InfoIcon, + BookIcon, +} from "lucide-react"; import { LLMSelector } from "@/components/llm/LLMSelector"; import useSWR from "swr"; import { errorHandlingFetcher } from "@/lib/fetcher"; import { ConfirmEntityModal } from 
"@/components/modals/ConfirmEntityModal"; -import Title from "@/components/ui/title"; + +import { FilePickerModal } from "@/app/chat/my-documents/components/FilePicker"; +import { useDocumentsContext } from "@/app/chat/my-documents/DocumentsContext"; +import { + FileResponse, + FolderResponse, +} from "@/app/chat/my-documents/DocumentsContext"; +import { RadioGroup } from "@/components/ui/radio-group"; +import { RadioGroupItemField } from "@/components/ui/RadioGroupItemField"; import { SEARCH_TOOL_ID } from "@/app/chat/tools/constants"; +import TextView from "@/components/chat/TextView"; +import { MinimalOnyxDocument } from "@/lib/search/interfaces"; +import { TabToggle } from "@/components/ui/TabToggle"; function findSearchTool(tools: ToolSnapshot[]) { return tools.find((tool) => tool.in_code_tool_id === SEARCH_TOOL_ID); @@ -147,6 +165,9 @@ export function AssistantEditor({ "#6FFFFF", ]; + const [presentingDocument, setPresentingDocument] = + useState(null); + const [filePickerModalOpen, setFilePickerModalOpen] = useState(false); const [showAdvancedOptions, setShowAdvancedOptions] = useState(false); // state to persist across formik reformatting @@ -221,6 +242,16 @@ export function AssistantEditor({ enabledToolsMap[tool.id] = personaCurrentToolIds.includes(tool.id); }); + const { + selectedFiles, + selectedFolders, + addSelectedFile, + removeSelectedFile, + addSelectedFolder, + removeSelectedFolder, + clearSelectedItems, + } = useDocumentsContext(); + const [showVisibilityWarning, setShowVisibilityWarning] = useState(false); const initialValues = { @@ -259,6 +290,9 @@ export function AssistantEditor({ (u) => u.id !== existingPersona.owner?.id ) ?? [], selectedGroups: existingPersona?.groups ?? [], + user_file_ids: existingPersona?.user_file_ids ?? [], + user_folder_ids: existingPersona?.user_folder_ids ?? [], + knowledge_source: "user_files", is_default_persona: existingPersona?.is_default_persona ?? false, }; @@ -352,6 +386,10 @@ export function AssistantEditor({ } } }; + const canShowKnowledgeSource = + ccPairs.length > 0 && + searchTool && + !(user?.role != "admin" && documentSets.length === 0); return (
@@ -368,7 +406,26 @@ export function AssistantEditor({
)} + {filePickerModalOpen && ( + { + setFilePickerModalOpen(false); + }} + onSave={() => { + setFilePickerModalOpen(false); + }} + buttonContent="Add to Assistant" + /> + )} + {presentingDocument && ( + setPresentingDocument(null)} + /> + )} {labelToDelete && ( file.id), + user_folder_ids: selectedFolders.map((folder) => folder.id), }; let personaResponse; + if (isUpdate) { personaResponse = await updatePersona( existingPersona.id, @@ -792,10 +853,7 @@ export function AssistantEditor({
-
+

Knowledge

@@ -834,92 +892,170 @@ export function AssistantEditor({
-

- Attach additional unique knowledge to this assistant -

)} - {ccPairs.length > 0 && - searchTool && - values.enabled_tools_map[searchTool.id] && - !(user?.role != "admin" && documentSets.length === 0) && ( - -
- {ccPairs.length > 0 && ( - <> - -
- - <> - Select which{" "} - {!user || user.role === "admin" ? ( - - Document Sets - - ) : ( - "Document Sets" - )}{" "} - this Assistant should use to inform its - responses. If none are specified, the - Assistant will reference all available - documents. - - + {searchTool && values.enabled_tools_map[searchTool.id] && ( +
+ {canShowKnowledgeSource && ( + <> +
+
+
+ setFieldValue( + "knowledge_source", + "user_files" + ) + } + > +
+ +
+

+ User Knowledge +

- {documentSets.length > 0 ? ( - ( -
-
- {documentSets.map((documentSet) => ( - { - const index = - values.document_set_ids.indexOf( - documentSet.id - ); - if (index !== -1) { - arrayHelpers.remove(index); - } else { - arrayHelpers.push( - documentSet.id - ); - } - }} - /> - ))} -
-
- )} - /> - ) : ( -

- - + Create Document Set - +

+ setFieldValue( + "knowledge_source", + "team_knowledge" + ) + } + > +
+ +
+

+ Team Knowledge

- )} - - )} -
- - )} +
+
+
+ + )} + + {values.knowledge_source === "user_files" && + !existingPersona?.is_default_persona && + !admin && ( +
+ + Click below to add documents or folders from the + My Documents feature + + {(selectedFiles.length > 0 || + selectedFolders.length > 0) && ( +
+ {selectedFiles.map((file) => ( + {}} + title={file.name} + icon={} + /> + ))} + {selectedFolders.map((folder) => ( + {}} + title={folder.name} + icon={} + /> + ))} +
+ )} + +
+ )} + + {values.knowledge_source === "team_knowledge" && + ccPairs.length > 0 && ( +
+
+ + <> + Select which{" "} + {!user || user.role === "admin" ? ( + + Document Sets + + ) : ( + "Team Document Sets" + )}{" "} + this Assistant should use to inform its + responses. If none are specified, the + Assistant will reference all available + documents. + + +
+ + {documentSets.length > 0 ? ( + ( +
+
+ {documentSets.map((documentSet) => ( + { + const index = + values.document_set_ids.indexOf( + documentSet.id + ); + if (index !== -1) { + arrayHelpers.remove(index); + } else { + arrayHelpers.push(documentSet.id); + } + }} + /> + ))} +
+
+ )} + /> + ) : ( +

+ + + Create Document Set + +

+ )} +
+ )} +
+ )}
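The editor above persists My Documents selections through the share/unshare endpoints added in backend/onyx/server/user_documents/api.py. A minimal sketch of the same calls made directly from Python (the base URL is an assumption, and real requests would also need the auth session cookie):

import requests

BASE_URL = "http://localhost:8080"  # assumed local API server

def share_file(file_id: int, assistant_id: int) -> None:
    # POST /user/file/{file_id}/share with a ShareRequest body
    resp = requests.post(
        f"{BASE_URL}/user/file/{file_id}/share",
        json={"assistant_id": assistant_id},
    )
    resp.raise_for_status()

def unshare_folder(folder_id: int, assistant_id: int) -> None:
    # POST /user/folder/{folder_id}/unshare detaches the folder and its files
    resp = requests.post(
        f"{BASE_URL}/user/folder/{folder_id}/unshare",
        json={"assistant_id": assistant_id},
    )
    resp.raise_for_status()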
diff --git a/web/src/app/admin/assistants/assistantFileUtils.ts b/web/src/app/admin/assistants/assistantFileUtils.ts
new file mode 100644
index 0000000000..92f332a79b
--- /dev/null
+++ b/web/src/app/admin/assistants/assistantFileUtils.ts
@@ -0,0 +1,106 @@
+import {
+  FileResponse,
+  FolderResponse,
+} from "@/app/chat/my-documents/DocumentsContext";
+
+export interface AssistantFileChanges {
+  filesToShare: number[];
+  filesToUnshare: number[];
+  foldersToShare: number[];
+  foldersToUnshare: number[];
+}
+
+export function calculateFileChanges(
+  existingFileIds: number[],
+  existingFolderIds: number[],
+  selectedFiles: FileResponse[],
+  selectedFolders: FolderResponse[]
+): AssistantFileChanges {
+  const selectedFileIds = selectedFiles.map((file) => file.id);
+  const selectedFolderIds = selectedFolders.map((folder) => folder.id);
+
+  return {
+    filesToShare: selectedFileIds.filter((id) => !existingFileIds.includes(id)),
+    filesToUnshare: existingFileIds.filter(
+      (id) => !selectedFileIds.includes(id)
+    ),
+    foldersToShare: selectedFolderIds.filter(
+      (id) => !existingFolderIds.includes(id)
+    ),
+    foldersToUnshare: existingFolderIds.filter(
+      (id) => !selectedFolderIds.includes(id)
+    ),
+  };
+}
+
+export async function shareFiles(
+  assistantId: number,
+  fileIds: number[]
+): Promise<void> {
+  for (const fileId of fileIds) {
+    await fetch(`/api/user/file/${fileId}/share`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ assistant_id: assistantId }),
+    });
+  }
+}
+
+export async function unshareFiles(
+  assistantId: number,
+  fileIds: number[]
+): Promise<void> {
+  for (const fileId of fileIds) {
+    await fetch(`/api/user/file/${fileId}/unshare`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ assistant_id: assistantId }),
+    });
+  }
+}
+
+export async function shareFolders(
+  assistantId: number,
+  folderIds: number[]
+): Promise<void> {
+  for (const folderId of folderIds) {
+    await fetch(`/api/user/folder/${folderId}/share`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ assistant_id: assistantId }),
+    });
+  }
+}
+
+export async function unshareFolders(
+  assistantId: number,
+  folderIds: number[]
+): Promise<void> {
+  for (const folderId of folderIds) {
+    await fetch(`/api/user/folder/${folderId}/unshare`, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ assistant_id: assistantId }),
+    });
+  }
+}
+
+export async function updateAssistantFiles(
+  assistantId: number,
+  changes: AssistantFileChanges
+): Promise<void> {
+  await Promise.all([
+    shareFiles(assistantId, changes.filesToShare),
+    unshareFiles(assistantId, changes.filesToUnshare),
+    shareFolders(assistantId, changes.foldersToShare),
+    unshareFolders(assistantId, changes.foldersToUnshare),
+  ]);
+}
diff --git a/web/src/app/admin/assistants/interfaces.ts b/web/src/app/admin/assistants/interfaces.ts
index 25bcaf868b..4d74a35065 100644
--- a/web/src/app/admin/assistants/interfaces.ts
+++ b/web/src/app/admin/assistants/interfaces.ts
@@ -45,6 +45,8 @@ export interface Persona {
   icon_color?: string;
   uploaded_image_id?: string;
   labels?: PersonaLabel[];
+  user_file_ids: number[];
+  user_folder_ids: number[];
 }
 
 export interface PersonaLabel {
diff --git a/web/src/app/admin/assistants/lib.ts b/web/src/app/admin/assistants/lib.ts
index 70dc8035b1..ae0e50b94e 100644
--- a/web/src/app/admin/assistants/lib.ts
+++ b/web/src/app/admin/assistants/lib.ts
@@ -29,6 +29,8 @@
interface PersonaUpsertRequest { is_default_persona: boolean; display_priority: number | null; label_ids: number[] | null; + user_file_ids: number[] | null; + user_folder_ids: number[] | null; } export interface PersonaUpsertParameters { @@ -56,6 +58,8 @@ export interface PersonaUpsertParameters { uploaded_image: File | null; is_default_persona: boolean; label_ids: number[] | null; + user_file_ids: number[]; + user_folder_ids: number[]; } export const createPersonaLabel = (name: string) => { @@ -114,7 +118,10 @@ function buildPersonaUpsertRequest( icon_shape, remove_image, search_start_date, + user_file_ids, + user_folder_ids, } = creationRequest; + return { name, description, @@ -145,6 +152,8 @@ function buildPersonaUpsertRequest( starter_messages: creationRequest.starter_messages ?? null, display_priority: null, label_ids: creationRequest.label_ids ?? null, + user_file_ids: user_file_ids ?? null, + user_folder_ids: user_folder_ids ?? null, }; } @@ -175,7 +184,6 @@ export async function createPersona( return null; } } - const createPersonaResponse = await fetch("/api/persona", { method: "POST", headers: { @@ -345,4 +353,6 @@ export const defaultPersona: Persona = { owner: null, icon_shape: 50910, icon_color: "#FF6F6F", + user_file_ids: [], + user_folder_ids: [], }; diff --git a/web/src/app/admin/configuration/llm/interfaces.ts b/web/src/app/admin/configuration/llm/interfaces.ts index 80971e0cc7..c4a0fcef03 100644 --- a/web/src/app/admin/configuration/llm/interfaces.ts +++ b/web/src/app/admin/configuration/llm/interfaces.ts @@ -36,6 +36,12 @@ export interface WellKnownLLMProviderDescriptor { groups: number[]; } +export interface LLMModelDescriptor { + modelName: string; + provider: string; + maxTokens: number; +} + export interface LLMProvider { name: string; provider: string; @@ -49,6 +55,7 @@ export interface LLMProvider { groups: number[]; display_model_names: string[] | null; deployment_name: string | null; + model_token_limits: { [key: string]: number } | null; default_vision_model: string | null; is_default_vision_provider: boolean | null; } @@ -74,6 +81,7 @@ export interface LLMProviderDescriptor { is_public: boolean; groups: number[]; display_model_names: string[] | null; + model_token_limits: { [key: string]: number } | null; } export const getProviderIcon = (providerName: string, modelName?: string) => { diff --git a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx index ae2661e6a1..592c97f419 100644 --- a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx +++ b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx @@ -434,7 +434,7 @@ export default function AddConnector({ > {(formikProps) => { return ( -
+
{popup} {uploading && ( diff --git a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx index e57be36a10..fe85f001a4 100644 --- a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx +++ b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx @@ -221,6 +221,7 @@ border border-border dark:border-neutral-700 {timeAgo(ccPairsIndexingStatus?.last_success) || "-"} + {getActivityBadge()} {isPaidEnterpriseFeaturesEnabled && ( @@ -251,12 +252,19 @@ border border-border dark:border-neutral-700 {isEditable && ( - - - + + + + + + +

Manage Connector

+
+
+
)}
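Aside: a minimal usage sketch, not part of the diff, showing how the new assistantFileUtils helpers are meant to compose. The saveAssistantKnowledge wrapper and its arguments are hypothetical; the imports match the files added in this PR.

// Hypothetical caller: diff the user's current selection against what the
// assistant already has, then apply the resulting share/unshare calls.
import {
  calculateFileChanges,
  updateAssistantFiles,
} from "@/app/admin/assistants/assistantFileUtils";
import {
  FileResponse,
  FolderResponse,
} from "@/app/chat/my-documents/DocumentsContext";

export async function saveAssistantKnowledge(
  assistantId: number,
  existingFileIds: number[],
  existingFolderIds: number[],
  selectedFiles: FileResponse[],
  selectedFolders: FolderResponse[]
): Promise<void> {
  // Set difference in both directions: newly selected ids get shared,
  // ids no longer selected get unshared.
  const changes = calculateFileChanges(
    existingFileIds,
    existingFolderIds,
    selectedFiles,
    selectedFolders
  );
  // The four share/unshare groups run concurrently via Promise.all,
  // while requests within each group are issued sequentially.
  await updateAssistantFiles(assistantId, changes);
}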
diff --git a/web/src/app/assistants/SidebarWrapper.tsx b/web/src/app/assistants/SidebarWrapper.tsx index aaeccfeeb8..c16b8481e5 100644 --- a/web/src/app/assistants/SidebarWrapper.tsx +++ b/web/src/app/assistants/SidebarWrapper.tsx @@ -21,18 +21,20 @@ import { HistorySidebar } from "../chat/sessionSidebar/HistorySidebar"; import { useAssistants } from "@/components/context/AssistantsContext"; import AssistantModal from "./mine/AssistantModal"; import { useSidebarShortcut } from "@/lib/browserUtilities"; +import { UserSettingsModal } from "../chat/modal/UserSettingsModal"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { useUser } from "@/components/user/UserProvider"; interface SidebarWrapperProps { - initiallyToggled: boolean; size?: "sm" | "lg"; children: ReactNode; } export default function SidebarWrapper({ - initiallyToggled, size = "sm", children, }: SidebarWrapperProps) { + const { sidebarInitiallyVisible: initiallyToggled } = useChatContext(); const [sidebarVisible, setSidebarVisible] = useState(initiallyToggled); const [showDocSidebar, setShowDocSidebar] = useState(false); // State to track if sidebar is open // Used to maintain a "time out" for history sidebar so our existing refs can have time to process change @@ -61,6 +63,7 @@ export default function SidebarWrapper({ }, 200); }; + const { popup, setPopup } = usePopup(); const settings = useContext(SettingsContext); useSidebarVisibility({ sidebarVisible, @@ -70,13 +73,18 @@ export default function SidebarWrapper({ mobile: settings?.isMobile, }); + const { user } = useUser(); const [showAssistantsModal, setShowAssistantsModal] = useState(false); const router = useRouter(); + const [userSettingsToggled, setUserSettingsToggled] = useState(false); + const { llmProviders } = useChatContext(); useSidebarShortcut(router, toggleSidebar); return (
+ {popup} + {showAssistantsModal && ( setShowAssistantsModal(false)} /> )} @@ -114,9 +122,19 @@ export default function SidebarWrapper({ />
+ {userSettingsToggled && ( + setUserSettingsToggled(false)} + defaultModel={user?.preferences?.default_model!} + /> + )}
setUserSettingsToggled(true)} sidebarToggled={sidebarVisible} toggleSidebar={toggleSidebar} page="chat" @@ -135,13 +153,7 @@ export default function SidebarWrapper({ ${sidebarVisible ? "w-[250px]" : "w-[0px]"}`} /> -
- {children} -
+
{children}
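Aside: a sketch, not part of the diff, of the call-site effect of the SidebarWrapper change above. The page component is hypothetical and assumes it renders beneath the provider that backs useChatContext.

import SidebarWrapper from "@/app/assistants/SidebarWrapper";

// Callers no longer pass initiallyToggled; the wrapper now reads
// sidebarInitiallyVisible from useChatContext() itself, so the caller
// only chooses the size and supplies children.
export default function AssistantsGalleryPage() {
  return (
    <SidebarWrapper size="lg">
      <div>gallery content</div>
    </SidebarWrapper>
  );
}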
diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx index f899c0f34d..5ac868a835 100644 --- a/web/src/app/chat/ChatPage.tsx +++ b/web/src/app/chat/ChatPage.tsx @@ -24,6 +24,7 @@ import { constructSubQuestions, DocumentsResponse, AgenticMessageResponseIDInfo, + UserKnowledgeFilePacket, } from "./interfaces"; import Prism from "prismjs"; @@ -35,7 +36,6 @@ import { buildChatUrl, buildLatestMessageChain, createChatSession, - deleteAllChatSessions, getCitedDocumentsFromMessage, getHumanAndAIMessageFromMessageNumber, getLastSuccessfulMessageId, @@ -66,7 +66,6 @@ import { } from "react"; import { usePopup } from "@/components/admin/connectors/Popup"; import { SEARCH_PARAM_NAMES, shouldSubmitOnLoad } from "./searchParams"; -import { useDocumentSelection } from "./useDocumentSelection"; import { LlmDescriptor, useFilters, useLlmManager } from "@/lib/hooks"; import { ChatState, FeedbackType, RegenerationState } from "./types"; import { DocumentResults } from "./documentSidebar/DocumentResults"; @@ -87,6 +86,7 @@ import { SubQuestionPiece, AgentAnswerPiece, RefinedAnswerImprovement, + MinimalOnyxDocument, } from "@/lib/search/interfaces"; import { buildFilters } from "@/lib/search/utils"; import { SettingsContext } from "@/components/settings/SettingsProvider"; @@ -100,14 +100,13 @@ import { ChatInputBar } from "./input/ChatInputBar"; import { useChatContext } from "@/components/context/ChatContext"; import { v4 as uuidv4 } from "uuid"; import { ChatPopup } from "./ChatPopup"; - import FunctionalHeader from "@/components/chat/Header"; import { useSidebarVisibility } from "@/components/chat/hooks"; import { PRO_SEARCH_TOGGLED_COOKIE_NAME, SIDEBAR_TOGGLED_COOKIE_NAME, } from "@/components/resizable/constants"; -import FixedLogo from "../../components/logo/FixedLogo"; +import FixedLogo from "@/components/logo/FixedLogo"; import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal"; @@ -134,6 +133,16 @@ import { UserSettingsModal } from "./modal/UserSettingsModal"; import { AgenticMessage } from "./message/AgenticMessage"; import AssistantModal from "../assistants/mine/AssistantModal"; import { useSidebarShortcut } from "@/lib/browserUtilities"; +import { FilePickerModal } from "./my-documents/components/FilePicker"; + +import { SourceMetadata } from "@/lib/search/interfaces"; +import { ValidSources } from "@/lib/types"; +import { + FileUploadResponse, + FileResponse, + FolderResponse, + useDocumentsContext, +} from "./my-documents/DocumentsContext"; import { ChatSearchModal } from "./chat_search/ChatSearchModal"; import { ErrorBanner } from "./message/Resubmit"; import MinimalMarkdown from "@/components/chat/MinimalMarkdown"; @@ -147,11 +156,15 @@ export function ChatPage({ documentSidebarInitialWidth, sidebarVisible, firstMessage, + initialFolders, + initialFiles, }: { toggle: (toggled?: boolean) => void; documentSidebarInitialWidth?: number; sidebarVisible: boolean; firstMessage?: string; + initialFolders?: any; + initialFiles?: any; }) { const router = useRouter(); const searchParams = useSearchParams(); @@ -168,11 +181,27 @@ export function ChatPage({ proSearchToggled, } = useChatContext(); + const { + selectedFiles, + selectedFolders, + addSelectedFile, + addSelectedFolder, + removeSelectedFolder, + clearSelectedItems, + folders: userFolders, + files: allUserFiles, + uploadFile, + removeSelectedFile, + currentMessageFiles, + setCurrentMessageFiles, + } = useDocumentsContext(); + const defaultAssistantIdRaw = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID); const 
defaultAssistantId = defaultAssistantIdRaw ? parseInt(defaultAssistantIdRaw) : undefined; + // Function declarations need to be outside of blocks in strict mode function useScreenSize() { const [screenSize, setScreenSize] = useState({ width: typeof window !== "undefined" ? window.innerWidth : 0, @@ -201,6 +230,8 @@ export function ChatPage({ const settings = useContext(SettingsContext); const enterpriseSettings = settings?.enterpriseSettings; + const [viewingFilePicker, setViewingFilePicker] = useState(false); + const [toggleDocSelection, setToggleDocSelection] = useState(false); const [documentSidebarVisible, setDocumentSidebarVisible] = useState(false); const [proSearchEnabled, setProSearchEnabled] = useState(proSearchToggled); const toggleProSearch = () => { @@ -297,16 +328,6 @@ export function ChatPage({ SEARCH_PARAM_NAMES.TEMPERATURE ); - const defaultTemperature = search_param_temperature - ? parseFloat(search_param_temperature) - : selectedAssistant?.tools.some( - (tool) => - tool.in_code_tool_id === SEARCH_TOOL_ID || - tool.in_code_tool_id === INTERNET_SEARCH_TOOL_ID - ) - ? 0 - : 0.7; - const setSelectedAssistantFromId = (assistantId: number) => { // NOTE: also intentionally look through available assistants here, so that // even if the user has hidden an assistant they can still go back to it @@ -320,7 +341,7 @@ export function ChatPage({ useState(null); const [presentingDocument, setPresentingDocument] = - useState(null); + useState(null); // Current assistant is decided based on this ordering // 1. Alternative assistant (assistant selected explicitly by user) @@ -350,9 +371,14 @@ export function ChatPage({ const noAssistants = liveAssistant == null || liveAssistant == undefined; - const availableSources = ccPairs.map((ccPair) => ccPair.source); - const uniqueSources = Array.from(new Set(availableSources)); - const sources = uniqueSources.map((source) => getSourceMetadata(source)); + const availableSources: ValidSources[] = useMemo(() => { + return ccPairs.map((ccPair) => ccPair.source); + }, [ccPairs]); + + const sources: SourceMetadata[] = useMemo(() => { + const uniqueSources = Array.from(new Set(availableSources)); + return uniqueSources.map((source) => getSourceMetadata(source)); + }, [availableSources]); const stopGenerating = () => { const currentSession = currentSessionId(); @@ -426,7 +452,6 @@ export function ChatPage({ const isChatSessionSwitch = existingChatSessionId !== priorChatSessionId; if (isChatSessionSwitch) { // de-select documents - clearSelectedDocuments(); // reset all filters filterManager.setSelectedDocumentSets([]); @@ -440,6 +465,7 @@ export function ChatPage({ // if switching from one chat to another, then need to scroll again // if we're creating a brand new chat, then don't need to scroll if (chatSessionIdRef.current !== null) { + clearSelectedDocuments(); setHasPerformedInitialScroll(false); } } @@ -466,7 +492,6 @@ export function ChatPage({ return; } - clearSelectedDocuments(); setIsFetchingChatMessages(true); const response = await fetch( `/api/chat/get-chat-session/${existingChatSessionId}` @@ -549,6 +574,37 @@ export function ChatPage({ // eslint-disable-next-line react-hooks/exhaustive-deps }, [existingChatSessionId, searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID)]); + useEffect(() => { + const userFolderId = searchParams.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID); + const allMyDocuments = searchParams.get( + SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS + ); + + if (userFolderId) { + const userFolder = userFolders.find( + (folder) => folder.id === 
parseInt(userFolderId) + ); + if (userFolder) { + addSelectedFolder(userFolder); + } + } else if (allMyDocuments === "true" || allMyDocuments === "1") { + // Clear any previously selected folders + + clearSelectedItems(); + + // Add all user folders to the current context + userFolders.forEach((folder) => { + addSelectedFolder(folder); + }); + } + }, [ + userFolders, + searchParams.get(SEARCH_PARAM_NAMES.USER_FOLDER_ID), + searchParams.get(SEARCH_PARAM_NAMES.ALL_MY_DOCUMENTS), + addSelectedFolder, + clearSelectedItems, + ]); + const [message, setMessage] = useState( searchParams.get(SEARCH_PARAM_NAMES.USER_PROMPT) || "" ); @@ -793,22 +849,17 @@ export function ChatPage({ const currentSessionChatState = currentChatState(); const currentSessionRegenerationState = currentRegenerationState(); - // uploaded files - const [currentMessageFiles, setCurrentMessageFiles] = useState< - FileDescriptor[] - >([]); - // for document display // NOTE: -1 is a special designation that means the latest AI message const [selectedMessageForDocDisplay, setSelectedMessageForDocDisplay] = useState(null); - const { aiMessage } = selectedMessageForDocDisplay + const { aiMessage, humanMessage } = selectedMessageForDocDisplay ? getHumanAndAIMessageFromMessageNumber( messageHistory, selectedMessageForDocDisplay ) - : { aiMessage: null }; + : { aiMessage: null, humanMessage: null }; const [chatSessionSharedStatus, setChatSessionSharedStatus] = useState(ChatSessionSharedStatus.Private); @@ -834,13 +885,6 @@ export function ChatPage({ ); } }, [submittedMessage, currentSessionChatState]); - - const [ - selectedDocuments, - toggleDocumentSelection, - clearSelectedDocuments, - selectedDocumentTokens, - ] = useDocumentSelection(); // just choose a conservative default, this will be updated in the // background on initial load / on persona change const [maxTokens, setMaxTokens] = useState(4096); @@ -1310,6 +1354,7 @@ export function ChatPage({ let includeAgentic = false; let secondLevelMessageId: number | null = null; let isAgentic: boolean = false; + let files: FileDescriptor[] = []; let initialFetchDetails: null | { user_message_id: number; @@ -1341,7 +1386,9 @@ export function ChatPage({ filterManager.selectedSources, filterManager.selectedDocumentSets, filterManager.timeRange, - filterManager.selectedTags + filterManager.selectedTags, + selectedFiles.map((file) => file.id), + selectedFolders.map((folder) => folder.id) ), selectedDocumentIds: selectedDocuments .filter( @@ -1351,6 +1398,11 @@ export function ChatPage({ .map((document) => document.db_doc_id as number), queryOverride, forceSearch, + userFolderIds: selectedFolders.map((folder) => folder.id), + userFileIds: selectedFiles + .filter((file) => file.id !== undefined && file.id !== null) + .map((file) => file.id), + regenerate: regenerationRequest !== undefined, modelProvider: modelOverride?.name || llmManager.currentLlm.name || undefined, @@ -1414,7 +1466,7 @@ export function ChatPage({ : user_message_id, message: currMessage, type: "user", - files: currentMessageFiles, + files: files, toolCall: null, parentMessageId: parentMessage?.messageId || SYSTEM_MESSAGE_ID, }, @@ -1473,6 +1525,15 @@ export function ChatPage({ second_level_generating = true; } } + if (Object.hasOwn(packet, "user_files")) { + const userFiles = (packet as UserKnowledgeFilePacket).user_files; + // Ensure files are unique by id + const newUserFiles = userFiles.filter( + (newFile) => + !files.some((existingFile) => existingFile.id === newFile.id) + ); + files = files.concat(newUserFiles); + } if 
(Object.hasOwn(packet, "is_agentic")) { isAgentic = (packet as any).is_agentic; } @@ -1676,7 +1737,7 @@ export function ChatPage({ : initialFetchDetails.user_message_id!, message: currMessage, type: "user", - files: currentMessageFiles, + files: files, toolCall: null, parentMessageId: error ? null : lastSuccessfulMessageId, childrenMessageIds: [ @@ -1853,38 +1914,18 @@ export function ChatPage({ return; } - const tempFileDescriptors = acceptedFiles.map((file) => ({ - id: uuidv4(), - type: file.type.startsWith("image/") - ? ChatFileType.IMAGE - : ChatFileType.DOCUMENT, - isUploading: true, - })); - - // only show loading spinner for reasonably large files - const totalSize = acceptedFiles.reduce((sum, file) => sum + file.size, 0); - if (totalSize > 50 * 1024) { - setCurrentMessageFiles((prev) => [...prev, ...tempFileDescriptors]); - } - - const removeTempFiles = (prev: FileDescriptor[]) => { - return prev.filter( - (file) => !tempFileDescriptors.some((newFile) => newFile.id === file.id) - ); - }; updateChatState("uploading", currentSessionId()); - await uploadFilesForChat(acceptedFiles).then(([files, error]) => { - if (error) { - setCurrentMessageFiles((prev) => removeTempFiles(prev)); - setPopup({ - type: "error", - message: error, - }); - } else { - setCurrentMessageFiles((prev) => [...removeTempFiles(prev), ...files]); - } - }); + const [uploadedFiles, error] = await uploadFilesForChat(acceptedFiles); + if (error) { + setPopup({ + type: "error", + message: error, + }); + } + + setCurrentMessageFiles((prev) => [...prev, ...uploadedFiles]); + updateChatState("input", currentSessionId()); }; @@ -1948,7 +1989,10 @@ export function ChatPage({ useEffect(() => { if (liveAssistant) { const hasSearchTool = liveAssistant.tools.some( - (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID + (tool) => + tool.in_code_tool_id === SEARCH_TOOL_ID && + liveAssistant.user_file_ids?.length == 0 && + liveAssistant.user_folder_ids?.length == 0 ); setRetrievalEnabled(hasSearchTool); if (!hasSearchTool) { @@ -1960,7 +2004,10 @@ export function ChatPage({ const [retrievalEnabled, setRetrievalEnabled] = useState(() => { if (liveAssistant) { return liveAssistant.tools.some( - (tool) => tool.in_code_tool_id === SEARCH_TOOL_ID + (tool) => + tool.in_code_tool_id === SEARCH_TOOL_ID && + liveAssistant.user_file_ids?.length == 0 && + liveAssistant.user_folder_ids?.length == 0 ); } return false; @@ -1978,6 +2025,12 @@ export function ChatPage({ const innerSidebarElementRef = useRef(null); const [settingsToggled, setSettingsToggled] = useState(false); + + const [selectedDocuments, setSelectedDocuments] = useState( + [] + ); + const [selectedDocumentTokens, setSelectedDocumentTokens] = useState(0); + const currentPersona = alternativeAssistant || liveAssistant; const HORIZON_DISTANCE = 800; @@ -2054,6 +2107,42 @@ export function ChatPage({ useEffect(() => { abortControllersRef.current = abortControllers; }, [abortControllers]); + useEffect(() => { + const calculateTokensAndUpdateSearchMode = async () => { + if (selectedFiles.length > 0 || selectedFolders.length > 0) { + try { + // Prepare the query parameters for the API call + const fileIds = selectedFiles.map((file: FileResponse) => file.id); + const folderIds = selectedFolders.map( + (folder: FolderResponse) => folder.id + ); + + // Build the query string + const queryParams = new URLSearchParams(); + fileIds.forEach((id) => + queryParams.append("file_ids", id.toString()) + ); + folderIds.forEach((id) => + queryParams.append("folder_ids", id.toString()) + ); + + // Make 
the API call to get token estimate + const response = await fetch( + `/api/user/file/token-estimate?${queryParams.toString()}` + ); + + if (!response.ok) { + console.error("Failed to fetch token estimate"); + return; + } + } catch (error) { + console.error("Error calculating tokens:", error); + } + } + }; + + calculateTokensAndUpdateSearchMode(); + }, [selectedFiles, selectedFolders, llmManager.currentLlm]); useSidebarShortcut(router, toggleSidebar); @@ -2073,6 +2162,7 @@ export function ChatPage({ }); return; } + // We call onSubmit, passing a `messageOverride` onSubmit({ messageIdToResend: lastUserMsg.messageId, @@ -2122,6 +2212,20 @@ export function ChatPage({ ); + const clearSelectedDocuments = () => { + setSelectedDocuments([]); + setSelectedDocumentTokens(0); + clearSelectedItems(); + }; + + const toggleDocumentSelection = (document: OnyxDocument) => { + setSelectedDocuments((prev) => + prev.some((d) => d.document_id === document.document_id) + ? prev.filter((d) => d.document_id !== document.document_id) + : [...prev, document] + ); + }; + return ( <> @@ -2168,6 +2272,18 @@ export function ChatPage({ /> )} + {toggleDocSelection && ( + setToggleDocSelection(false)} + onSave={() => { + setToggleDocSelection(false); + }} + /> + )} + setIsChatSearchModalOpen(false)} @@ -2189,6 +2305,7 @@ export function ChatPage({ ? true : false } + humanMessage={humanMessage} setPresentingDocument={setPresentingDocument} modal={true} ref={innerSidebarElementRef} @@ -2344,6 +2461,7 @@ export function ChatPage({ `} > 0 || messageHistory.find( @@ -2527,6 +2645,9 @@ export function ChatPage({ key={messageReactComponentKey} > + file.type == ChatFileType.USER_KNOWLEDGE + ); + const userFiles = allUserFiles?.filter((file) => + attachedFileDescriptors?.some( + (descriptor) => + descriptor.id === file.file_id + ) + ); + return (
) : ( 0 @@ -3010,6 +3144,7 @@ export function ChatPage({ messageHistory[messageHistory.length - 1] ?.type != "user")) && ( + setToggleDocSelection(true) + } showConfigureAPIKey={() => setShowApiKeyModal(true) } - chatState={currentSessionChatState} - stopGenerating={stopGenerating} selectedDocuments={selectedDocuments} - // assistant stuff - selectedAssistant={liveAssistant} - setAlternativeAssistant={setAlternativeAssistant} - alternativeAssistant={alternativeAssistant} - // end assistant stuff message={message} setMessage={setMessage} + stopGenerating={stopGenerating} onSubmit={onSubmit} - files={currentMessageFiles} + chatState={currentSessionChatState} + alternativeAssistant={alternativeAssistant} + selectedAssistant={ + selectedAssistant || liveAssistant + } + setAlternativeAssistant={setAlternativeAssistant} setFiles={setCurrentMessageFiles} handleFileUpload={handleImageUpload} textAreaRef={textAreaRef} @@ -3188,7 +3325,6 @@ export function ChatPage({
- {/* Right Sidebar - DocumentSidebar */}
); diff --git a/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx b/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx index 0e2c94931f..348c03e38b 100644 --- a/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx +++ b/web/src/app/chat/documentSidebar/ChatDocumentDisplay.tsx @@ -1,5 +1,5 @@ import { SourceIcon } from "@/components/SourceIcon"; -import { OnyxDocument } from "@/lib/search/interfaces"; +import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces"; import { FiTag } from "react-icons/fi"; import { DocumentSelector } from "./DocumentSelector"; import { buildDocumentSummaryDisplay } from "@/components/search/DocumentDisplay"; @@ -18,7 +18,7 @@ interface DocumentDisplayProps { handleSelect: (documentId: string) => void; tokenLimitReached: boolean; hideSelection?: boolean; - setPresentingDocument: Dispatch>; + setPresentingDocument: Dispatch>; } export function DocumentMetadataBlock({ diff --git a/web/src/app/chat/documentSidebar/DocumentResults.tsx b/web/src/app/chat/documentSidebar/DocumentResults.tsx index 7241fcd340..b5965aa6d7 100644 --- a/web/src/app/chat/documentSidebar/DocumentResults.tsx +++ b/web/src/app/chat/documentSidebar/DocumentResults.tsx @@ -1,7 +1,7 @@ -import { OnyxDocument } from "@/lib/search/interfaces"; +import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces"; import { ChatDocumentDisplay } from "./ChatDocumentDisplay"; import { removeDuplicateDocs } from "@/lib/documentUtils"; -import { Message } from "../interfaces"; +import { ChatFileType, Message } from "../interfaces"; import { Dispatch, ForwardedRef, @@ -11,9 +11,14 @@ import { useState, } from "react"; import { XIcon } from "@/components/icons/icons"; - +import { + FileSourceCard, + FileSourceCardInResults, +} from "../message/SourcesDisplay"; +import { useDocumentsContext } from "../my-documents/DocumentsContext"; interface DocumentResultsProps { agenticMessage: boolean; + humanMessage: Message | null; closeSidebar: () => void; selectedMessage: Message | null; selectedDocuments: OnyxDocument[] | null; @@ -25,7 +30,7 @@ interface DocumentResultsProps { isOpen: boolean; isSharedChat?: boolean; modal: boolean; - setPresentingDocument: Dispatch>; + setPresentingDocument: Dispatch>; removeHeader?: boolean; } @@ -33,6 +38,7 @@ export const DocumentResults = forwardRef( ( { agenticMessage, + humanMessage, closeSidebar, modal, selectedMessage, @@ -62,7 +68,14 @@ export const DocumentResults = forwardRef( return () => clearTimeout(timer); }, [selectedDocuments]); + const { files: allUserFiles } = useDocumentsContext(); + const humanFileDescriptors = humanMessage?.files.filter( + (file) => file.type == ChatFileType.USER_KNOWLEDGE + ); + const userFiles = allUserFiles?.filter((file) => + humanFileDescriptors?.some((descriptor) => descriptor.id === file.file_id) + ); const selectedDocumentIds = selectedDocuments?.map((document) => document.document_id) || []; @@ -72,7 +85,6 @@ export const DocumentResults = forwardRef( const tokenLimitReached = selectedDocumentTokens > maxTokens - 75; const hasSelectedDocuments = selectedDocumentIds.length > 0; - return ( <>
( )}
- {dedupedDocuments.length > 0 ? ( + {userFiles && userFiles.length > 0 ? ( +
+ {userFiles?.map((file, index) => ( + + doc.document_id === + `FILE_CONNECTOR__${file.file_id}` + )} + document={file} + setPresentingDocument={() => + setPresentingDocument({ + document_id: file.document_id, + semantic_identifier: file.file_id || null, + }) + } + /> + ))} +
+ ) : dedupedDocuments.length > 0 ? ( dedupedDocuments.map((document, ind) => (
( />
)) - ) : ( -
- )} + ) : null}
diff --git a/web/src/app/chat/folders/FolderManagement.tsx b/web/src/app/chat/folders/FolderManagement.tsx index f7f65d8e95..94be3c843d 100644 --- a/web/src/app/chat/folders/FolderManagement.tsx +++ b/web/src/app/chat/folders/FolderManagement.tsx @@ -8,7 +8,8 @@ export async function createFolder(folderName: string): Promise { body: JSON.stringify({ folder_name: folderName }), }); if (!response.ok) { - throw new Error("Failed to create folder"); + const errorData = await response.json(); + throw new Error(errorData.detail || "Failed to create folder"); } const data = await response.json(); return data; diff --git a/web/src/app/chat/input/ChatInputBar.tsx b/web/src/app/chat/input/ChatInputBar.tsx index 2366e30e29..a1e9c3d4dc 100644 --- a/web/src/app/chat/input/ChatInputBar.tsx +++ b/web/src/app/chat/input/ChatInputBar.tsx @@ -27,7 +27,7 @@ import { Hoverable } from "@/components/Hoverable"; import { ChatState } from "../types"; import UnconfiguredProviderText from "@/components/chat/UnconfiguredProviderText"; import { useAssistants } from "@/components/context/AssistantsContext"; -import { CalendarIcon, TagIcon, XIcon } from "lucide-react"; +import { CalendarIcon, TagIcon, XIcon, FolderIcon } from "lucide-react"; import { FilterPopup } from "@/components/search/filtering/FilterPopup"; import { DocumentSet, Tag } from "@/lib/types"; import { SourceIcon } from "@/components/SourceIcon"; @@ -35,11 +35,13 @@ import { getFormattedDateRangeString } from "@/lib/dateUtils"; import { truncateString } from "@/lib/utils"; import { buildImgUrl } from "../files/images/utils"; import { useUser } from "@/components/user/UserProvider"; +import { useDocumentSelection } from "../useDocumentSelection"; import { AgenticToggle } from "./AgenticToggle"; import { SettingsContext } from "@/components/settings/SettingsProvider"; import { LoadingIndicator } from "react-select/dist/declarations/src/components/indicators"; import { FidgetSpinner } from "react-loader-spinner"; import { LoadingAnimation } from "@/components/Loading"; +import { useDocumentsContext } from "../my-documents/DocumentsContext"; const MAX_INPUT_HEIGHT = 200; export const SourceChip2 = ({ @@ -172,6 +174,7 @@ export const SourceChip = ({ ); interface ChatInputBarProps { + toggleDocSelection: () => void; removeDocs: () => void; showConfigureAPIKey: () => void; selectedDocuments: OnyxDocument[]; @@ -186,7 +189,6 @@ interface ChatInputBarProps { selectedAssistant: Persona; setAlternativeAssistant: (alternativeAssistant: Persona | null) => void; toggleDocumentSidebar: () => void; - files: FileDescriptor[]; setFiles: (files: FileDescriptor[]) => void; handleFileUpload: (files: File[]) => void; textAreaRef: React.RefObject; @@ -200,6 +202,7 @@ interface ChatInputBarProps { } export function ChatInputBar({ + toggleDocSelection, retrievalEnabled, removeDocs, toggleDocumentSidebar, @@ -216,7 +219,6 @@ export function ChatInputBar({ selectedAssistant, setAlternativeAssistant, - files, setFiles, handleFileUpload, textAreaRef, @@ -229,6 +231,15 @@ export function ChatInputBar({ setProSearchEnabled, }: ChatInputBarProps) { const { user } = useUser(); + const { + selectedFiles, + selectedFolders, + removeSelectedFile, + removeSelectedFolder, + currentMessageFiles, + setCurrentMessageFiles, + } = useDocumentsContext(); + const settings = useContext(SettingsContext); useEffect(() => { const textarea = textAreaRef.current; @@ -628,7 +639,9 @@ export function ChatInputBar({ /> {(selectedDocuments.length > 0 || - files.length > 0 || + selectedFiles.length > 0 || + 
selectedFolders.length > 0 || + currentMessageFiles.length > 0 || filterManager.timeRange || filterManager.selectedDocumentSets.length > 0 || filterManager.selectedTags.length > 0 || @@ -651,6 +664,22 @@ export function ChatInputBar({ /> ))} + {selectedFiles.map((file) => ( + } + title={file.name} + onRemove={() => removeSelectedFile(file)} + /> + ))} + {selectedFolders.map((folder) => ( + } + title={folder.name} + onRemove={() => removeSelectedFolder(folder)} + /> + ))} {filterManager.timeRange && ( ))} - {filterManager.selectedSources.length > 0 && filterManager.selectedSources.map((source, index) => ( ))} - {selectedDocuments.length > 0 && ( )} - - {files.map((file, index) => + {currentMessageFiles.map((file, index) => file.type === ChatFileType.IMAGE ? ( { - setFiles( - files.filter( + setCurrentMessageFiles( + currentMessageFiles.filter( (fileInFilter) => fileInFilter.id !== file.id ) ); @@ -743,8 +769,8 @@ export function ChatInputBar({ icon={} title={file.name || "File"} onRemove={() => { - setFiles( - files.filter( + setCurrentMessageFiles( + currentMessageFiles.filter( (fileInFilter) => fileInFilter.id !== file.id ) ); @@ -763,20 +789,9 @@ export function ChatInputBar({ name="File" Icon={FiPlusCircle} onClick={() => { - const input = document.createElement("input"); - input.type = "file"; - input.multiple = true; - input.onchange = (event: any) => { - const files = Array.from( - event?.target?.files || [] - ) as File[]; - if (files.length > 0) { - handleFileUpload(files); - } - }; - input.click(); + toggleDocSelection(); }} - tooltipContent={"Upload files"} + tooltipContent={"Upload files and attach user files"} /> ; - }[]; - } = {}; - const uniqueModelNames = new Set(); + // Memoize the options to prevent unnecessary recalculations + const { + llmOptionsByProvider, + llmOptions, + defaultProvider, + defaultModelDisplayName, + } = useMemo(() => { + const llmOptionsByProvider: { + [provider: string]: { + name: string; + value: string; + icon: React.FC<{ size?: number; className?: string }>; + }[]; + } = {}; - llmProviders.forEach((llmProvider) => { - if (!llmOptionsByProvider[llmProvider.provider]) { - llmOptionsByProvider[llmProvider.provider] = []; - } + const uniqueModelNames = new Set(); - (llmProvider.display_model_names || llmProvider.model_names).forEach( - (modelName) => { - if (!uniqueModelNames.has(modelName)) { - uniqueModelNames.add(modelName); - llmOptionsByProvider[llmProvider.provider].push({ - name: modelName, - value: structureValue( - llmProvider.name, - llmProvider.provider, - modelName - ), - icon: getProviderIcon(llmProvider.provider, modelName), - }); - } + llmProviders.forEach((llmProvider) => { + if (!llmOptionsByProvider[llmProvider.provider]) { + llmOptionsByProvider[llmProvider.provider] = []; } + + (llmProvider.display_model_names || llmProvider.model_names).forEach( + (modelName) => { + if (!uniqueModelNames.has(modelName)) { + uniqueModelNames.add(modelName); + llmOptionsByProvider[llmProvider.provider].push({ + name: modelName, + value: structureValue( + llmProvider.name, + llmProvider.provider, + modelName + ), + icon: getProviderIcon(llmProvider.provider, modelName), + }); + } + } + ); + }); + + const llmOptions = Object.entries(llmOptionsByProvider).flatMap( + ([provider, options]) => [...options] ); - }); - const llmOptions = Object.entries(llmOptionsByProvider).flatMap( - ([provider, options]) => [...options] - ); + const defaultProvider = llmProviders.find( + (llmProvider) => llmProvider.is_default_provider + ); - const defaultProvider = 
llmProviders.find( - (llmProvider) => llmProvider.is_default_provider - ); + const defaultModelName = defaultProvider?.default_model_name; + const defaultModelDisplayName = defaultModelName + ? getDisplayNameForModel(defaultModelName) + : null; - const defaultModelName = defaultProvider?.default_model_name; - const defaultModelDisplayName = defaultModelName - ? getDisplayNameForModel(defaultModelName) - : null; + return { + llmOptionsByProvider, + llmOptions, + defaultProvider, + defaultModelDisplayName, + }; + }, [llmProviders]); const [localTemperature, setLocalTemperature] = useState( llmManager.temperature ?? 0.5 @@ -104,42 +121,52 @@ export default function LLMPopover({ setLocalTemperature(llmManager.temperature ?? 0.5); }, [llmManager.temperature]); - const handleTemperatureChange = (value: number[]) => { + // Use useCallback to prevent function recreation + const handleTemperatureChange = useCallback((value: number[]) => { setLocalTemperature(value[0]); - }; + }, []); - const handleTemperatureChangeComplete = (value: number[]) => { - llmManager.updateTemperature(value[0]); - }; + const handleTemperatureChangeComplete = useCallback( + (value: number[]) => { + llmManager.updateTemperature(value[0]); + }, + [llmManager] + ); + + // Memoize trigger content to prevent rerendering + const triggerContent = useMemo( + () => ( + + ), + [defaultModelDisplayName, defaultProvider, llmManager?.currentLlm] + ); return ( - - - + {triggerContent} { const documentsAreSelected = @@ -206,7 +214,10 @@ export async function* sendMessage({ message: message, prompt_id: promptId, search_doc_ids: documentsAreSelected ? selectedDocumentIds : null, + force_user_file_search: forceUserFileSearch, file_descriptors: fileDescriptors, + user_file_ids: userFileIds, + user_folder_ids: userFolderIds, regenerate, retrieval_options: !documentsAreSelected ? 
{ @@ -632,7 +643,11 @@ export function personaIncludesRetrieval(selectedPersona: Persona) { return selectedPersona.tools.some( (tool) => tool.in_code_tool_id && - [SEARCH_TOOL_ID, INTERNET_SEARCH_TOOL_ID].includes(tool.in_code_tool_id) + [SEARCH_TOOL_ID, INTERNET_SEARCH_TOOL_ID].includes( + tool.in_code_tool_id + ) && + selectedPersona.user_file_ids?.length === 0 && + selectedPersona.user_folder_ids?.length === 0 ); } diff --git a/web/src/app/chat/message/MemoizedTextComponents.tsx b/web/src/app/chat/message/MemoizedTextComponents.tsx index 7a440964d0..1672150049 100644 --- a/web/src/app/chat/message/MemoizedTextComponents.tsx +++ b/web/src/app/chat/message/MemoizedTextComponents.tsx @@ -10,12 +10,14 @@ import { SourceIcon } from "@/components/SourceIcon"; import { WebResultIcon } from "@/components/WebResultIcon"; import { SubQuestionDetail } from "../interfaces"; import { ValidSources } from "@/lib/types"; +import { FileResponse } from "../my-documents/DocumentsContext"; export const MemoizedAnchor = memo( ({ docs, subQuestions, openQuestion, + userFiles, href, updatePresentingDocument, children, @@ -23,6 +25,7 @@ export const MemoizedAnchor = memo( subQuestions?: SubQuestionDetail[]; openQuestion?: (question: SubQuestionDetail) => void; docs?: OnyxDocument[] | null; + userFiles?: FileResponse[] | null; updatePresentingDocument: (doc: OnyxDocument) => void; href?: string; children: React.ReactNode; @@ -31,8 +34,14 @@ export const MemoizedAnchor = memo( if (value?.startsWith("[") && value?.endsWith("]")) { const match = value.match(/\[(D|Q)?(\d+)\]/); if (match) { - const isSubQuestion = match[1] === "Q"; - if (!isSubQuestion) { + const isUserFileCitation = userFiles?.length && userFiles.length > 0; + if (isUserFileCitation) { + const index = parseInt(match[2], 10) - 1; + const associatedUserFile = userFiles?.[index]; + if (!associatedUserFile) { + return {children}; + } + } else if (!isUserFileCitation) { const index = parseInt(match[2], 10) - 1; const associatedDoc = docs?.[index]; if (!associatedDoc) { diff --git a/web/src/app/chat/message/Messages.tsx b/web/src/app/chat/message/Messages.tsx index 26d3be2e3b..d25dd68916 100644 --- a/web/src/app/chat/message/Messages.tsx +++ b/web/src/app/chat/message/Messages.tsx @@ -16,16 +16,15 @@ import React, { useRef, useState, } from "react"; -import { unified } from "unified"; import ReactMarkdown from "react-markdown"; -import { OnyxDocument, FilteredOnyxDocument } from "@/lib/search/interfaces"; -import { SearchSummary } from "./SearchSummary"; +import { + OnyxDocument, + FilteredOnyxDocument, + MinimalOnyxDocument, +} from "@/lib/search/interfaces"; +import { SearchSummary, UserKnowledgeFiles } from "./SearchSummary"; import { SkippedSearch } from "./SkippedSearch"; import remarkGfm from "remark-gfm"; -import remarkParse from "remark-parse"; -import remarkRehype from "remark-rehype"; -import rehypeSanitize from "rehype-sanitize"; -import rehypeStringify from "rehype-stringify"; import { CopyButton } from "@/components/CopyButton"; import { ChatFileType, FileDescriptor, ToolCallMetadata } from "../interfaces"; import { @@ -48,7 +47,6 @@ import { CustomTooltip, TooltipGroup, } from "@/components/tooltip/CustomTooltip"; -import { ValidSources } from "@/lib/types"; import { Tooltip, TooltipContent, @@ -65,13 +63,17 @@ import { MemoizedAnchor, MemoizedParagraph } from "./MemoizedTextComponents"; import { extractCodeText, preprocessLaTeX } from "./codeUtils"; import ToolResult from "../../../components/tools/ToolResult"; import CsvContent from 
"../../../components/tools/CSVContent"; -import { SeeMoreBlock } from "@/components/chat/sources/SourceCard"; -import { SourceCard } from "./SourcesDisplay"; +import { + FilesSeeMoreBlock, + SeeMoreBlock, +} from "@/components/chat/sources/SourceCard"; +import { FileSourceCard, SourceCard } from "./SourcesDisplay"; import remarkMath from "remark-math"; import rehypeKatex from "rehype-katex"; import "katex/dist/katex.min.css"; import { copyAll, handleCopy } from "./copyingUtils"; import { transformLinkUri } from "@/lib/utils"; +import { FileResponse } from "../my-documents/DocumentsContext"; const TOOLS_WITH_CUSTOM_HANDLING = [ SEARCH_TOOL_NAME, @@ -82,27 +84,30 @@ const TOOLS_WITH_CUSTOM_HANDLING = [ function FileDisplay({ files, alignBubble, + setPresentingDocument, }: { files: FileDescriptor[]; alignBubble?: boolean; + setPresentingDocument: (document: MinimalOnyxDocument) => void; }) { const [close, setClose] = useState(true); + const [expandedKnowledge, setExpandedKnowledge] = useState(false); const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE); - const nonImgFiles = files.filter( - (file) => file.type !== ChatFileType.IMAGE && file.type !== ChatFileType.CSV + const textFiles = files.filter( + (file) => file.type == ChatFileType.PLAIN_TEXT ); const csvImgFiles = files.filter((file) => file.type == ChatFileType.CSV); return ( <> - {nonImgFiles && nonImgFiles.length > 0 && ( + {textFiles && textFiles.length > 0 && (
- {nonImgFiles.map((file) => { + {textFiles.map((file) => { return (
)} - {csvImgFiles && csvImgFiles.length > 0 && (
@@ -161,7 +165,48 @@ function FileDisplay({ ); } +function FileResponseDisplay({ + files, + alignBubble, + setPresentingDocument, +}: { + files: FileResponse[]; + alignBubble?: boolean; + setPresentingDocument: (document: MinimalOnyxDocument) => void; +}) { + if (!files || files.length === 0) { + return null; + } + + return ( +
+
+ {files.map((file) => { + return ( +
+ + setPresentingDocument({ + document_id: file.document_id, + semantic_identifier: file.name || file.document_id, + }) + } + /> +
+ ); + })} +
+
+ ); +} + export const AIMessage = ({ + userKnowledgeFiles = [], regenerate, overriddenModel, continueGenerating, @@ -191,6 +236,7 @@ export const AIMessage = ({ documentSidebarVisible, removePadding, }: { + userKnowledgeFiles?: FileResponse[]; index?: number; shared?: boolean; isActive?: boolean; @@ -217,7 +263,7 @@ export const AIMessage = ({ retrievalDisabled?: boolean; overriddenModel?: string; regenerate?: (modelOverRide: LlmDescriptor) => Promise; - setPresentingDocument: (document: OnyxDocument) => void; + setPresentingDocument: (document: MinimalOnyxDocument) => void; removePadding?: boolean; }) => { const toolCallGenerating = toolCall && !toolCall.tool_result; @@ -243,11 +289,13 @@ export const AIMessage = ({ return preprocessLaTeX(content); } } - // return content; + const processed = preprocessLaTeX(content); + + // Escape $ that are preceded by a space and followed by a non-$ character + const escapedDollarSigns = processed.replace(/([\s])\$([^\$])/g, "$1\\$$2"); return ( - preprocessLaTeX(content) + - (!isComplete && !toolCallGenerating ? " [*]() " : "") + escapedDollarSigns + (!isComplete && !toolCallGenerating ? " [*]() " : "") ); }; @@ -318,6 +366,7 @@ export const AIMessage = ({ {props.children} @@ -423,34 +472,46 @@ export const AIMessage = ({
- {!toolCall || toolCall.tool_name === SEARCH_TOOL_NAME ? ( - <> - {query !== undefined && !retrievalDisabled && ( -
- -
- )} - {handleForceSearch && - content && - query === undefined && - !hasDocs && - !retrievalDisabled && ( + {userKnowledgeFiles.length == 0 && + (!toolCall || toolCall.tool_name === SEARCH_TOOL_NAME ? ( + <> + {query !== undefined && (
-
)} - - ) : null} - {toolCall && + + {handleForceSearch && + content && + query === undefined && + !hasDocs && + !retrievalDisabled && ( +
+ +
+ )} + + ) : null)} + {userKnowledgeFiles && ( + + )} + + {!userKnowledgeFiles && + toolCall && !TOOLS_WITH_CUSTOM_HANDLING.includes( toolCall.tool_name ) && ( @@ -484,8 +545,51 @@ export const AIMessage = ({ isRunning={!toolCall.tool_result} /> )} - {docs && docs.length > 0 && ( + {userKnowledgeFiles.length == 0 && + docs && + docs.length > 0 && ( +
+
+
+ {!settings?.isMobile && + docs.length > 0 && + docs + .slice(0, 2) + .map((doc: OnyxDocument, ind: number) => ( + + setPresentingDocument({ + document_id: doc.document_id, + semantic_identifier: doc.document_id, + }) + } + /> + ))} + +
+
+
+ )} + + {userKnowledgeFiles && userKnowledgeFiles.length > 0 && (
{!settings?.isMobile && - docs.length > 0 && - docs + userKnowledgeFiles.length > 0 && + userKnowledgeFiles .slice(0, 2) - .map((doc: OnyxDocument, ind: number) => ( - ( + + doc.document_id === + `FILE_CONNECTOR__${file.file_id}` || + doc.document_id === + `USER_FILE_CONNECTOR__${file.file_id}` + )} key={ind} - setPresentingDocument={ - setPresentingDocument + document={file} + setPresentingDocument={() => + setPresentingDocument({ + document_id: file.document_id, + semantic_identifier: file.name, + }) } /> ))} - + + {userKnowledgeFiles.length > 2 && ( + + )}
)} {content || files ? ( <> - - + {typeof content === "string" ? (
null, disableSwitchingForStreaming = false, + setPresentingDocument, }: { shared?: boolean; content: string; @@ -809,6 +931,7 @@ export const HumanMessage = ({ onMessageSelection?: (messageId: number) => void; stopGenerating?: () => void; disableSwitchingForStreaming?: boolean; + setPresentingDocument: (document: MinimalOnyxDocument) => void; }) => { const textareaRef = useRef(null); @@ -856,7 +979,11 @@ export const HumanMessage = ({ >
- +
diff --git a/web/src/app/chat/message/SearchSummary.tsx b/web/src/app/chat/message/SearchSummary.tsx index 610613a6b4..b4cdbf5594 100644 --- a/web/src/app/chat/message/SearchSummary.tsx +++ b/web/src/app/chat/message/SearchSummary.tsx @@ -14,7 +14,9 @@ import { import { OnyxDocument } from "@/lib/search/interfaces"; import { ValidSources } from "@/lib/types"; import { useEffect, useRef, useState } from "react"; -import { FiCheck, FiEdit2, FiSearch, FiX } from "react-icons/fi"; +import { FiBook, FiCheck, FiEdit2, FiSearch, FiX } from "react-icons/fi"; +import { FileDescriptor } from "../interfaces"; +import { FileResponse } from "../my-documents/DocumentsContext"; export function ShowHideDocsButton({ messageId, @@ -50,6 +52,7 @@ export function SearchSummary({ handleSearchQueryEdit, docs, toggleDocumentSelection, + userFileSearch, }: { index: number; finished: boolean; @@ -57,6 +60,7 @@ export function SearchSummary({ handleSearchQueryEdit?: (query: string) => void; docs: OnyxDocument[]; toggleDocumentSelection: () => void; + userFileSearch: boolean; }) { const [isEditing, setIsEditing] = useState(false); const [finalQuery, setFinalQuery] = useState(query); @@ -107,14 +111,20 @@ export function SearchSummary({ } text-xs desktop:text-sm mobile:ml-auto !line-clamp-1 !break-all px-0.5 flex-grow`} ref={searchingForRef} > - {finished ? "Searched" : "Searching"} for:{" "} - - {index === 1 - ? finalQuery.length > 50 - ? `${finalQuery.slice(0, 50)}...` - : finalQuery - : finalQuery} - + {userFileSearch ? ( + "Reading context" + ) : ( + <> + {finished ? "Searched" : "Searching"} for:{" "} + + {index === 1 + ? finalQuery.length > 50 + ? `${finalQuery.slice(0, 50)}...` + : finalQuery + : finalQuery} + + + )}
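Aside: a minimal sketch, not part of the diff, of the new userFileSearch prop on SearchSummary; the prop values below are hypothetical.

import { SearchSummary } from "@/app/chat/message/SearchSummary";

// With userFileSearch=true the component renders the static
// "Reading context" label; with false it falls back to
// 'Searched for: <query>' (truncated to 50 chars when index === 1).
export function ReadingContextExample() {
  return (
    <SearchSummary
      index={1}
      finished={true}
      query="quarterly revenue"
      docs={[]}
      toggleDocumentSelection={() => {}}
      userFileSearch={true}
    />
  );
}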
@@ -242,3 +252,25 @@ export function SearchSummary({
); } + +export function UserKnowledgeFiles({ + userKnowledgeFiles, +}: { + userKnowledgeFiles: FileResponse[]; +}): JSX.Element { + if (!userKnowledgeFiles || userKnowledgeFiles.length === 0) { + return <>; + } + + return ( +
+
+ + + Referenced {userKnowledgeFiles.length}{" "} + {userKnowledgeFiles.length === 1 ? "document" : "documents"} + +
+
+ ); +} diff --git a/web/src/app/chat/message/SourcesDisplay.tsx b/web/src/app/chat/message/SourcesDisplay.tsx index ce106e9263..2e81e45440 100644 --- a/web/src/app/chat/message/SourcesDisplay.tsx +++ b/web/src/app/chat/message/SourcesDisplay.tsx @@ -1,9 +1,14 @@ import React, { useState, useEffect } from "react"; -import { OnyxDocument } from "@/lib/search/interfaces"; +import { MinimalOnyxDocument, OnyxDocument } from "@/lib/search/interfaces"; import { ResultIcon, SeeMoreBlock } from "@/components/chat/sources/SourceCard"; import { openDocument } from "@/lib/search/utils"; import { buildDocumentSummaryDisplay } from "@/components/search/DocumentDisplay"; import { ValidSources } from "@/lib/types"; +import { FiFileText } from "react-icons/fi"; +import { FileDescriptor } from "../interfaces"; +import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils"; +import { truncateString } from "@/lib/utils"; +import { FileResponse } from "../my-documents/DocumentsContext"; interface SourcesDisplayProps { documents: OnyxDocument[]; @@ -63,6 +68,107 @@ export const SourceCard: React.FC<{ ); }; +export const FileSourceCard: React.FC<{ + document: FileResponse; + setPresentingDocument: (document: FileResponse) => void; + relevantDocument: OnyxDocument | undefined; +}> = ({ document, setPresentingDocument, relevantDocument }) => { + const openDocument = () => { + if (document.link_url) { + window.open(document.link_url, "_blank"); + } else { + setPresentingDocument(document as any); + } + }; + const fileName = document.name || document.id; + + return ( + + ); +}; + +export const FileSourceCardInResults: React.FC<{ + document: FileResponse; + setPresentingDocument: (document: FileResponse) => void; + relevantDocument: OnyxDocument | undefined; +}> = ({ document, setPresentingDocument, relevantDocument }) => { + const openDocument = () => { + if (document.link_url) { + window.open(document.link_url, "_blank"); + } else { + setPresentingDocument(document as any); + } + }; + return ( + + ); +}; + export const SourcesDisplay: React.FC = ({ documents, toggleDocumentSelection, diff --git a/web/src/app/chat/my-documents/DocumentsContext.tsx b/web/src/app/chat/my-documents/DocumentsContext.tsx new file mode 100644 index 0000000000..4844d77528 --- /dev/null +++ b/web/src/app/chat/my-documents/DocumentsContext.tsx @@ -0,0 +1,551 @@ +"use client"; +import React, { + createContext, + useContext, + useState, + useCallback, + ReactNode, + useEffect, + Dispatch, + SetStateAction, +} from "react"; +import { MinimalOnyxDocument } from "@/lib/search/interfaces"; +import * as documentsService from "@/services/documentsService"; +import { FileDescriptor } from "../interfaces"; + +export interface FolderResponse { + id: number; + name: string; + description: string; + files: FileResponse[]; + assistant_ids?: number[]; + created_at: string; +} + +export enum FileStatus { + FAILED = "FAILED", + INDEXING = "INDEXING", + INDEXED = "INDEXED", + REINDEXING = "REINDEXING", +} + +export type FileResponse = { + id: number; + name: string; + document_id: string; + folder_id: number | null; + size?: number; + type?: string; + lastModified?: string; + token_count?: number; + assistant_ids?: number[]; + indexed?: boolean; + created_at?: string; + file_id?: string; + file_type?: string; + link_url?: string | null; + status: FileStatus; +}; + +export interface FileUploadResponse { + file_paths: string[]; +} + +export interface DocumentsContextType { + folders: FolderResponse[]; + files: FileResponse[]; + currentFolder: 
number | null;
+  presentingDocument: MinimalOnyxDocument | null;
+  searchQuery: string;
+  page: number;
+  isLoading: boolean;
+  error: string | null;
+  selectedFiles: FileResponse[];
+  selectedFolders: FolderResponse[];
+  addSelectedFile: (file: FileResponse) => void;
+  removeSelectedFile: (file: FileResponse) => void;
+  addSelectedFolder: (folder: FolderResponse) => void;
+  removeSelectedFolder: (folder: FolderResponse) => void;
+  clearSelectedItems: () => void;
+  setSelectedFiles: (files: FileResponse[]) => void;
+  setSelectedFolders: (folders: FolderResponse[]) => void;
+  refreshFolders: () => Promise<void>;
+  createFolder: (name: string) => Promise<FolderResponse>;
+  deleteItem: (itemId: number, isFolder: boolean) => Promise<void>;
+  moveItem: (
+    itemId: number,
+    newFolderId: number | null,
+    isFolder: boolean
+  ) => Promise<void>;
+  renameFile: (fileId: number, newName: string) => Promise<void>;
+  renameFolder: (folderId: number, newName: string) => Promise<void>;
+  uploadFile: (
+    formData: FormData,
+    folderId: number | null
+  ) => Promise<FileUploadResponse>;
+  setCurrentFolder: (folderId: number | null) => void;
+  setPresentingDocument: (document: MinimalOnyxDocument | null) => void;
+  setSearchQuery: (query: string) => void;
+  setPage: (page: number) => void;
+  getFilesIndexingStatus: (
+    fileIds: number[]
+  ) => Promise<Record<number, boolean>>;
+  getFolderDetails: (folderId: number) => Promise<FolderResponse>;
+  downloadItem: (documentId: string) => Promise<Blob>;
+  renameItem: (
+    itemId: number,
+    newName: string,
+    isFolder: boolean
+  ) => Promise<void>;
+  createFileFromLink: (
+    url: string,
+    folderId: number | null
+  ) => Promise<FileResponse[]>;
+  handleUpload: (files: File[]) => Promise<void>;
+  refreshFolderDetails: () => Promise<void>;
+  getFolders: () => Promise<FolderResponse[]>;
+  folderDetails: FolderResponse | null | undefined;
+  updateFolderDetails: (
+    folderId: number,
+    name: string,
+    description: string
+  ) => Promise<void>;
+  currentMessageFiles: FileDescriptor[];
+  setCurrentMessageFiles: Dispatch<SetStateAction<FileDescriptor[]>>;
+}
+
+const DocumentsContext = createContext<DocumentsContextType | undefined>(
+  undefined
+);
+
+interface DocumentsProviderProps {
+  children: ReactNode;
+  initialFolderDetails?: FolderResponse | null;
+}
+
+export const DocumentsProvider: React.FC<DocumentsProviderProps> = ({
+  children,
+  initialFolderDetails,
+}) => {
+  const [isLoading, setIsLoading] = useState(true);
+  const [folders, setFolders] = useState<FolderResponse[]>([]);
+  const [currentFolder, setCurrentFolder] = useState<number | null>(null);
+  const [presentingDocument, setPresentingDocument] =
+    useState<MinimalOnyxDocument | null>(null);
+  const [searchQuery, setSearchQuery] = useState("");
+  const [page, setPage] = useState(1);
+  const [selectedFiles, setSelectedFiles] = useState<FileResponse[]>([]);
+
+  // uploaded files
+  const [currentMessageFiles, setCurrentMessageFiles] = useState<
+    FileDescriptor[]
+  >([]);
+
+  const [selectedFolders, setSelectedFolders] = useState<FolderResponse[]>([]);
+  const [folderDetails, setFolderDetails] = useState<
+    FolderResponse | undefined | null
+  >(initialFolderDetails || null);
+  const [showUploadWarning, setShowUploadWarning] = useState(false);
+  const [linkUrl, setLinkUrl] = useState("");
+  const [isCreatingFileFromLink, setIsCreatingFileFromLink] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  useEffect(() => {
+    const fetchFolders = async () => {
+      await refreshFolders();
+      setIsLoading(false);
+    };
+    fetchFolders();
+  }, []);
+
+  const refreshFolders = async () => {
+    try {
+      const data = await documentsService.fetchFolders();
+      setFolders(data);
+    } catch (error) {
+      console.error("Failed to fetch folders:", error);
+      setError("Failed to fetch folders");
+    }
+  };
+
+  const uploadFile = useCallback(
+    async (
+      formData: FormData,
+
folderId: number | null + ): Promise => { + if (folderId) { + formData.append("folder_id", folderId.toString()); + } + + setIsLoading(true); + setError(null); + + try { + const response = await fetch("/api/user/file/upload", { + method: "POST", + body: formData, + }); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error(errorData.detail || "Failed to upload file"); + } + + const data = await response.json(); + await refreshFolders(); + return data; + } catch (error) { + console.error("Failed to upload file:", error); + setError( + error instanceof Error ? error.message : "Failed to upload file" + ); + throw error; + } finally { + setIsLoading(false); + } + }, + [refreshFolders] + ); + + const createFolder = useCallback( + async (name: string) => { + try { + const newFolder = await documentsService.createNewFolder(name, " "); + await refreshFolders(); + return newFolder; + } catch (error) { + console.error("Failed to create folder:", error); + throw error; + } + }, + [refreshFolders] + ); + + const deleteItem = useCallback( + async (itemId: number, isFolder: boolean) => { + try { + if (isFolder) { + await documentsService.deleteFolder(itemId); + } else { + await documentsService.deleteFile(itemId); + } + await refreshFolders(); + } catch (error) { + console.error("Failed to delete item:", error); + throw error; + } + }, + [refreshFolders] + ); + + const moveItem = async ( + itemId: number, + newFolderId: number | null, + isFolder: boolean + ): Promise => { + try { + if (isFolder) { + // Move folder logic + // This is a placeholder - implement actual folder moving logic + } else { + // Move file + const response = await fetch(`/api/user/file/${itemId}/move`, { + method: "PUT", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ new_folder_id: newFolderId }), + }); + + if (!response.ok) { + throw new Error("Failed to move file"); + } + } + await refreshFolders(); + } catch (error) { + console.error("Failed to move item:", error); + setError(error instanceof Error ? 
error.message : "Failed to move item"); + throw error; + } + }; + + const downloadItem = useCallback( + async (documentId: string): Promise => { + try { + const blob = await documentsService.downloadItem(documentId); + const url = window.URL.createObjectURL(blob); + const link = document.createElement("a"); + link.href = url; + link.download = "document"; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + window.URL.revokeObjectURL(url); + return blob; + } catch (error) { + console.error("Failed to download item:", error); + throw error; + } + }, + [] + ); + + const renameItem = useCallback( + async (itemId: number, newName: string, isFolder: boolean) => { + try { + await documentsService.renameItem(itemId, newName, isFolder); + if (isFolder) { + await refreshFolders(); + } + } catch (error) { + console.error("Failed to rename item:", error); + throw error; + } + }, + [refreshFolders] + ); + + const getFolderDetails = useCallback(async (folderId: number) => { + try { + return await documentsService.getFolderDetails(folderId); + } catch (error) { + console.error("Failed to get folder details:", error); + throw error; + } + }, []); + + const updateFolderDetails = useCallback( + async (folderId: number, name: string, description: string) => { + try { + await documentsService.updateFolderDetails(folderId, name, description); + await refreshFolders(); + } catch (error) { + console.error("Failed to update folder details:", error); + throw error; + } + }, + [refreshFolders] + ); + + const addSelectedFile = useCallback((file: FileResponse) => { + setSelectedFiles((prev) => { + if (prev.find((f) => f.id === file.id)) { + return prev; + } + return [...prev, file]; + }); + }, []); + + const removeSelectedFile = useCallback((file: FileResponse) => { + setSelectedFiles((prev) => prev.filter((f) => f.id !== file.id)); + }, []); + + const addSelectedFolder = useCallback((folder: FolderResponse) => { + setSelectedFolders((prev) => { + if (prev.find((f) => f.id === folder.id)) { + return prev; + } + return [...prev, folder]; + }); + }, []); + + const removeSelectedFolder = useCallback((folder: FolderResponse) => { + setSelectedFolders((prev) => prev.filter((f) => f.id !== folder.id)); + }, []); + + const clearSelectedItems = useCallback(() => { + setSelectedFiles([]); + setSelectedFolders([]); + }, []); + + const refreshFolderDetails = useCallback(async () => { + if (folderDetails) { + const details = await getFolderDetails(folderDetails.id); + setFolderDetails(details); + } + }, [folderDetails, getFolderDetails]); + + const createFileFromLink = useCallback( + async (url: string, folderId: number | null): Promise => { + try { + const data = await documentsService.createFileFromLinkRequest( + url, + folderId + ); + await refreshFolders(); + return data; + } catch (error) { + console.error("Failed to create file from link:", error); + throw error; + } + }, + [refreshFolders] + ); + + const handleUpload = useCallback( + async (files: File[]) => { + if ( + folderDetails?.assistant_ids && + folderDetails.assistant_ids.length > 0 + ) { + setShowUploadWarning(true); + } else { + await performUpload(files); + } + }, + [folderDetails] + ); + + const performUpload = useCallback( + async (files: File[]) => { + try { + const formData = new FormData(); + files.forEach((file) => { + formData.append("files", file); + }); + setIsLoading(true); + + await uploadFile(formData, folderDetails?.id || null); + await refreshFolderDetails(); + } catch (error) { + console.error("Error uploading 
documents:", error); + setError("Failed to upload documents. Please try again."); + } finally { + setIsLoading(false); + setShowUploadWarning(false); + } + }, + [uploadFile, folderDetails, refreshFolderDetails] + ); + + const handleCreateFileFromLink = useCallback(async () => { + if (!linkUrl) return; + setIsCreatingFileFromLink(true); + try { + await createFileFromLink(linkUrl, folderDetails?.id || null); + setLinkUrl(""); + await refreshFolderDetails(); + } catch (error) { + console.error("Error creating file from link:", error); + setError("Failed to create file from link. Please try again."); + } finally { + setIsCreatingFileFromLink(false); + } + }, [linkUrl, createFileFromLink, folderDetails, refreshFolderDetails]); + + const getFolders = async (): Promise => { + try { + const response = await fetch("/api/user/folder"); + if (!response.ok) { + throw new Error("Failed to fetch folders"); + } + return await response.json(); + } catch (error) { + console.error("Error fetching folders:", error); + return []; + } + }; + + const getFilesIndexingStatus = async ( + fileIds: number[] + ): Promise> => { + try { + const queryParams = fileIds.map((id) => `file_ids=${id}`).join("&"); + const response = await fetch( + `/api/user/file/indexing-status?${queryParams}` + ); + + if (!response.ok) { + throw new Error("Failed to fetch indexing status"); + } + + return await response.json(); + } catch (error) { + console.error("Error fetching indexing status:", error); + return {}; + } + }; + + const renameFile = useCallback( + async (fileId: number, newName: string) => { + try { + await documentsService.renameItem(fileId, newName, false); + await refreshFolders(); + } catch (error) { + console.error("Failed to rename file:", error); + throw error; + } + }, + [refreshFolders] + ); + + const renameFolder = useCallback( + async (folderId: number, newName: string) => { + try { + await documentsService.renameItem(folderId, newName, true); + await refreshFolders(); + } catch (error) { + console.error("Failed to rename folder:", error); + throw error; + } + }, + [refreshFolders] + ); + + const value: DocumentsContextType = { + files: folders.map((folder) => folder.files).flat(), + folders, + currentFolder, + presentingDocument, + searchQuery, + page, + isLoading, + error, + selectedFiles, + selectedFolders, + addSelectedFile, + removeSelectedFile, + addSelectedFolder, + removeSelectedFolder, + clearSelectedItems, + setSelectedFiles, + setSelectedFolders, + refreshFolders, + createFolder, + deleteItem, + moveItem, + renameFile, + renameFolder, + uploadFile, + setCurrentFolder, + setPresentingDocument, + setSearchQuery, + setPage, + getFilesIndexingStatus, + getFolderDetails, + downloadItem, + renameItem, + createFileFromLink, + handleUpload, + refreshFolderDetails, + getFolders, + folderDetails, + updateFolderDetails, + currentMessageFiles, + setCurrentMessageFiles, + }; + + return ( + + {children} + + ); +}; + +export const useDocumentsContext = () => { + const context = useContext(DocumentsContext); + if (context === undefined) { + throw new Error("useDocuments must be used within a DocumentsProvider"); + } + return context; +}; diff --git a/web/src/app/chat/my-documents/MyDocumenItem.tsx b/web/src/app/chat/my-documents/MyDocumenItem.tsx new file mode 100644 index 0000000000..e8c0dcb877 --- /dev/null +++ b/web/src/app/chat/my-documents/MyDocumenItem.tsx @@ -0,0 +1,342 @@ +import React, { useEffect, useRef, useState } from "react"; +import { + FolderIcon, + FileIcon, + DownloadIcon, + TrashIcon, + PencilIcon, + 
InfoIcon, + CheckIcon, + XIcon, +} from "lucide-react"; + +interface FolderItemProps { + folder: { name: string; id: number }; + onFolderClick: (folderId: number) => void; + onDeleteItem: (itemId: number, isFolder: boolean) => void; + onMoveItem: (folderId: number) => void; + editingItem: { id: number; name: string; isFolder: boolean } | null; + setEditingItem: React.Dispatch< + React.SetStateAction<{ id: number; name: string; isFolder: boolean } | null> + >; + handleRename: (id: number, newName: string, isFolder: boolean) => void; + onDragStart: ( + e: React.DragEvent, + item: { id: number; isFolder: boolean; name: string } + ) => void; + onDrop: (e: React.DragEvent, targetFolderId: number) => void; +} + +export function FolderItem({ + folder, + onFolderClick, + onDeleteItem, + onMoveItem, + editingItem, + setEditingItem, + handleRename, + onDragStart, + onDrop, +}: FolderItemProps) { + const [showMenu, setShowMenu] = useState(undefined); + const [newName, setNewName] = useState(folder.name); + + const isEditing = + editingItem && editingItem.id === folder.id && editingItem.isFolder; + + const folderItemRef = useRef(null); + + const handleContextMenu = (e: React.MouseEvent) => { + e.preventDefault(); + const xPos = + e.clientX - folderItemRef.current?.getBoundingClientRect().left! - 40; + setShowMenu(xPos); + }; + + const startEditing = () => { + setEditingItem({ id: folder.id, name: folder.name, isFolder: true }); + setNewName(folder.name); + setShowMenu(undefined); + }; + + const submitRename = (e: React.MouseEvent) => { + e.stopPropagation(); + handleRename(folder.id, newName, true); + }; + + const cancelEditing = (e: React.MouseEvent) => { + e.stopPropagation(); + setEditingItem(null); + setNewName(folder.name); + }; + + useEffect(() => { + document.addEventListener("click", (e) => { + setShowMenu(undefined); + }); + return () => { + document.removeEventListener("click", () => {}); + }; + }, [showMenu]); + + return ( +
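// getFilesIndexingStatus in DocumentsContext above builds its query string by
// hand (`file_ids=${id}` fragments joined with "&"). A minimal sketch of the
// same request URL built with URLSearchParams, which also handles encoding;
// the endpoint path is taken from the code above, the helper name is
// illustrative:
const indexingStatusUrl = (fileIds: number[]): string => {
  const params = new URLSearchParams();
  fileIds.forEach((id) => params.append("file_ids", String(id)));
  return `/api/user/file/indexing-status?${params.toString()}`;
};
// e.g. indexingStatusUrl([1, 2]) === "/api/user/file/indexing-status?file_ids=1&file_ids=2"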
!isEditing && onFolderClick(folder.id)} + onContextMenu={handleContextMenu} + draggable={!isEditing} + onDragStart={(e) => + onDragStart(e, { id: folder.id, isFolder: true, name: folder.name }) + } + onDragOver={(e) => e.preventDefault()} + onDrop={(e) => onDrop(e, folder.id)} + > +
+ + {isEditing ? ( +
+ e.stopPropagation()} + type="text" + value={newName} + onChange={(e) => { + e.stopPropagation(); + setNewName(e.target.value); + }} + className="border rounded px-2 py-1 mr-2" + autoFocus + /> + + +
+ ) : ( + {folder.name} + )} +
+ {showMenu && !isEditing && ( +
+ + + +
+ )} +
+ ); +} + +interface FileItemProps { + file: { name: string; id: number; document_id: string }; + onDeleteItem: (itemId: number, isFolder: boolean) => void; + onDownloadItem: (documentId: string) => void; + onMoveItem: (fileId: number) => void; + editingItem: { id: number; name: string; isFolder: boolean } | null; + setEditingItem: React.Dispatch< + React.SetStateAction<{ id: number; name: string; isFolder: boolean } | null> + >; + setPresentingDocument: ( + document_id: string, + semantic_identifier: string + ) => void; + handleRename: (fileId: number, newName: string, isFolder: boolean) => void; + onDragStart: ( + e: React.DragEvent, + item: { id: number; isFolder: boolean; name: string } + ) => void; +} + +export function FileItem({ + setPresentingDocument, + file, + onDeleteItem, + onDownloadItem, + onMoveItem, + editingItem, + setEditingItem, + handleRename, + onDragStart, +}: FileItemProps) { + const [showMenu, setShowMenu] = useState(); + const [newFileName, setNewFileName] = useState(file.name); + + const isEditing = + editingItem && editingItem.id === file.id && !editingItem.isFolder; + + const fileItemRef = useRef(null); + const handleContextMenu = (e: React.MouseEvent) => { + e.preventDefault(); + const xPos = + e.clientX - fileItemRef.current?.getBoundingClientRect().left! - 40; + setShowMenu(xPos); + }; + + useEffect(() => { + document.addEventListener("click", (e) => { + if (fileItemRef.current?.contains(e.target as Node)) { + return; + } + setShowMenu(undefined); + }); + document.addEventListener("contextmenu", (e) => { + if (fileItemRef.current?.contains(e.target as Node)) { + return; + } + setShowMenu(undefined); + }); + return () => { + document.removeEventListener("click", () => {}); + document.removeEventListener("contextmenu", () => {}); + }; + }, [showMenu]); + + const startEditing = () => { + setEditingItem({ id: file.id, name: file.name, isFolder: false }); + setNewFileName(file.name); + setShowMenu(undefined); + }; + + const submitRename = (e: React.MouseEvent) => { + e.stopPropagation(); + handleRename(file.id, newFileName, false); + }; + + const cancelEditing = (e: React.MouseEvent) => { + e.stopPropagation(); + setEditingItem(null); + setNewFileName(file.name); + }; + + return ( +
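// Both FolderItem and FileItem above dismiss their context menu via
// document-level listeners, but the cleanup passes a *new* arrow function to
// removeEventListener, so the originally registered listener is never
// actually removed. A minimal corrected sketch (assuming the same setShowMenu
// state setter); keeping one named handler makes add and remove symmetric:
useEffect(() => {
  const dismissMenu = () => setShowMenu(undefined);
  document.addEventListener("click", dismissMenu);
  document.addEventListener("contextmenu", dismissMenu);
  return () => {
    document.removeEventListener("click", dismissMenu);
    document.removeEventListener("contextmenu", dismissMenu);
  };
}, []);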
+ onDragStart(e, { id: file.id, isFolder: false, name: file.name }) + } + > + + +
+ ) : ( +

{file.name}

+ )} + + {showMenu && !isEditing && ( +
+ + + + +
+ )} +
+ ); +} diff --git a/web/src/app/chat/my-documents/MyDocuments.tsx b/web/src/app/chat/my-documents/MyDocuments.tsx new file mode 100644 index 0000000000..f246fb8b71 --- /dev/null +++ b/web/src/app/chat/my-documents/MyDocuments.tsx @@ -0,0 +1,476 @@ +"use client"; + +import React, { useMemo, useState, useTransition } from "react"; +import { useRouter, useSearchParams } from "next/navigation"; +import { + Plus, + FolderOpen, + MessageSquare, + ArrowUp, + ArrowDown, +} from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { SharedFolderItem } from "./components/SharedFolderItem"; +import CreateEntityModal from "@/components/modals/CreateEntityModal"; +import { useDocumentsContext } from "./DocumentsContext"; +import TextView from "@/components/chat/TextView"; +import { TokenDisplay } from "@/components/TokenDisplay"; +import { useChatContext } from "@/components/context/ChatContext"; +import { + PDFIcon, + TXTIcon, + DOCIcon, + HTMLIcon, + JSONIcon, + ImagesIcon, + XMLIcon, +} from "@/components/icons/icons"; + +enum SortType { + TimeCreated = "Time Created", + Alphabetical = "Alphabetical", + Tokens = "Tokens", +} + +enum SortDirection { + Ascending = "asc", + Descending = "desc", +} + +const SkeletonLoader = () => ( +
+
+
+
+
+
+
+
+
+
+
+
+); + +export default function MyDocuments() { + const { + folders, + currentFolder, + presentingDocument, + searchQuery, + page, + refreshFolders, + createFolder, + deleteItem, + moveItem, + isLoading, + downloadItem, + renameItem, + setCurrentFolder, + setPresentingDocument, + setSearchQuery, + setPage, + } = useDocumentsContext(); + + const [sortType, setSortType] = useState(SortType.TimeCreated); + const [sortDirection, setSortDirection] = useState( + SortDirection.Descending + ); + const pageLimit = 10; + const searchParams = useSearchParams(); + const router = useRouter(); + const { popup, setPopup } = usePopup(); + const [isCreateFolderOpen, setIsCreateFolderOpen] = useState(false); + const [isPending, startTransition] = useTransition(); + const [hoveredColumn, setHoveredColumn] = useState(null); + + const handleSortChange = (newSortType: SortType) => { + if (sortType === newSortType) { + setSortDirection( + sortDirection === SortDirection.Ascending + ? SortDirection.Descending + : SortDirection.Ascending + ); + } else { + setSortType(newSortType); + setSortDirection(SortDirection.Descending); + } + }; + + const handleFolderClick = (id: number) => { + startTransition(() => { + router.push(`/chat/my-documents/${id}`); + setPage(1); + setCurrentFolder(id); + }); + }; + + const handleCreateFolder = async (name: string) => { + try { + const folderResponse = await createFolder(name); + startTransition(() => { + setPage(1); + setIsCreateFolderOpen(false); + setCurrentFolder(folderResponse.id); + }); + } catch (error) { + console.error("Error creating folder:", error); + setPopup({ + message: + error instanceof Error + ? error.message + : "Failed to create knowledge group", + type: "error", + }); + } + }; + + const handleDeleteItem = async (itemId: number, isFolder: boolean) => { + if (!isFolder) { + // For files, keep the old confirmation + const confirmDelete = window.confirm( + `Are you sure you want to delete this file?` + ); + + if (confirmDelete) { + try { + await deleteItem(itemId, isFolder); + setPopup({ + message: `File deleted successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error deleting item:", error); + setPopup({ + message: `Failed to delete file`, + type: "error", + }); + } + } + } + + // If it's a folder, the SharedFolderItem component will handle it + }; + + const handleMoveItem = async ( + itemId: number, + currentFolderId: number | null, + isFolder: boolean + ) => { + const availableFolders = folders + .filter((folder) => folder.id !== itemId) + .map((folder) => `${folder.id}: ${folder.name}`) + .join("\n"); + + const promptMessage = `Enter the ID of the destination folder:\n\nAvailable folders:\n${availableFolders}\n\nEnter 0 to move to the root folder.`; + const destinationFolderId = prompt(promptMessage); + + if (destinationFolderId !== null) { + const newFolderId = parseInt(destinationFolderId, 10); + if (isNaN(newFolderId)) { + setPopup({ + message: "Invalid folder ID", + type: "error", + }); + return; + } + + try { + await moveItem( + itemId, + newFolderId === 0 ? null : newFolderId, + isFolder + ); + setPopup({ + message: `${ + isFolder ? 
"Knowledge Group" : "File" + } moved successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error moving item:", error); + setPopup({ + message: "Failed to move item", + type: "error", + }); + } + } + }; + + const handleDownloadItem = async (documentId: string) => { + try { + await downloadItem(documentId); + } catch (error) { + console.error("Error downloading file:", error); + setPopup({ + message: "Failed to download file", + type: "error", + }); + } + }; + + const onRenameItem = async ( + itemId: number, + currentName: string, + isFolder: boolean + ) => { + const newName = prompt( + `Enter new name for ${isFolder ? "Knowledge Group" : "File"}:`, + currentName + ); + if (newName && newName !== currentName) { + try { + await renameItem(itemId, newName, isFolder); + setPopup({ + message: `${ + isFolder ? "Knowledge Group" : "File" + } renamed successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error renaming item:", error); + setPopup({ + message: `Failed to rename ${isFolder ? "Knowledge Group" : "File"}`, + type: "error", + }); + } + } + }; + + const filteredFolders = useMemo(() => { + return folders + .filter( + (folder) => + folder.name.toLowerCase().includes(searchQuery.toLowerCase()) || + folder.description.toLowerCase().includes(searchQuery.toLowerCase()) + ) + .sort((a, b) => { + let comparison = 0; + + if (sortType === SortType.TimeCreated) { + comparison = + new Date(b.created_at).getTime() - new Date(a.created_at).getTime(); + } else if (sortType === SortType.Alphabetical) { + comparison = a.name.localeCompare(b.name); + } else if (sortType === SortType.Tokens) { + const aTokens = a.files.reduce( + (acc, file) => acc + (file.token_count || 0), + 0 + ); + const bTokens = b.files.reduce( + (acc, file) => acc + (file.token_count || 0), + 0 + ); + comparison = bTokens - aTokens; + } + + return sortDirection === SortDirection.Ascending + ? -comparison + : comparison; + }); + }, [folders, searchQuery, sortType, sortDirection]); + + const renderSortIndicator = (columnType: SortType) => { + if (sortType !== columnType) return null; + + return sortDirection === SortDirection.Ascending ? ( + + ) : ( + + ); + }; + + const renderHoverIndicator = (columnType: SortType) => { + if (sortType === columnType || hoveredColumn !== columnType) return null; + + return ; + }; + + const handleStartChat = () => { + router.push(`/chat?allMyDocuments=true`); + }; + + const totalTokens = folders.reduce( + (acc, folder) => + acc + + (folder.files.reduce((acc, file) => acc + (file.token_count || 0), 0) || + 0), + 0 + ); + const { llmProviders } = useChatContext(); + + const modelDescriptors = llmProviders.flatMap((provider) => + Object.entries(provider.model_token_limits ?? {}).map( + ([modelName, maxTokens]) => ({ + modelName, + provider: provider.provider, + maxTokens, + }) + ) + ); + + const selectedModel = modelDescriptors[0] || { + modelName: "Unknown", + provider: "Unknown", + maxTokens: 0, + }; + const maxTokens = selectedModel.maxTokens; + const tokenPercentage = (totalTokens / maxTokens) * 100; + + return ( +
+
+

+ My Documents +

+
+ + + New Folder + + } + hideLabel + /> +
+
+ +
+
+
+
+ + + +
+ setSearchQuery(e.target.value)} + /> +
+
+ + {presentingDocument && ( + setPresentingDocument(null)} + /> + )} + {popup} +
+
+ + +
+
+ +
+ {isLoading ? ( + + ) : filteredFolders.length > 0 ? ( +
+
+ + + +
+
+ {filteredFolders.map((folder) => ( + acc + (file.token_count || 0), + 0 + ), + }} + onClick={handleFolderClick} + description={folder.description} + lastUpdated={folder.created_at} + onRename={() => onRenameItem(folder.id, folder.name, true)} + onDelete={() => handleDeleteItem(folder.id, true)} + onMove={() => + handleMoveItem(folder.id, currentFolder, true) + } + /> + ))} +
+
+ ) : ( +
+ +

+ No items found +

+
+ )} +
+
+
+ ); +} diff --git a/web/src/app/chat/my-documents/WrappedDocuments.tsx b/web/src/app/chat/my-documents/WrappedDocuments.tsx new file mode 100644 index 0000000000..78f18d1558 --- /dev/null +++ b/web/src/app/chat/my-documents/WrappedDocuments.tsx @@ -0,0 +1,21 @@ +"use client"; + +import MyDocuments from "./MyDocuments"; +import { BackButton } from "@/components/BackButton"; +import { useRouter } from "next/navigation"; + +export default function WrappedUserDocuments() { + const router = useRouter(); + return ( +
+
+ { + router.push("/chat"); + }} + /> +
+ +
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/UserFileContent.tsx b/web/src/app/chat/my-documents/[id]/UserFileContent.tsx new file mode 100644 index 0000000000..bd9dcf7ed0 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/UserFileContent.tsx @@ -0,0 +1,7 @@ +import { useDocumentsContext } from "../DocumentsContext"; + +export default function UserFolder({ userFileId }: { userFileId: string }) { + const { folders } = useDocumentsContext(); + + return
{folders.length}
; +} diff --git a/web/src/app/chat/my-documents/[id]/UserFolder.tsx b/web/src/app/chat/my-documents/[id]/UserFolder.tsx new file mode 100644 index 0000000000..197db1bd58 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/UserFolder.tsx @@ -0,0 +1,25 @@ +"use client"; + +import SidebarWrapper from "@/app/assistants/SidebarWrapper"; +import UserFolderContent from "./UserFolderContent"; +import { BackButton } from "@/components/BackButton"; +import { useRouter } from "next/navigation"; +export default function WrappedUserFolders({ + userFileId, +}: { + userFileId: string; +}) { + const router = useRouter(); + return ( +
+
+ { + router.push("/chat/my-documents"); + }} + /> +
+ +
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/UserFolderContent.tsx b/web/src/app/chat/my-documents/[id]/UserFolderContent.tsx new file mode 100644 index 0000000000..4f4c3e0975 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/UserFolderContent.tsx @@ -0,0 +1,796 @@ +import React, { useEffect, useState, useRef } from "react"; +import { useRouter } from "next/navigation"; +import { + ChevronRight, + MessageSquare, + ArrowUp, + ArrowDown, + Plus, + Trash, + Upload, + AlertCircle, + X, +} from "lucide-react"; +import { useDocumentsContext } from "../DocumentsContext"; +import { useChatContext } from "@/components/context/ChatContext"; +import { Button } from "@/components/ui/button"; +import { DocumentList } from "./components/DocumentList"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { usePopupFromQuery } from "@/components/popup/PopupFromQuery"; +import { Input } from "@/components/ui/input"; +import { DeleteEntityModal } from "@/components/DeleteEntityModal"; +import { MoveFolderModal } from "@/components/MoveFolderModal"; +import { FolderResponse } from "../DocumentsContext"; +import { getDisplayNameForModel } from "@/lib/hooks"; +import { TokenDisplay } from "@/components/TokenDisplay"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import CreateEntityModal from "@/components/modals/CreateEntityModal"; +import { CleanupModal, CleanupPeriod } from "@/components/CleanupModal"; +import { bulkCleanupFiles } from "../api"; + +// Define allowed file extensions +const ALLOWED_FILE_TYPES = [ + // Documents + ".pdf", + ".doc", + ".docx", + ".txt", + ".rtf", + ".odt", + // Spreadsheets + ".csv", + ".xls", + ".xlsx", + ".ods", + // Presentations + ".ppt", + ".pptx", + ".odp", + // Images + ".jpg", + ".jpeg", + ".png", + ".gif", + ".bmp", + ".svg", + ".webp", + // Web + ".html", + ".htm", + ".xml", + ".json", + ".md", + ".markdown", + // Archives (if supported by your system) + ".zip", + ".rar", + ".7z", + ".tar", + ".gz", + // Code + ".js", + ".jsx", + ".ts", + ".tsx", + ".py", + ".java", + ".c", + ".cpp", + ".cs", + ".php", + ".rb", + ".go", + ".swift", + ".html", + ".css", + ".scss", + ".sass", + ".less", +]; + +// Function to check if a file type is allowed +const isFileTypeAllowed = (file: File): boolean => { + const fileName = file.name.toLowerCase(); + const fileExtension = fileName.substring(fileName.lastIndexOf(".")); + return ALLOWED_FILE_TYPES.includes(fileExtension); +}; + +// Filter files to only include allowed types +const filterAllowedFiles = ( + files: File[] +): { allowed: File[]; rejected: string[] } => { + const allowed: File[] = []; + const rejected: string[] = []; + + files.forEach((file) => { + if (isFileTypeAllowed(file)) { + allowed.push(file); + } else { + rejected.push(file.name); + } + }); + + return { allowed, rejected }; +}; + +// Define enums outside the component and export them +export enum SortType { + TimeCreated = "Time Created", + Alphabetical = "Alphabetical", + Tokens = "Tokens", +} + +export enum SortDirection { + Ascending = "asc", + Descending = "desc", +} + +// Define a type for tracking file upload progress +interface UploadProgress { + fileName: string; + progress: number; +} + +export default function UserFolderContent({ folderId }: { folderId: number }) { + const router = useRouter(); + const { llmProviders } = useChatContext(); + const { popup, setPopup } = usePopup(); + const { + folderDetails, + getFolderDetails, + downloadItem, + renameItem, 
+ deleteItem, + createFileFromLink, + handleUpload, + refreshFolderDetails, + getFolders, + moveItem, + updateFolderDetails, + } = useDocumentsContext(); + + const [editingItemId, setEditingItemId] = useState(null); + const [newItemName, setNewItemName] = useState(""); + const [editingDescription, setEditingDescription] = useState(false); + const [newDescription, setNewDescription] = useState(""); + const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false); + const [deleteItemId, setDeleteItemId] = useState(null); + const [deleteItemType, setDeleteItemType] = useState<"file" | "folder">( + "file" + ); + const [deleteItemName, setDeleteItemName] = useState(""); + const [isMoveModalOpen, setIsMoveModalOpen] = useState(false); + const [folders, setFolders] = useState([]); + const [searchQuery, setSearchQuery] = useState(""); + const [sortType, setSortType] = useState(SortType.TimeCreated); + const [sortDirection, setSortDirection] = useState( + SortDirection.Descending + ); + const [hoveredColumn, setHoveredColumn] = useState(null); + const [isDraggingOver, setIsDraggingOver] = useState(false); + const pageContainerRef = useRef(null); + + const modelDescriptors = llmProviders.flatMap((provider) => + Object.entries(provider.model_token_limits ?? {}).map( + ([modelName, maxTokens]) => ({ + modelName, + provider: provider.provider, + maxTokens, + }) + ) + ); + + const { popup: folderCreatedPopup } = usePopupFromQuery({ + "folder-created": { + message: `Folder created successfully`, + type: "success", + }, + }); + const [selectedModel, setSelectedModel] = useState(modelDescriptors[0]); + + const [uploadingFiles, setUploadingFiles] = useState([]); + const [uploadProgress, setUploadProgress] = useState([]); + const [isCleanupModalOpen, setIsCleanupModalOpen] = useState(false); + const [invalidFiles, setInvalidFiles] = useState([]); + const [showInvalidFileMessage, setShowInvalidFileMessage] = useState(false); + + useEffect(() => { + if (!folderDetails) { + getFolderDetails(folderId); + } + }, [folderId, folderDetails, getFolderDetails]); + + useEffect(() => { + const fetchFolders = async () => { + try { + const fetchedFolders = await getFolders(); + setFolders(fetchedFolders); + } catch (error) { + console.error("Error fetching folders:", error); + } + }; + + fetchFolders(); + }, []); + + // Hide invalid file message after 5 seconds + useEffect(() => { + if (showInvalidFileMessage) { + // Remove the auto-hide timer + return () => {}; + } + }, [showInvalidFileMessage]); + + const handleBack = () => { + router.push("/chat/my-documents"); + }; + if (!folderDetails) { + return ( +
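// isFileTypeAllowed above derives the extension with
// name.substring(name.lastIndexOf(".")). For an extensionless name,
// lastIndexOf(".") is -1 and substring(-1) returns the whole name, which
// fails the allow-list check only by accident. A sketch that makes the
// no-extension case explicit (helper names are illustrative):
function extensionOf(name: string): string | null {
  const dot = name.lastIndexOf(".");
  return dot >= 0 ? name.slice(dot).toLowerCase() : null;
}
function isAllowedFile(file: File, allowedTypes: string[]): boolean {
  const ext = extensionOf(file.name);
  return ext !== null && allowedTypes.includes(ext);
}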
+
+

+ No Folder Found +

+

+ The requested folder does not exist or you don't have permission to + view it. +

+ +
+
+ ); + } + + const totalTokens = folderDetails.files.reduce( + (acc, file) => acc + (file.token_count || 0), + 0 + ); + const maxTokens = selectedModel.maxTokens; + const tokenPercentage = (totalTokens / maxTokens) * 100; + + const handleStartChat = () => { + router.push(`/chat?userFolderId=${folderId}`); + }; + + const handleCreateFileFromLink = async (url: string) => { + await createFileFromLink(url, folderId); + }; + + const handleRenameItem = async ( + itemId: number, + currentName: string, + isFolder: boolean + ) => { + setEditingItemId(itemId); + setNewItemName(currentName); + }; + + const handleSaveRename = async (itemId: number, isFolder: boolean) => { + if (newItemName && newItemName !== folderDetails.name) { + try { + await renameItem(itemId, newItemName, isFolder); + setPopup({ + message: `${isFolder ? "Folder" : "File"} renamed successfully`, + type: "success", + }); + await refreshFolderDetails(); + } catch (error) { + console.error("Error renaming item:", error); + setPopup({ + message: `Failed to rename ${isFolder ? "folder" : "file"}`, + type: "error", + }); + } + } + setEditingItemId(null); + }; + + const handleCancelRename = () => { + setEditingItemId(null); + setNewItemName(""); + }; + + const handleSaveDescription = async () => { + if (folderDetails && newDescription !== folderDetails.description) { + try { + alert( + JSON.stringify({ + id: folderDetails.id, + name: folderDetails.name, + newDescription, + }) + ); + await updateFolderDetails( + folderDetails.id, + folderDetails.name, + newDescription + ); + setPopup({ + message: "Folder description updated successfully", + type: "success", + }); + await refreshFolderDetails(); + } catch (error) { + console.error("Error updating folder description:", error); + setPopup({ + message: "Failed to update folder description", + type: "error", + }); + } + } + setEditingDescription(false); + }; + + const handleCancelDescription = () => { + setEditingDescription(false); + setNewDescription(""); + }; + + const handleDeleteItem = ( + itemId: number, + isFolder: boolean, + itemName: string + ) => { + setDeleteItemId(itemId); + setDeleteItemType(isFolder ? 
"folder" : "file"); + setDeleteItemName(itemName); + setIsDeleteModalOpen(true); + }; + + const confirmDelete = async () => { + if (deleteItemId !== null) { + try { + await deleteItem(deleteItemId, deleteItemType === "folder"); + setPopup({ + message: `${deleteItemType} deleted successfully`, + type: "success", + }); + await refreshFolderDetails(); + } catch (error) { + console.error("Error deleting item:", error); + setPopup({ + message: `Failed to delete ${deleteItemType}`, + type: "error", + }); + } + } + setIsDeleteModalOpen(false); + }; + + const handleMoveFolder = () => { + setIsMoveModalOpen(true); + }; + + const confirmMove = async (targetFolderId: number) => { + try { + await moveItem(folderId, targetFolderId, true); + setPopup({ + message: "Folder moved successfully", + type: "success", + }); + router.push(`/chat/my-documents/${targetFolderId}`); + } catch (error) { + console.error("Error moving folder:", error); + setPopup({ + message: "Failed to move folder", + type: "error", + }); + } + setIsMoveModalOpen(false); + }; + + const handleMoveFile = async (fileId: number, targetFolderId: number) => { + try { + await moveItem(fileId, targetFolderId, false); + setPopup({ + message: "File moved successfully", + type: "success", + }); + await refreshFolderDetails(); + } catch (error) { + console.error("Error moving file:", error); + setPopup({ + message: "Failed to move file", + type: "error", + }); + } + }; + + const handleSortChange = (newSortType: SortType) => { + if (sortType === newSortType) { + setSortDirection( + sortDirection === SortDirection.Ascending + ? SortDirection.Descending + : SortDirection.Ascending + ); + } else { + setSortType(newSortType); + setSortDirection(SortDirection.Descending); + } + }; + + const renderSortIndicator = (columnType: SortType) => { + if (sortType !== columnType) return null; + + return sortDirection === SortDirection.Ascending ? 
( + + ) : ( + + ); + }; + + const renderHoverIndicator = (columnType: SortType) => { + if (sortType === columnType || hoveredColumn !== columnType) return null; + + return ; + }; + + const handleCreateFolder = async (name: string) => { + try { + // await createFolder(name, folderId); + } catch (error) { + console.error("Error creating folder:", error); + } + }; + + // Add new drag and drop handlers + const handlePageDragEnter = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + if (folderDetails?.id !== -1) { + setIsDraggingOver(true); + } + }; + + const handlePageDragOver = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + // Keep the isDraggingOver state true while dragging over + if (folderDetails?.id !== -1 && !isDraggingOver) { + setIsDraggingOver(true); + } + }; + + const handlePageDragLeave = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + + // Only set isDraggingOver to false if we're leaving the container itself + if ( + pageContainerRef.current && + !pageContainerRef.current.contains(e.relatedTarget as Node) + ) { + setIsDraggingOver(false); + } + }; + + // Handle file upload progress tracking + const handleUploadProgress = (fileName: string, progress: number) => { + setUploadProgress((prev) => { + const existing = prev.findIndex((p) => p.fileName === fileName); + if (existing >= 0) { + // Update existing progress + const updated = [...prev]; + updated[existing] = { fileName, progress }; + return updated; + } else { + // Add new file progress + return [...prev, { fileName, progress }]; + } + }); + }; + + // Add drag-drop upload progress tracking + const handlePageDrop = async (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDraggingOver(false); + + if ( + folderDetails?.id !== -1 && + e.dataTransfer.files && + e.dataTransfer.files.length > 0 + ) { + const files = Array.from(e.dataTransfer.files); + + // Filter out invalid file types + const { allowed, rejected } = filterAllowedFiles(files); + + // Show error message if there are invalid files + if (rejected.length > 0) { + setInvalidFiles(rejected); + setShowInvalidFileMessage(true); + } + + // Only proceed if there are valid files + if (allowed.length > 0) { + // Track uploading files + const fileNames = allowed.map((file) => file.name); + setUploadingFiles((prev) => [...prev, ...fileNames]); + + // Initialize progress for each file + fileNames.forEach((fileName) => { + handleUploadProgress(fileName, 0); + }); + + try { + await handleUpload(allowed); + } catch (error) { + console.error("Error uploading files:", error); + setPopup({ + message: "Failed to upload files", + type: "error", + }); + } + } + } + }; + + // Function to update uploading files that can be called from DocumentList + const updateUploadingFiles = (newUploadingFiles: string[]) => { + setUploadingFiles(newUploadingFiles); + }; + + const handleCleanup = () => { + setIsCleanupModalOpen(true); + }; + + const confirmCleanup = async (period: CleanupPeriod, value: number) => { + try { + let daysOlderThan: number | null = null; + + // Convert the selected period and value to days + if (period === CleanupPeriod.Day) { + daysOlderThan = 1; + } else if (period === CleanupPeriod.Week) { + daysOlderThan = 7; + } else if (period === CleanupPeriod.Month) { + daysOlderThan = 30; + } else if (period === CleanupPeriod.All) { + // All documents, don't set a date filter + daysOlderThan = null; + } + + const result = await bulkCleanupFiles({ + folder_id: folderId, + days_older_than: 
daysOlderThan, + }); + + setPopup({ + message: result.message, + type: "success", + }); + + // Refresh folder details to update the UI + await refreshFolderDetails(); + + // Close the modal after successful completion + setIsCleanupModalOpen(false); + } catch (error) { + console.error("Error during cleanup:", error); + setPopup({ + message: "Failed to cleanup files", + type: "error", + }); + // Modal will remain open, user can try again or cancel + } + }; + + return ( +
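// handleUploadProgress above upserts into an array keyed by file name using
// the functional form of setState. The update itself, sketched as a pure
// function over the UploadProgress shape defined near the top of this file:
function upsertProgress(
  list: UploadProgress[],
  fileName: string,
  progress: number
): UploadProgress[] {
  const existing = list.findIndex((p) => p.fileName === fileName);
  if (existing === -1) return [...list, { fileName, progress }];
  const updated = [...list];
  updated[existing] = { fileName, progress };
  return updated;
}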
+ {popup} + {folderCreatedPopup} + + {/* Invalid file message */} + + {/* Add a visual overlay when dragging files */} + {isDraggingOver && ( +
+
+
+ +
+

+ Drop files to upload +

+

+ Files will be uploaded to{" "} + + {folderDetails?.name || "this folder"} + +

+
+
+ )} + + setIsDeleteModalOpen(false)} + onConfirm={confirmDelete} + entityType={deleteItemType} + entityName={deleteItemName} + /> + setIsMoveModalOpen(false)} + onMove={confirmMove} + folders={folders} + currentFolderId={folderId} + /> + + setIsCleanupModalOpen(false)} + onConfirm={confirmCleanup} + /> + +
+
+ + +
+ +
+
+
+ + + +
+ setSearchQuery(e.target.value)} + /> +
+
+ +
+
+ + +
+
+ + { + const blob = await downloadItem(documentId); + const url = URL.createObjectURL(blob); + window.open(url, "_blank"); + }} + onUpload={handleUpload} + onMove={handleMoveFile} + folders={folders} + disabled={folderDetails.id === -1} + editingItemId={editingItemId} + onSaveRename={handleSaveRename} + onCancelRename={handleCancelRename} + newItemName={newItemName} + setNewItemName={setNewItemName} + tokenPercentage={tokenPercentage} + totalTokens={totalTokens} + maxTokens={maxTokens} + selectedModelName={getDisplayNameForModel(selectedModel.modelName)} + searchQuery={searchQuery} + sortType={sortType} + sortDirection={sortDirection} + onSortChange={handleSortChange} + hoveredColumn={hoveredColumn} + setHoveredColumn={setHoveredColumn} + renderSortIndicator={renderSortIndicator} + renderHoverIndicator={renderHoverIndicator} + externalUploadingFiles={uploadingFiles} + updateUploadingFiles={updateUploadingFiles} + onUploadProgress={handleUploadProgress} + invalidFiles={invalidFiles} + showInvalidFileMessage={showInvalidFileMessage} + setShowInvalidFileMessage={setShowInvalidFileMessage} + /> +
+
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/components/DocumentList.tsx b/web/src/app/chat/my-documents/[id]/components/DocumentList.tsx new file mode 100644 index 0000000000..4a737a59fe --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/DocumentList.tsx @@ -0,0 +1,775 @@ +import React, { useState, useEffect, useCallback, useMemo } from "react"; +import { + FileResponse, + FolderResponse, + useDocumentsContext, +} from "../../DocumentsContext"; +import { FileListItem } from "../../components/FileListItem"; +import { Button } from "@/components/ui/button"; +import { + Loader2, + AlertCircle, + X, + RefreshCw, + Trash2, + MoreHorizontal, +} from "lucide-react"; +import TextView from "@/components/chat/TextView"; +import { Input } from "@/components/ui/input"; +import { FileUploadSection } from "./upload/FileUploadSection"; +import { SortType, SortDirection } from "../UserFolderContent"; +import { CircularProgress } from "./upload/CircularProgress"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; + +// Define a type for uploading files that includes progress +interface UploadingFile { + name: string; + progress: number; +} + +// Add interface for failed uploads +interface FailedUpload { + name: string; + error: string; + isPopoverOpen: boolean; +} + +interface DocumentListProps { + files: FileResponse[]; + onRename: ( + itemId: number, + currentName: string, + isFolder: boolean + ) => Promise; + onDelete: (itemId: number, isFolder: boolean, itemName: string) => void; + onDownload: (documentId: string) => Promise; + onUpload: (files: File[]) => void; + onMove: (fileId: number, targetFolderId: number) => Promise; + folders: FolderResponse[]; + isLoading: boolean; + disabled?: boolean; + editingItemId: number | null; + onSaveRename: (itemId: number, isFolder: boolean) => Promise; + onCancelRename: () => void; + newItemName: string; + setNewItemName: React.Dispatch>; + folderId: number; + tokenPercentage?: number; + totalTokens?: number; + maxTokens?: number; + selectedModelName?: string; + searchQuery?: string; + sortType?: SortType; + sortDirection?: SortDirection; + onSortChange?: (newSortType: SortType) => void; + hoveredColumn?: SortType | null; + setHoveredColumn?: React.Dispatch>; + renderSortIndicator?: (columnType: SortType) => JSX.Element | null; + renderHoverIndicator?: (columnType: SortType) => JSX.Element | null; + externalUploadingFiles?: string[]; + updateUploadingFiles?: (newUploadingFiles: string[]) => void; + onUploadProgress?: (fileName: string, progress: number) => void; + invalidFiles?: string[]; + showInvalidFileMessage?: boolean; + setShowInvalidFileMessage?: React.Dispatch>; +} + +// Animated dots component for the indexing status +export const AnimatedDots: React.FC = () => { + const [dots, setDots] = useState(1); + + useEffect(() => { + const interval = setInterval(() => { + setDots((prev) => (prev === 3 ? 
1 : prev + 1)); + }, 500); + + return () => clearInterval(interval); + }, []); + + return {".".repeat(dots)}; +}; + +export const DocumentList: React.FC = ({ + files, + onRename, + onDelete, + onDownload, + onUpload, + onMove, + folders, + isLoading, + editingItemId, + onSaveRename, + onCancelRename, + newItemName, + setNewItemName, + folderId, + tokenPercentage, + totalTokens, + maxTokens, + selectedModelName, + searchQuery = "", + sortType, + sortDirection, + onSortChange, + hoveredColumn, + setHoveredColumn, + renderSortIndicator, + renderHoverIndicator, + externalUploadingFiles = [], + updateUploadingFiles, + onUploadProgress, + invalidFiles = [], + showInvalidFileMessage = false, + setShowInvalidFileMessage, +}) => { + const [presentingDocument, setPresentingDocument] = + useState(null); + const openDocument = (file: FileResponse) => { + if (file.link_url) { + window.open(file.link_url, "_blank"); + } else { + setPresentingDocument(file); + } + }; + const [uploadingFiles, setUploadingFiles] = useState([]); + const [completedFiles, setCompletedFiles] = useState([]); + // Add state for failed uploads + const [failedUploads, setFailedUploads] = useState([]); + const [refreshInterval, setRefreshInterval] = useState( + null + ); + + // Merge external uploading files with local ones + useEffect(() => { + if (externalUploadingFiles.length > 0) { + setUploadingFiles((prev) => { + // Convert string filenames to UploadingFile objects with 0 progress + const newFiles = externalUploadingFiles + .filter( + (name) => + !prev.some((file) => file.name === name) && + !completedFiles.includes(name) + ) + .map((name) => ({ name, progress: 0 })); + + return [...prev, ...newFiles]; + }); + startRefreshInterval(); + } + }, [externalUploadingFiles, completedFiles]); + + const { createFileFromLink } = useDocumentsContext(); + + const handleCreateFileFromLink = async (url: string) => { + setUploadingFiles((prev) => [...prev, { name: url, progress: 0 }]); + + try { + await createFileFromLink(url, folderId); + startRefreshInterval(); + } catch (error) { + console.error("Error creating file from link:", error); + // Remove from uploading files + setUploadingFiles((prev) => prev.filter((file) => file.name !== url)); + // Add to failed uploads with isPopoverOpen initialized to false + setFailedUploads((prev) => [ + ...prev, + { + name: url, + error: + error instanceof Error ? error.message : "Failed to upload file", + isPopoverOpen: false, + }, + ]); + } + }; + + // Add handler for retrying failed uploads + const handleRetryUpload = async (url: string) => { + // Remove from failed uploads + setFailedUploads((prev) => prev.filter((file) => file.name !== url)); + + // Add back to uploading files + setUploadingFiles((prev) => [...prev, { name: url, progress: 0 }]); + + try { + await createFileFromLink(url, folderId); + startRefreshInterval(); + } catch (error) { + console.error("Error retrying file upload from link:", error); + // Remove from uploading files again + setUploadingFiles((prev) => prev.filter((file) => file.name !== url)); + // Add back to failed uploads with isPopoverOpen initialized to false + setFailedUploads((prev) => [ + ...prev, + { + name: url, + error: + error instanceof Error ? 
error.message : "Failed to upload file", + isPopoverOpen: false, + }, + ]); + } + }; + + // Add handler for deleting failed uploads + const handleDeleteFailedUpload = (url: string) => { + setFailedUploads((prev) => prev.filter((file) => file.name !== url)); + }; + + const handleFileUpload = (files: File[]) => { + const fileObjects = files.map((file) => ({ + name: file.name, + progress: 0, + })); + + setUploadingFiles((prev) => [...prev, ...fileObjects]); + onUpload(files); + startRefreshInterval(); + }; + + // Filter files based on search query + const filteredFiles = searchQuery + ? files.filter((file) => + file.name.toLowerCase().includes(searchQuery.toLowerCase()) + ) + : files; + + // Sort files if sorting props are provided + const sortedFiles = + sortType && sortDirection + ? [...filteredFiles].sort((a, b) => { + let comparison = 0; + + if (sortType === SortType.TimeCreated) { + const dateA = a.created_at ? new Date(a.created_at).getTime() : 0; + const dateB = b.created_at ? new Date(b.created_at).getTime() : 0; + comparison = dateB - dateA; + } else if (sortType === SortType.Alphabetical) { + comparison = a.name.localeCompare(b.name); + } else if (sortType === SortType.Tokens) { + comparison = (b.token_count || 0) - (a.token_count || 0); + } + + return sortDirection === SortDirection.Ascending + ? -comparison + : comparison; + }) + : filteredFiles; + + // Add a function to mark a file as complete + const markFileComplete = (fileName: string) => { + // Update progress to 100% + setUploadingFiles((prev) => + prev.map((file) => + file.name === fileName ? { ...file, progress: 100 } : file + ) + ); + + // Add to completed files + setCompletedFiles((prev) => [...prev, fileName]); + + // Remove from uploading files after showing 100% for a moment + setTimeout(() => { + setUploadingFiles((prev) => + prev.filter((file) => file.name !== fileName) + ); + }, 2000); // Show complete state for 2 seconds + + // Remove from completed files after a longer delay + setTimeout(() => { + setCompletedFiles((prev) => prev.filter((name) => name !== fileName)); + }, 3000); + }; + + const startRefreshInterval = () => { + if (refreshInterval) { + clearInterval(refreshInterval); + } + + // Add a timestamp to track when we started refreshing + const startTime = Date.now(); + const MAX_REFRESH_TIME = 30000; // 30 seconds max for any upload to complete + + const interval = setInterval(() => { + // Check if we've been waiting too long, if so, clear uploading state + if (Date.now() - startTime > MAX_REFRESH_TIME) { + setUploadingFiles([]); + setCompletedFiles([]); + if (updateUploadingFiles) { + updateUploadingFiles([]); + } + clearInterval(interval); + setRefreshInterval(null); + return; + } + + // Simulate progress for files that don't have real progress tracking yet + setUploadingFiles((prev) => + prev.map((file) => { + // Don't update files that are already complete + if (completedFiles.includes(file.name) || file.progress >= 100) { + return file; + } + + // Slow down progress as it approaches completion for more realistic feel + let increment; + if (file.progress < 70) { + // Normal increment for first 70% + increment = Math.floor(Math.random() * 10) + 5; + } else if (file.progress < 90) { + // Slower increment between 70-90% + increment = Math.floor(Math.random() * 5) + 2; + } else { + // Very slow for final 10% + increment = Math.floor(Math.random() * 2) + 1; + } + + const newProgress = Math.min(file.progress + increment, 99); // Cap at 99% until confirmed + return { ...file, progress: newProgress }; + }) + 
); + + const allFilesUploaded = uploadingFiles.every((uploadingFile) => { + // Skip files already marked as complete + if (completedFiles.includes(uploadingFile.name)) { + return true; + } + + if (uploadingFile.name.startsWith("http")) { + // For URL uploads, extract the domain and check for files containing it + try { + // Get the hostname (domain) from the URL + const url = new URL(uploadingFile.name); + const hostname = url.hostname; + alert("checking for " + hostname); + alert(JSON.stringify(files)); + + // Look for recently added files that might match this URL + const isUploaded = files.some( + (file) => + // Check for hostname in filename + file.name.toLowerCase().includes(hostname.toLowerCase()) || + // Check for recently created files + (file.lastModified && + new Date(file.lastModified).getTime() > startTime - 60000) + ); + + if (isUploaded) { + // Mark as complete if found in files list + markFileComplete(uploadingFile.name); + } + return isUploaded; + } catch (e) { + console.error("Failed to parse URL:", e); + return false; + } + } + + // For regular file uploads, check if filename exists in the files list + const isUploaded = files.some( + (file) => file.name === uploadingFile.name + ); + if (isUploaded) { + // Mark as complete if found in files list + markFileComplete(uploadingFile.name); + } + return isUploaded; + }); + + if ( + allFilesUploaded && + uploadingFiles.length > 0 && + completedFiles.length === uploadingFiles.length + ) { + // If all files are marked complete and no new uploads are happening, clean up + setTimeout(() => { + setUploadingFiles([]); + setCompletedFiles([]); + if (updateUploadingFiles) { + updateUploadingFiles([]); + } + clearInterval(interval); + setRefreshInterval(null); + }, 2000); + } + }, 1000); // Update every second for smoother animation + + setRefreshInterval(interval); + }; + + useEffect(() => { + if (uploadingFiles.length > 0 && files.length > 0) { + // Filter out any uploading files that now exist in the files list + const remainingUploadingFiles = uploadingFiles.filter((uploadingFile) => { + if (uploadingFile.name.startsWith("http")) { + try { + // For URLs, check if any file contains the hostname + const url = new URL(uploadingFile.name); + const hostname = url.hostname; + const fullUrl = uploadingFile.name; + + return ( + // !files.some((file) => + // file.name.toLowerCase().includes(hostname.toLowerCase()) + // ) && + !files.some( + (file) => + file.link_url && + // (file.link_url + // .toLowerCase() + // .includes(hostname.toLowerCase()) || + file.link_url.toLowerCase() === fullUrl.toLowerCase() + ) + ); + } catch (e) { + console.error("Failed to parse URL:", e); + return true; // Keep in the list if we can't parse + } + } else { + // For regular files, check if the filename exists + return !files.some((file) => file.name === uploadingFile.name); + } + }); + + // Update the uploading files list if there's a change + if (remainingUploadingFiles.length !== uploadingFiles.length) { + setUploadingFiles(remainingUploadingFiles); + + // Also update parent component's state if the function is provided + if (updateUploadingFiles) { + const fileNames = remainingUploadingFiles.map((file) => file.name); + updateUploadingFiles(fileNames); + } + + // If all files are uploaded, clear the refresh interval + if (remainingUploadingFiles.length === 0 && refreshInterval) { + clearInterval(refreshInterval); + setRefreshInterval(null); + } + } + } + }, [files, uploadingFiles, refreshInterval, updateUploadingFiles]); + + useEffect(() => { + return () => { 
+ if (refreshInterval) { + clearInterval(refreshInterval); + } + }; + }, [refreshInterval]); + + const handleUploadComplete = () => { + startRefreshInterval(); + }; + + // Wrap in useCallback to prevent function recreation on each render + const toggleFailedUploadPopover = useCallback( + (index: number, isOpen: boolean) => { + setFailedUploads((prev) => + prev.map((item, i) => + i === index ? { ...item, isPopoverOpen: isOpen } : item + ) + ); + }, + [] + ); + + return ( + <> +
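// startRefreshInterval above simulates progress with random increments that
// shrink as the bar approaches completion, and it abandons the simulation
// after MAX_REFRESH_TIME. The easing step in isolation (a sketch; the
// thresholds mirror the code above):
function nextSimulatedProgress(current: number): number {
  let increment: number;
  if (current < 70) {
    increment = Math.floor(Math.random() * 10) + 5; // fast early phase
  } else if (current < 90) {
    increment = Math.floor(Math.random() * 5) + 2; // slower middle phase
  } else {
    increment = Math.floor(Math.random() * 2) + 1; // crawl near the end
  }
  return Math.min(current + increment, 99); // hold at 99% until confirmed
}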
+
+ {presentingDocument && ( + setPresentingDocument(null)} + /> + )} + +
+ {isLoading ? ( + Array.from({ length: 3 }).map((_, index) => ( +
+
+
+
+
+
+
+ )) + ) : ( + <> +
+
+ {onSortChange && setHoveredColumn ? ( + <> + + + + + ) : ( + <> +
Name
+
Created
+
LLM Tokens
+ + )} +
+
+ + {sortedFiles.map((file) => ( +
+ {editingItemId === file.id ? ( +
+
+ setNewItemName(e.target.value)} + className="mr-2" + autoFocus + /> + + +
+
+ ) : ( + openDocument(file)} + status={file.status} + /> + )} +
+ ))} + {uploadingFiles.map((uploadingFile, index) => ( +
+
+
+ {uploadingFile.name.startsWith("http") ? ( + + ) : ( + + )} + + {uploadingFile.name.startsWith("http") + ? `${uploadingFile.name.substring(0, 30)}${ + uploadingFile.name.length > 30 ? "..." : "" + }` + : uploadingFile.name} + +
+
+ - +
+
+ - +
+
+
+ ))} + + {/* Failed uploads row with three dots menu on right */} + {failedUploads.map((failedUpload, index) => ( +
+
+
+ + + {failedUpload.name.startsWith("http") + ? `${failedUpload.name.substring(0, 30)}${ + failedUpload.name.length > 30 ? "..." : "" + }` + : failedUpload.name} + +
+
+ Upload failed +
+
+ + toggleFailedUploadPopover(index, open) + } + > + e.stopPropagation()} + asChild + > +
+ +
+
+ +
+
+

+ Visiting URL failed. +
+ You can retry or remove it from the list. +

+
+
+ + +
+
+
+
+
+
+
+ ))} + + {sortedFiles.length === 0 && + uploadingFiles.length === 0 && + failedUploads.length === 0 && ( +
+ {searchQuery + ? "No documents match your search." + : "No documents in this folder yet. Upload files or add URLs to get started."} +
+ )} + + )} +
+
+ +
+ {showInvalidFileMessage && invalidFiles.length > 0 && ( +
+ +
+

+ Unsupported file type{invalidFiles.length > 1 ? "s" : ""} +

+

+ {invalidFiles.length > 1 + ? `The following files cannot be uploaded: ${invalidFiles + .slice(0, 3) + .join(", ")}${ + invalidFiles.length > 3 + ? ` and ${invalidFiles.length - 3} more` + : "" + }` + : `The file "${invalidFiles[0]}" cannot be uploaded.`} +

+
+ +
+ )} +
+ 0} + onUploadComplete={handleUploadComplete} + /> +
+
+
+ + ); +}; diff --git a/web/src/app/chat/my-documents/[id]/components/panels/AddWebsitePanel.tsx b/web/src/app/chat/my-documents/[id]/components/panels/AddWebsitePanel.tsx new file mode 100644 index 0000000000..0078ab7353 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/panels/AddWebsitePanel.tsx @@ -0,0 +1,79 @@ +import React, { useState } from "react"; +import { Link, ChevronDown, ChevronRight } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { useDocumentsContext } from "../../../DocumentsContext"; + +interface AddWebsitePanelProps { + folderId: number; + onCreateFileFromLink: (url: string, folderId: number) => Promise; +} + +export function AddWebsitePanel({ + folderId, + onCreateFileFromLink, +}: AddWebsitePanelProps) { + const [isOpen, setIsOpen] = useState(false); + const [linkUrl, setLinkUrl] = useState(""); + const [isCreating, setIsCreating] = useState(false); + const { refreshFolderDetails } = useDocumentsContext(); + + const handleCreateFileFromLink = async () => { + if (!linkUrl) return; + setIsCreating(true); + try { + await onCreateFileFromLink(linkUrl, folderId); + setLinkUrl(""); + await refreshFolderDetails(); + } catch (error) { + console.error("Error creating file from link:", error); + } finally { + setIsCreating(false); + } + }; + + return ( +
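// AddWebsitePanel's handleCreateFileFromLink above, like the similar
// handlers earlier in this section, follows one shape: bail on empty input,
// raise a busy flag, call the API, and clear the flag in finally so an error
// cannot leave the button stuck. A generic sketch of that shape (submitLink
// is an illustrative name, not part of this code):
async function submitLink(
  url: string,
  create: (url: string) => Promise<void>,
  setBusy: (busy: boolean) => void
): Promise<void> {
  if (!url) return;
  setBusy(true);
  try {
    await create(url);
  } finally {
    setBusy(false);
  }
}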
+
setIsOpen(!isOpen)} + > +
+ + + Add a website + +
+ +
+ + {isOpen && ( +
+
+ setLinkUrl(e.target.value)} + placeholder="Enter URL" + className="flex-grow !text-sm mr-2 px-2 py-1 border border-neutral-300 dark:border-neutral-600 rounded bg-white dark:bg-neutral-800 text-neutral-900 dark:text-neutral-100" + /> + +
+
+ )} +
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/components/panels/ContextLimitPanel.tsx b/web/src/app/chat/my-documents/[id]/components/panels/ContextLimitPanel.tsx new file mode 100644 index 0000000000..491e5604bf --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/panels/ContextLimitPanel.tsx @@ -0,0 +1,120 @@ +import React, { useState } from "react"; +import { Info, ChevronRight, ChevronDown } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { LLMModelDescriptor } from "@/app/admin/configuration/llm/interfaces"; +import { ModelSelector } from "./ModelSelector"; +import { useChatContext } from "@/components/context/ChatContext"; +import { getDisplayNameForModel } from "@/lib/hooks"; + +interface ContextLimitPanelProps { + isOpen: boolean; + onToggle: () => void; + totalTokens: number; +} + +export function ContextLimitPanel({ + isOpen, + onToggle, + totalTokens, +}: ContextLimitPanelProps) { + const { llmProviders } = useChatContext(); + const modelDescriptors = llmProviders.flatMap((provider) => + Object.entries(provider.model_token_limits ?? {}).map( + ([modelName, maxTokens]) => ({ + modelName, + provider: provider.provider, + maxTokens, + }) + ) + ); + + return ( +
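// ContextLimitPanel above flattens every provider's model_token_limits map
// (model name -> max context tokens) into one descriptor list. The same
// transform in isolation; ProviderLike is inferred from usage here, not the
// project's real provider type:
interface ProviderLike {
  provider: string;
  model_token_limits?: Record<string, number>;
}
const toModelDescriptors = (providers: ProviderLike[]) =>
  providers.flatMap((p) =>
    Object.entries(p.model_token_limits ?? {}).map(
      ([modelName, maxTokens]) => ({ modelName, provider: p.provider, maxTokens })
    )
  );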
+
+
+ + + Context Limit + +
+ + +
+ {isOpen && ( +
+

+ Shows how much of each model's context window is used by these + documents. When exceeded, the model will search over content rather + than including all content in each prompt. +

+

+ Total tokens in this group:{" "} + + {totalTokens.toLocaleString()} + +

+
+ )} + + {isOpen && ( +
+ {modelDescriptors.map((model, index) => { + const tokenPercentage = (totalTokens / model.maxTokens) * 100; + return ( +
+
+ + {getDisplayNameForModel(model.modelName)} + + + {model.maxTokens.toLocaleString()} tokens + +
+
+
100 + ? "bg-red-500 dark:bg-red-600" + : tokenPercentage > 80 + ? "bg-amber-500 dark:bg-amber-600" + : "bg-emerald-500 dark:bg-emerald-600" + }`} + style={{ width: `${Math.min(tokenPercentage, 100)}%` }} + >
+
+ {tokenPercentage > 100 && ( +
+ Capacity exceeded | Search + will be used +
+ )} + {tokenPercentage > 80 && tokenPercentage <= 100 && ( +
+ Near capacity limit +
+ )} +
+ ); + })} + {modelDescriptors.length === 0 && ( +
+ No models available +
+ )} +
+ )} +
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/components/panels/ModelSelector.tsx b/web/src/app/chat/my-documents/[id]/components/panels/ModelSelector.tsx new file mode 100644 index 0000000000..05b95877d3 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/panels/ModelSelector.tsx @@ -0,0 +1,45 @@ +import React from "react"; +import { LLMModelDescriptor } from "@/app/admin/configuration/llm/interfaces"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { OpenAIIcon } from "@/components/icons/icons"; +import { getDisplayNameForModel } from "@/lib/hooks"; + +interface ModelSelectorProps { + models: LLMModelDescriptor[]; + selectedModel: LLMModelDescriptor; + onSelectModel: (model: LLMModelDescriptor) => void; +} + +export const ModelSelector: React.FC = ({ + models, + selectedModel, + onSelectModel, +}) => ( + +); diff --git a/web/src/app/chat/my-documents/[id]/components/panels/SharingPanel.tsx b/web/src/app/chat/my-documents/[id]/components/panels/SharingPanel.tsx new file mode 100644 index 0000000000..5e43ee0f81 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/panels/SharingPanel.tsx @@ -0,0 +1,84 @@ +import React from "react"; +import { User, Users, ChevronDown, ChevronRight } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; + +// Define a simplified Assistant interface with only the properties we use +interface Assistant { + id: number; + name: string; +} + +interface SharingPanelProps { + assistantIds?: number[]; + assistants: Assistant[]; + isOpen: boolean; + onToggle: () => void; +} + +export function SharingPanel({ + assistantIds = [], + assistants, + isOpen, + onToggle, +}: SharingPanelProps) { + const count = assistantIds.length; + return ( +
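// The capacity bar in ContextLimitPanel above uses two thresholds: past 100%
// of a model's context window, search is used instead of including all
// content in the prompt; past 80%, the group is flagged as near the limit.
// Sketched as a pure helper (the band names are illustrative):
type CapacityBand = "ok" | "near-limit" | "exceeded";
function capacityBand(tokenPercentage: number): CapacityBand {
  if (tokenPercentage > 100) return "exceeded"; // search will be used
  if (tokenPercentage > 80) return "near-limit";
  return "ok";
}
// e.g. capacityBand((totalTokens / model.maxTokens) * 100)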
+
+
+ {count > 0 ? ( + <> + + + Shared with {count} Assistant{count > 1 ? "s" : ""} + + + ) : ( + <> + + + Not shared + + + )} +
+ +
+ {isOpen && ( +
+ {count > 0 ? ( +
+ {assistantIds.map((id) => { + const assistant = assistants.find((a) => a.id === id); + return assistant ? ( + + + + {assistant.name} + + + ) : null; + })} +
+ ) : ( + Not shared with any assistants + )} +
+ )} +
+ ); +} diff --git a/web/src/app/chat/my-documents/[id]/components/upload/CircularProgress.tsx b/web/src/app/chat/my-documents/[id]/components/upload/CircularProgress.tsx new file mode 100644 index 0000000000..4117cb0451 --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/upload/CircularProgress.tsx @@ -0,0 +1,112 @@ +import React, { useEffect, useState } from "react"; +import { Check } from "lucide-react"; + +interface CircularProgressProps { + progress: number; // 0 to 100 + size?: number; + strokeWidth?: number; + showPercentage?: boolean; +} + +export const CircularProgress: React.FC = ({ + progress, + size = 16, + strokeWidth = 2, + showPercentage = false, +}) => { + const [displayedProgress, setDisplayedProgress] = useState(progress); + const [showComplete, setShowComplete] = useState(false); + + // Smooth progress transitions by gradually updating the displayed value + useEffect(() => { + // If we're going to 100%, handle special completion animation + if (progress >= 100 && displayedProgress < 100) { + // First complete the circle + const timer = setTimeout(() => { + setDisplayedProgress(100); + // Then show the checkmark after circle is complete + setTimeout(() => setShowComplete(true), 400); + }, 200); + return () => clearTimeout(timer); + } + + // For normal progress updates, smooth the transition + if (progress > displayedProgress) { + const diff = progress - displayedProgress; + const increment = Math.max(1, Math.min(diff / 2, 5)); // Smoothing factor + + const timer = setTimeout(() => { + setDisplayedProgress((prev) => Math.min(progress, prev + increment)); + }, 50); + return () => clearTimeout(timer); + } + }, [progress, displayedProgress]); + + const radius = (size - strokeWidth) / 2; + const circumference = radius * 2 * Math.PI; + const strokeDashoffset = + circumference - (displayedProgress / 100) * circumference; + + // Animation class for completion + const completionClass = showComplete + ? "scale-100 opacity-100" + : "scale-0 opacity-0"; + + return ( +
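// CircularProgress above draws the ring by offsetting the stroke dash: the
// dashoffset equals the circumference times the *unfilled* fraction, so it
// shrinks to 0 as progress reaches 100. The geometry in isolation, using the
// component's defaults (size 16, strokeWidth 2):
const size = 16;
const strokeWidth = 2;
const radius = (size - strokeWidth) / 2; // 7
const circumference = 2 * Math.PI * radius; // ~43.98
const dashOffsetFor = (progress: number): number =>
  circumference - (progress / 100) * circumference;
// dashOffsetFor(0) === circumference (empty ring); dashOffsetFor(100) === 0 (full ring)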
+ {/* Progress circle */} + + {/* Background circle */} + + {/* Progress circle */} + = 100 ? "text-green-500 dark:text-green-400" : "" + }`} + /> + + + {/* Check mark for completion */} +
+ +
+ + {/* Percentage label */} + {showPercentage && !showComplete && ( + + {Math.round(displayedProgress)}% + + )} +
+ ); +}; diff --git a/web/src/app/chat/my-documents/[id]/components/upload/FileUploadSection.tsx b/web/src/app/chat/my-documents/[id]/components/upload/FileUploadSection.tsx new file mode 100644 index 0000000000..befa49e9cf --- /dev/null +++ b/web/src/app/chat/my-documents/[id]/components/upload/FileUploadSection.tsx @@ -0,0 +1,611 @@ +import React, { useState, useRef, useEffect } from "react"; +import { + Upload, + Link, + ArrowRight, + X, + Loader2, + FileIcon, + Plus, + AlertCircle, +} from "lucide-react"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; + +// Define allowed file extensions +const ALLOWED_FILE_TYPES = [ + // Documents + ".pdf", + ".doc", + ".docx", + ".txt", + ".rtf", + ".odt", + // Spreadsheets + ".csv", + ".xls", + ".xlsx", + ".ods", + // Presentations + ".ppt", + ".pptx", + ".odp", + // Images + ".jpg", + ".jpeg", + ".png", + ".gif", + ".bmp", + ".svg", + ".webp", + // Web + ".html", + ".htm", + ".xml", + ".json", + ".md", + ".markdown", + // Archives (if supported by your system) + ".zip", + ".rar", + ".7z", + ".tar", + ".gz", + // Code + ".js", + ".jsx", + ".ts", + ".tsx", + ".py", + ".java", + ".c", + ".cpp", + ".cs", + ".php", + ".rb", + ".go", + ".swift", + ".html", + ".css", + ".scss", + ".sass", + ".less", +]; + +interface FileUploadSectionProps { + onUpload: (files: File[]) => void; + onUrlUpload?: (url: string) => Promise; + disabledMessage?: string; + disabled?: boolean; + isUploading?: boolean; + onUploadComplete?: () => void; + onUploadProgress?: (fileName: string, progress: number) => void; +} + +export const FileUploadSection: React.FC = ({ + onUpload, + onUrlUpload, + disabledMessage, + disabled, + isUploading = false, + onUploadComplete, + onUploadProgress, +}) => { + const [uploadType, setUploadType] = useState<"file" | "url">("file"); + const [fileUrl, setFileUrl] = useState(""); + const [urlError, setUrlError] = useState(null); + const [selectedFiles, setSelectedFiles] = useState([]); + const [isDragging, setIsDragging] = useState(false); + const [isProcessing, setIsProcessing] = useState(false); + const [invalidFiles, setInvalidFiles] = useState([]); + const [showInvalidFileMessage, setShowInvalidFileMessage] = useState(false); + const dropAreaRef = useRef(null); + const urlInputRef = useRef(null); + const fileInputRef = useRef(null); + + // Focus URL input when switching to URL mode + useEffect(() => { + if (uploadType === "url" && urlInputRef.current) { + urlInputRef.current.focus(); + } + }, [uploadType]); + + // Hide invalid file message after 5 seconds + useEffect(() => { + if (showInvalidFileMessage) { + // Remove the auto-hide timer + return () => {}; + } + }, [showInvalidFileMessage]); + + // Function to check if a file type is allowed + const isFileTypeAllowed = (file: File): boolean => { + const fileName = file.name.toLowerCase(); + const fileExtension = fileName.substring(fileName.lastIndexOf(".")); + return ALLOWED_FILE_TYPES.includes(fileExtension); + }; + + // Filter files to only include allowed types + const filterAllowedFiles = ( + files: File[] + ): { allowed: File[]; rejected: string[] } => { + const allowed: File[] = []; + const rejected: string[] = []; + + files.forEach((file) => { + if (isFileTypeAllowed(file)) { + allowed.push(file); + } else { + rejected.push(file.name); + } + }); + + return { allowed, rejected }; + }; + + const simulateFileUploadProgress = (file: File) => { + let progress = 0; + const fileSize = file.size; + + // Calculate simulation parameters 
based on file size + const getUploadParameters = (size: number) => { + // For very small files, upload is faster + if (size < 100 * 1024) { + // < 100KB + return { + initialJump: 40, // Quick initial progress jump + steadyRate: 10, // Steady upload rate (percentage points per second) + finalSlowdown: 0.5, // Slower rate near completion + totalTime: 2000, // Total upload time in ms + }; + } + // For medium files + else if (size < 1024 * 1024) { + // < 1MB + return { + initialJump: 30, + steadyRate: 7, + finalSlowdown: 0.3, + totalTime: 4000, + }; + } + // For larger files + else if (size < 10 * 1024 * 1024) { + // < 10MB + return { + initialJump: 20, + steadyRate: 5, + finalSlowdown: 0.2, + totalTime: 8000, + }; + } + // For very large files + else { + return { + initialJump: 10, + steadyRate: 3, + finalSlowdown: 0.1, + totalTime: 15000, + }; + } + }; + + const params = getUploadParameters(fileSize); + + // Initial jump to show immediate progress + setTimeout(() => { + progress = params.initialJump; + if (onUploadProgress) { + onUploadProgress(file.name, progress); + } + }, 100); + + // Middle section - steady progress + const steadyUpdateInterval = 300; // ms between updates + const steadyIncrement = params.steadyRate * (steadyUpdateInterval / 1000); + const steadySteps = Math.floor((90 - params.initialJump) / steadyIncrement); + + // Start steady updates after initial jump + let steadyTimer = setTimeout(() => { + let step = 0; + const intervalId = setInterval(() => { + step++; + progress = Math.min(params.initialJump + step * steadyIncrement, 90); + + if (onUploadProgress) { + onUploadProgress(file.name, Math.round(progress)); + } + + if (step >= steadySteps) { + clearInterval(intervalId); + + // Final slowdown phase - more gradual progress to 99% + const finalUpdateInterval = 400; + const finalIncrement = params.finalSlowdown; + let finalProgress = progress; + + const finalIntervalId = setInterval(() => { + finalProgress += finalIncrement; + if (finalProgress >= 99) { + finalProgress = 99; + clearInterval(finalIntervalId); + } + + if (onUploadProgress) { + onUploadProgress(file.name, Math.round(finalProgress)); + } + }, finalUpdateInterval); + } + }, steadyUpdateInterval); + }, 300); + + // Ensure we eventually reach 100% after the expected total time + setTimeout(() => { + if (onUploadProgress) { + // Send 99% if we haven't reached it yet + onUploadProgress(file.name, 99); + + // After a short pause, mark as complete + setTimeout(() => { + onUploadProgress(file.name, 100); + }, 500); + } + }, params.totalTime); + }; + + const handleChange = async (e: React.ChangeEvent) => { + e.preventDefault(); + if (e.target.files && e.target.files.length > 0) { + const newFiles = Array.from(e.target.files); + const { allowed, rejected } = filterAllowedFiles(newFiles); + setSelectedFiles(allowed); + + // Show error message if there are invalid files + if (rejected.length > 0) { + setInvalidFiles(rejected); + setShowInvalidFileMessage(true); + } + + // Only proceed if there are valid files + if (allowed.length > 0) { + setIsProcessing(true); + + try { + // Start progress tracking for each file + allowed.forEach((file) => { + simulateFileUploadProgress(file); + }); + + onUpload(allowed); + + // Wait a bit to show loading state + await new Promise((resolve) => setTimeout(resolve, 500)); + } finally { + setIsProcessing(false); + if (onUploadComplete) { + onUploadComplete(); + } + } + } + + e.target.value = ""; // Reset input after upload + } + }; + + const validateUrl = (url: string): boolean => { + try { 
+ // Check if URL is valid format + const urlObj = new URL(url); + // Make sure it has http or https protocol + return urlObj.protocol === "http:" || urlObj.protocol === "https:"; + } catch (e) { + return false; + } + }; + + const handleUrlChange = (e: React.ChangeEvent) => { + const url = e.target.value; + setFileUrl(url); + + // Clear error when input changes + if (urlError) { + setUrlError(null); + } + }; + + const handleUrlSubmit = async () => { + if (!fileUrl) return; + + if (!validateUrl(fileUrl)) { + setUrlError("Please enter a valid URL (e.g., https://example.com)"); + return; + } + + if (onUrlUpload) { + setIsProcessing(true); + + try { + // Simulate progress for URL uploads + let progress = 0; + const progressInterval = setInterval(() => { + progress += 10; + if (progress >= 95) { + clearInterval(progressInterval); + } + if (onUploadProgress) { + onUploadProgress(fileUrl, progress); + } + }, 300); + + await onUrlUpload(fileUrl); + + // Set to 100% when complete + if (onUploadProgress) { + onUploadProgress(fileUrl, 100); + } + + clearInterval(progressInterval); + setFileUrl(""); + } finally { + setIsProcessing(false); + if (onUploadComplete) { + onUploadComplete(); + } + } + } + }; + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Enter" && fileUrl) { + handleUrlSubmit(); + } + }; + + // Drag and drop handlers + const handleDragEnter = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + if (!disabled) { + setIsDragging(true); + setUploadType("file"); // Switch to file mode when dragging + } + }; + + const handleDragOver = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + if (!disabled && !isDragging) { + setIsDragging(true); + } + }; + + const handleDragLeave = (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + + // Only set isDragging to false if we're leaving the drop area itself, not its children + if ( + !disabled && + dropAreaRef.current && + !dropAreaRef.current.contains(e.relatedTarget as Node) + ) { + setIsDragging(false); + } + }; + + const handleDrop = async (e: React.DragEvent) => { + e.preventDefault(); + e.stopPropagation(); + setIsDragging(false); + + if (!disabled && e.dataTransfer.files && e.dataTransfer.files.length > 0) { + const newFiles = Array.from(e.dataTransfer.files); + const { allowed, rejected } = filterAllowedFiles(newFiles); + setSelectedFiles(allowed); + + // Show error message if there are invalid files + if (rejected.length > 0) { + setInvalidFiles(rejected); + setShowInvalidFileMessage(true); + } + + // Only proceed if there are valid files + if (allowed.length > 0) { + setIsProcessing(true); + + try { + // Start progress tracking for each file + allowed.forEach((file) => { + simulateFileUploadProgress(file); + }); + + onUpload(allowed); + + // Wait a bit to show loading state + await new Promise((resolve) => setTimeout(resolve, 500)); + } finally { + setIsProcessing(false); + if (onUploadComplete) { + onUploadComplete(); + } + } + } + } + }; + + return ( +
+ {/* Invalid file message */} + {showInvalidFileMessage && invalidFiles.length > 0 && ( +
+ +
+

+ Unsupported file type{invalidFiles.length > 1 ? "s" : ""} +

+

+ {invalidFiles.length > 1 + ? `The following files cannot be uploaded: ${invalidFiles + .slice(0, 3) + .join(", ")}${ + invalidFiles.length > 3 + ? ` and ${invalidFiles.length - 3} more` + : "" + }` + : `The file "${invalidFiles[0]}" cannot be uploaded.`} +

+
+ +
+ )} + + {/* Toggle Buttons - Now outside the main container */} + + {/* Main upload area */} + + + +
+ {/* Common layout structure for both modes */} +
+ {uploadType === "file" ? ( + + ) : ( + <> + {/* Icon container - fixed position for both modes */} +
+ +
+ + {/* Content area - different for each mode but with consistent spacing */} +
+
+ + +
+ {urlError && ( +

+ {urlError} +

+ )} +
+ + )} +
+
+
+
+
+
+ + +
+
+  );
+};
diff --git a/web/src/app/chat/my-documents/[id]/components/upload/UploadWarning.tsx b/web/src/app/chat/my-documents/[id]/components/upload/UploadWarning.tsx
new file mode 100644
index 0000000000..cf25e93e28
--- /dev/null
+++ b/web/src/app/chat/my-documents/[id]/components/upload/UploadWarning.tsx
@@ -0,0 +1,24 @@
+import React from "react";
+import { AlertTriangle } from "lucide-react";
+
+interface UploadWarningProps {
+  className?: string;
+}
+
+export const UploadWarning: React.FC<UploadWarningProps> = ({ className }) => {
+  return (
+
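An aside on the `simulateFileUploadProgress` routine in `FileUploadSection` above: it tiers its initial jump, steady rate, and total duration by file size, but its `setTimeout`/`setInterval` chain is never cancelled, so timers can keep firing after the component unmounts. Below is a minimal sketch of the same tiered simulation as a single cancellable helper — illustrative only, assuming nothing beyond the `(fileName, progress)` callback shape already used by `onUploadProgress`:

```ts
type ProgressCallback = (fileName: string, progress: number) => void;

interface SimParams {
  initialJump: number; // immediate % shown right after start
  steadyRate: number; // % per second during the middle phase
  totalTime: number; // ms until we force 100%
}

// Same size tiers as the component: small files finish fast, large ones crawl.
const paramsForSize = (size: number): SimParams =>
  size < 100 * 1024
    ? { initialJump: 40, steadyRate: 10, totalTime: 2000 }
    : size < 1024 * 1024
      ? { initialJump: 30, steadyRate: 7, totalTime: 4000 }
      : size < 10 * 1024 * 1024
        ? { initialJump: 20, steadyRate: 5, totalTime: 8000 }
        : { initialJump: 10, steadyRate: 3, totalTime: 15000 };

// Drives onProgress from 0 to 100 and returns a cancel function.
export function simulateProgress(
  file: { name: string; size: number },
  onProgress: ProgressCallback
): () => void {
  const { initialJump, steadyRate, totalTime } = paramsForSize(file.size);
  let progress = 0;
  const timers: ReturnType<typeof setTimeout>[] = [];

  timers.push(
    setTimeout(() => onProgress(file.name, (progress = initialJump)), 100)
  );

  // Steady phase: advance every 300 ms, capped at 99% until completion.
  const interval = setInterval(() => {
    progress = Math.min(progress + steadyRate * 0.3, 99);
    onProgress(file.name, Math.round(progress));
  }, 300);

  // Force completion once the tier's expected total time has elapsed.
  timers.push(
    setTimeout(() => {
      clearInterval(interval);
      onProgress(file.name, 100);
    }, totalTime)
  );

  return () => {
    clearInterval(interval);
    timers.forEach(clearTimeout);
  };
}
```

Returning the canceller lets a caller drop it straight into a `useEffect` cleanup, which the inline version cannot do.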
+
+ +

+ Warning: This folder is shared. Any + documents you upload will be accessible to the shared assistants. +

+
+
+  );
+};
diff --git a/web/src/app/chat/my-documents/[id]/page.tsx b/web/src/app/chat/my-documents/[id]/page.tsx
new file mode 100644
index 0000000000..abfb57b34a
--- /dev/null
+++ b/web/src/app/chat/my-documents/[id]/page.tsx
@@ -0,0 +1,22 @@
+import WrappedUserFolders from "./UserFolder";
+import { DocumentsProvider, FolderResponse } from "../DocumentsContext";
+import { fetchSS } from "@/lib/utilsSS";
+
+export default async function UserFolderPage(props: {
+  params: Promise<{ id: string }>;
+}) {
+  const params = await props.params;
+  const response = await fetchSS(`/user/folder/${params.id}`);
+
+  const folderResponse: FolderResponse | null = response.ok
+    ? await response.json()
+    : null;
+
+  return (
+
+
+
+  );
+}
diff --git a/web/src/app/chat/my-documents/api.ts b/web/src/app/chat/my-documents/api.ts
new file mode 100644
index 0000000000..929d278bac
--- /dev/null
+++ b/web/src/app/chat/my-documents/api.ts
@@ -0,0 +1,61 @@
+import { INTERNAL_URL } from "@/lib/constants";
+
+// Request body for the bulk-cleanup endpoint.
+export interface BulkCleanupRequest {
+  folder_id: number;
+  days_older_than: number | null;
+}
+
+export const deleteFolder = async (folderId: number): Promise<void> => {
+  try {
+    const response = await fetch(
+      `${INTERNAL_URL}/api/user_files/folder/${folderId}`,
+      {
+        method: "DELETE",
+        headers: {
+          "Content-Type": "application/json",
+        },
+        credentials: "include",
+      }
+    );
+
+    if (!response.ok) {
+      throw new Error(`Failed to delete folder: ${response.statusText}`);
+    }
+  } catch (error) {
+    console.error("Error deleting folder:", error);
+    throw error;
+  }
+};
+
+export const bulkCleanupFiles = async (
+  request: BulkCleanupRequest
+): Promise<{ message: string }> => {
+  try {
+    const response = await fetch("/api/user/file/bulk-cleanup", {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(request),
+      credentials: "include",
+    });
+
+    if (!response.ok) {
+      const errorText = await response.text();
+      console.error("Cleanup error response:", errorText);
+      throw new Error(
+        `Failed to cleanup files: ${response.status} ${response.statusText}`
+      );
+    }
+
+    const result = await response.json();
+    return result;
+  } catch (error) {
+    console.error("Error cleaning up files:", error);
+    throw error;
+  }
+};
diff --git a/web/src/app/chat/my-documents/components/ContextUsage.tsx b/web/src/app/chat/my-documents/components/ContextUsage.tsx
new file mode 100644
index 0000000000..19b6f8ec11
--- /dev/null
+++ b/web/src/app/chat/my-documents/components/ContextUsage.tsx
@@ -0,0 +1,51 @@
+import React from "react";
+
+interface ContextUsageProps {
+  totalTokens: number;
+  maxTokens: number;
+  modelName?: string;
+  compact?: boolean;
+}
+
+export const ContextUsage: React.FC<ContextUsageProps> = ({
+  totalTokens,
+  maxTokens,
+  modelName,
+  compact = false,
+}) => {
+  const tokenPercentage = Math.round((totalTokens / maxTokens) * 100);
+
+  return (
+
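Both helpers in `api.ts` repeat the same fetch/JSON/error-logging scaffolding, and the diff itself mixes absolute `${INTERNAL_URL}/api/...` and relative `/api/...` URLs. A generic wrapper captures the pattern once; this is an illustrative refactor sketch, not code from the diff:

```ts
// Minimal typed fetch wrapper mirroring api.ts's error handling.
async function apiFetch<T>(url: string, init?: RequestInit): Promise<T> {
  const response = await fetch(url, {
    credentials: "include",
    headers: { "Content-Type": "application/json" },
    ...init,
  });

  if (!response.ok) {
    const errorText = await response.text();
    console.error(`Request to ${url} failed:`, errorText);
    throw new Error(`${response.status} ${response.statusText}`);
  }

  // DELETE endpoints may return an empty body.
  const text = await response.text();
  return (text ? JSON.parse(text) : undefined) as T;
}

// Usage mirroring the two helpers from the diff:
export const deleteFolderViaWrapper = (folderId: number) =>
  apiFetch<void>(`/api/user_files/folder/${folderId}`, { method: "DELETE" });

export const bulkCleanupViaWrapper = (folderId: number, days: number | null) =>
  apiFetch<{ message: string }>("/api/user/file/bulk-cleanup", {
    method: "POST",
    body: JSON.stringify({ folder_id: folderId, days_older_than: days }),
  });
```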
+ {modelName && !compact && ( + + Context usage for {modelName} + + )} + +
+
+
75 + ? "bg-red-500" + : tokenPercentage > 50 + ? "bg-amber-500" + : "bg-emerald-500" + }`} + style={{ width: `${Math.min(tokenPercentage, 100)}%` }} + /> +
+ + {totalTokens.toLocaleString()} / {maxTokens.toLocaleString()} tokens + +
+
+ ); +}; diff --git a/web/src/app/chat/my-documents/components/FileListItem.tsx b/web/src/app/chat/my-documents/components/FileListItem.tsx new file mode 100644 index 0000000000..fcd67a4b50 --- /dev/null +++ b/web/src/app/chat/my-documents/components/FileListItem.tsx @@ -0,0 +1,351 @@ +import React, { useState, useEffect, useCallback } from "react"; +import { Checkbox } from "@/components/ui/checkbox"; +import { File, File as FileIcon, Loader, MoreHorizontal } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { + FileResponse, + FileStatus, + FolderResponse, + useDocumentsContext, +} from "../DocumentsContext"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { + FiAlertCircle, + FiAlertTriangle, + FiDownload, + FiEdit, + FiRefreshCw, + FiTrash, + FiTrash2, +} from "react-icons/fi"; +import { getFormattedDateTime } from "@/lib/dateUtils"; +import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils"; +import { AnimatedDots } from "../[id]/components/DocumentList"; +import { FolderMoveIcon } from "@/components/icons/icons"; +import { truncateString } from "@/lib/utils"; +import { triggerIndexing } from "@/app/admin/connector/[ccPairId]/lib"; +import { usePopup } from "@/components/admin/connectors/Popup"; + +interface FileListItemProps { + file: FileResponse; + isSelected?: boolean; + onSelect?: (file: FileResponse) => void; + view: "grid" | "list"; + onRename: ( + itemId: number, + currentName: string, + isFolder: boolean + ) => Promise; + onDelete: (itemId: number, isFolder: boolean, itemName: string) => void; + onDownload: (documentId: string) => Promise; + onMove: (fileId: number, targetFolderId: number) => Promise; + folders: FolderResponse[]; + + status: FileStatus; +} + +export const FileListItem: React.FC = ({ + file, + isSelected, + onSelect, + onRename, + onDelete, + onDownload, + onMove, + folders, + status, +}) => { + const { setPopup, popup } = usePopup(); + const [showMoveOptions, setShowMoveOptions] = useState(false); + const [indexingStatus, setIndexingStatus] = useState(null); + const [isPopoverOpen, setIsPopoverOpen] = useState(false); + const { getFilesIndexingStatus, refreshFolderDetails } = + useDocumentsContext(); + + useEffect(() => { + const checkStatus = async () => { + const status = await getFilesIndexingStatus([file.id]); + setIndexingStatus(status[file.id]); + }; + + checkStatus(); + const interval = setInterval(() => { + refreshFolderDetails(); + if (indexingStatus === false) { + checkStatus(); + } + }, 5000); + + return () => clearInterval(interval); + }, [file.id, indexingStatus, getFilesIndexingStatus]); + + const handleDelete = () => { + onDelete(file.id, false, file.name); + }; + + const handleMove = (targetFolderId: number) => { + onMove(file.id, targetFolderId); + setShowMoveOptions(false); + }; + const FailureWithPopover = useCallback(() => { + return ( + + e.stopPropagation()} asChild> +
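A note on the status-polling effect in `FileListItem` above: it puts `indexingStatus` in its dependency array, so every status flip tears down and recreates the 5-second interval, and it calls `refreshFolderDetails` on every tick regardless of state. Here is a sketch of the same polling extracted into a hook; the `getFilesIndexingStatus(ids)` shape returning a `Record<number, boolean>` is inferred from the call site, so treat it as an assumption:

```ts
import { useEffect, useRef, useState } from "react";

type StatusFetcher = (ids: number[]) => Promise<Record<number, boolean>>;

// Polls until the file reports as indexed, then stops hitting the API.
export function useIndexingStatus(
  fileId: number,
  getStatus: StatusFetcher,
  intervalMs = 5000
): boolean | null {
  const [indexed, setIndexed] = useState<boolean | null>(null);
  const latest = useRef<boolean | null>(null);

  useEffect(() => {
    let cancelled = false;

    const check = async () => {
      const statuses = await getStatus([fileId]);
      if (!cancelled) {
        latest.current = statuses[fileId];
        setIndexed(statuses[fileId]);
      }
    };

    check();
    const id = setInterval(() => {
      // Read from the ref so the interval's closure never goes stale.
      if (latest.current !== true) check();
    }, intervalMs);

    return () => {
      cancelled = true;
      clearInterval(id);
    };
  }, [fileId, getStatus, intervalMs]);

  return indexed;
}
```

Keeping the interval alive across status changes (and reading through a ref) avoids the teardown/recreate churn without changing the observable behavior.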
+ +
+
+ +
+
+

+ Indexing failed. +
+ You can attempt a reindex to continue using this file, or delete + the file. +

+
+
+ + +
+
+
+
+ ); + }, [ + file.id, + handleDelete, + isPopoverOpen, + refreshFolderDetails, + setIndexingStatus, + setIsPopoverOpen, + setPopup, + ]); + + return ( +
{ + if (!(e.target as HTMLElement).closest(".action-menu")) { + onSelect && onSelect(file); + } + }} + > +
+
+ {isSelected !== undefined && ( + + )} + {status === FileStatus.FAILED ? ( + + ) : ( + getFileIconFromFileNameAndLink(file.name, file.link_url) + )} + {file.name.length > 50 ? ( + + + + + {truncateString(file.name, 50)} + + + +

{file.name}

+
+
+
+ ) : ( + + {file.name} + + )} +
+ +
+ {file.created_at && + getFormattedDateTime( + new Date(new Date(file.created_at).getTime() - 8 * 60 * 60 * 1000) + )} +
+ +
+          {file.status === FileStatus.INDEXING ||
+          file.status === FileStatus.REINDEXING ? (
+            <>
+              N/A, indexing
+
+
+          ) : file.status === FileStatus.FAILED ? (
+            <>Failed
+          ) : file.token_count !== undefined ? (
+            `${file.token_count.toLocaleString()} tokens`
+          ) : (
+            "N/A"
+          )}
+
+
+ {popup} + +
e.stopPropagation()}> + { + if (!open) { + setShowMoveOptions(false); + } + }} + > + + + + + {!showMoveOptions ? ( +
+ + + + +
+ ) : ( +
+
+

Move to

+
+
+
+ {folders + .filter( + (folder) => + folder.id !== -1 && folder.id !== file.folder_id + ) + .map((folder) => ( + + ))} + {folders.filter( + (folder) => + folder.id !== -1 && folder.id !== file.folder_id + ).length === 0 && ( +
+ No folders available to move this file to. +
+ )} +
+
+
+ )} +
+
+
+
+ ); +}; + +export const SkeletonFileListItem: React.FC<{ view: "grid" | "list" }> = () => { + return ( +
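One more note on `FileListItem`: its created-at cell subtracts a hard-coded eight hours (`8 * 60 * 60 * 1000`) from the stored timestamp, which only matches one fixed offset (UTC-8) and ignores daylight saving. If the backend stores UTC, a locale-aware alternative looks like the sketch below — an assumption-labeled suggestion, not the component's code:

```ts
// Renders a UTC timestamp in the viewer's local timezone instead of a
// hard-coded UTC-8 shift. Intl handles DST and locale formatting.
// Assumes created_at is an ISO-8601 UTC string (i.e. it ends in "Z");
// a naive string without "Z" would parse as local time instead.
export function formatUtcTimestamp(utcIso: string): string {
  const date = new Date(utcIso);
  return new Intl.DateTimeFormat(undefined, {
    dateStyle: "medium",
    timeStyle: "short",
  }).format(date);
}

// Example: formatUtcTimestamp("2024-03-01T17:30:00Z")
// -> "Mar 1, 2024, 9:30 AM" for a viewer in US Pacific time.
```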
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ); +}; diff --git a/web/src/app/chat/my-documents/components/FilePicker.tsx b/web/src/app/chat/my-documents/components/FilePicker.tsx new file mode 100644 index 0000000000..9724f65e87 --- /dev/null +++ b/web/src/app/chat/my-documents/components/FilePicker.tsx @@ -0,0 +1,1420 @@ +import React, { useState, useEffect, useMemo } from "react"; +import { Button } from "@/components/ui/button"; +import { Modal } from "@/components/Modal"; +import { + Grid, + List, + UploadIcon, + FolderIcon, + FileIcon, + PlusIcon, + Router, + X, + Loader2, + ArrowUp, + ArrowDown, +} from "lucide-react"; +import { ContextUsage } from "./ContextUsage"; +import { SelectedItemsList } from "./SelectedItemsList"; +import { Separator } from "@/components/ui/separator"; +import { + useDocumentsContext, + FolderResponse, + FileResponse, + FileUploadResponse, + FileStatus, +} from "../DocumentsContext"; +import { + DndContext, + closestCenter, + DragOverlay, + DragEndEvent, + DragStartEvent, + useSensor, + useSensors, + PointerSensor, + DragMoveEvent, + KeyboardSensor, +} from "@dnd-kit/core"; +import { + SortableContext, + sortableKeyboardCoordinates, + verticalListSortingStrategy, +} from "@dnd-kit/sortable"; +import { useSortable } from "@dnd-kit/sortable"; +import { CSS } from "@dnd-kit/utilities"; + +import { + TooltipProvider, + Tooltip, + TooltipTrigger, + TooltipContent, +} from "@/components/ui/tooltip"; +import { useRouter } from "next/navigation"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { getFormattedDateTime } from "@/lib/dateUtils"; +import { FileUploadSection } from "../[id]/components/upload/FileUploadSection"; +import { truncateString } from "@/lib/utils"; +import { MinimalOnyxDocument } from "@/lib/search/interfaces"; +import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils"; +import { TokenDisplay } from "@/components/TokenDisplay"; + +// Define a type for uploading files that includes progress +export interface UploadingFile { + name: string; + progress: number; +} + +const DraggableItem: React.FC<{ + id: string; + type: "folder" | "file"; + item: FolderResponse | FileResponse; + onClick?: () => void; + onSelect?: (e: React.MouseEvent) => void; + isSelected: boolean; +}> = ({ id, type, item, onClick, onSelect, isSelected }) => { + const { + attributes, + listeners, + setNodeRef, + transform, + transition, + isDragging, + } = useSortable({ id }); + + const style: React.CSSProperties = { + transform: CSS.Transform.toString(transform), + transition, + opacity: isDragging ? 0.5 : 1, + position: "relative", + zIndex: isDragging ? 1 : "auto", + }; + + const selectedClassName = isSelected + ? "bg-neutral-200/50 dark:bg-neutral-800/50" + : "hover:bg-neutral-200/50 dark:hover:bg-neutral-800/50"; + + if (type === "folder") { + return ( +
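`FilePicker` encodes dnd-kit sortable IDs as `folder-${id}` / `file-${id}` strings and later recovers the numeric part with `parseInt(activeId.split("-")[1], 10)`. A typed pair of helpers makes that round-trip explicit; this is an illustrative sketch, not code from the diff:

```ts
type DragKind = "folder" | "file";

export const toDragId = (kind: DragKind, id: number): string => `${kind}-${id}`;

// Parses "folder-12" (or "folder--1") back into its parts.
// Using indexOf/slice instead of split("-") keeps negative ids such as the
// Recent-folder sentinel (-1) intact, which split("-")[1] would mangle.
export function fromDragId(
  dragId: string
): { kind: DragKind; id: number } | null {
  const sep = dragId.indexOf("-");
  if (sep === -1) return null;
  const kind = dragId.slice(0, sep);
  if (kind !== "folder" && kind !== "file") return null;
  const id = Number.parseInt(dragId.slice(sep + 1), 10);
  return Number.isNaN(id) ? null : { kind, id };
}

// Example: fromDragId("file-7") -> { kind: "file", id: 7 }
```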
+ {})} + onSelect={onSelect || (() => {})} + isSelected={isSelected} + allFilesSelected={false} + /> +
+ ); + } + + const file = item as FileResponse; + return ( +
+
+
{ + e.stopPropagation(); + e.preventDefault(); + onSelect && onSelect(e); + }} + > +
+ {isSelected && ( + + + + )} +
+
+
+
+
+
+ {getFileIconFromFileNameAndLink(file.name, file.link_url)} + {file.name.length > 34 ? ( + + + + + {truncateString(file.name, 34)} + + + +

{file.name}

+
+
+
+ ) : ( + + {file.name} + + )} +
+ +
+ {file.created_at + ? getFormattedDateTime(new Date(file.created_at)) + : "–"} +
+
+
+
+ ); +}; + +const FilePickerFolderItem: React.FC<{ + folder: FolderResponse; + onClick: () => void; + onSelect: (e: React.MouseEvent) => void; + isSelected: boolean; + allFilesSelected: boolean; +}> = ({ folder, onClick, onSelect, isSelected, allFilesSelected }) => { + const selectedClassName = + isSelected || allFilesSelected + ? "bg-neutral-200/50 dark:bg-neutral-800/50" + : "hover:bg-neutral-200/50 dark:hover:bg-neutral-800/50"; + + // Determine if the folder is empty + const isEmpty = folder.files.length === 0; + + return ( +
+
+ {!isEmpty && ( +
{ + e.preventDefault(); + e.stopPropagation(); + onSelect(e); + }} + > +
+ {(isSelected || allFilesSelected) && ( + + + + )} +
+
+ )} +
+
+
+
+ + + {folder.name.length > 40 ? ( + + + + + {truncateString(folder.name, 40)} + + + +

{folder.name}

+
+
+
+ ) : ( + + {folder.name} + + )} +
+ +
+ {folder.files.length} {folder.files.length === 1 ? "file" : "files"} +
+
+
+
+ ); +}; + +export interface FilePickerModalProps { + isOpen: boolean; + onClose: () => void; + onSave: () => void; + buttonContent: string; + setPresentingDocument: (onyxDocument: MinimalOnyxDocument) => void; +} + +// Define a model descriptor interface +interface LLMModelDescriptor { + modelName: string; + maxTokens: number; +} + +enum SortType { + TimeCreated = "Time Created", + Alphabetical = "Alphabetical", + Files = "Files", +} + +enum SortDirection { + Ascending = "asc", + Descending = "desc", +} + +export const FilePickerModal: React.FC = ({ + isOpen, + onClose, + onSave, + setPresentingDocument, + buttonContent, +}) => { + const { + folders, + refreshFolders, + uploadFile, + currentFolder, + setCurrentFolder, + renameItem, + deleteItem, + moveItem, + selectedFiles, + selectedFolders, + addSelectedFile, + removeSelectedFile, + removeSelectedFolder, + addSelectedFolder, + createFileFromLink, + } = useDocumentsContext(); + + const router = useRouter(); + const [linkUrl, setLinkUrl] = useState(""); + const [isCreatingFileFromLink, setIsCreatingFileFromLink] = useState(false); + const [isUploadingFile, setIsUploadingFile] = useState(false); + + // Add new state variables for progress tracking + const [uploadingFiles, setUploadingFiles] = useState([]); + const [completedFiles, setCompletedFiles] = useState([]); + const [refreshInterval, setRefreshInterval] = useState( + null + ); + + const [searchQuery, setSearchQuery] = useState(""); + const [currentFolderFiles, setCurrentFolderFiles] = useState( + [] + ); + const [activeId, setActiveId] = useState(null); + const [isHoveringRight, setIsHoveringRight] = useState(false); + + const sensors = useSensors( + useSensor(PointerSensor, { + activationConstraint: { + distance: 8, + }, + }), + useSensor(KeyboardSensor, { + coordinateGetter: sortableKeyboardCoordinates, + }) + ); + + const [selectedFileIds, setSelectedFileIds] = useState>( + new Set() + ); + const [selectedFolderIds, setSelectedFolderIds] = useState>( + new Set() + ); + + const { setPopup } = usePopup(); + + // Create model descriptors and selectedModel state + const modelDescriptors: LLMModelDescriptor[] = [ + { modelName: "Claude 3 Opus", maxTokens: 200000 }, + { modelName: "Claude 3 Sonnet", maxTokens: 180000 }, + { modelName: "GPT-4", maxTokens: 128000 }, + ]; + + const [selectedModel, setSelectedModel] = useState(modelDescriptors[0]); + + // Add a new state for tracking uploads + const [uploadStartTime, setUploadStartTime] = useState(null); + const MAX_UPLOAD_TIME = 30000; // 30 seconds max for any upload + + const [sortType, setSortType] = useState(SortType.TimeCreated); + const [sortDirection, setSortDirection] = useState( + SortDirection.Descending + ); + const [hoveredColumn, setHoveredColumn] = useState(null); + + useEffect(() => { + if (isOpen) { + // Initialize selected file IDs + const fileIds = new Set(); + selectedFiles.forEach((file) => fileIds.add(file.id)); + setSelectedFileIds(fileIds); + + // Initialize selected folder IDs + const folderIds = new Set(); + selectedFolders.forEach((folder) => folderIds.add(folder.id)); + setSelectedFolderIds(folderIds); + } + }, [isOpen, selectedFiles, selectedFolders]); + + useEffect(() => { + if (isOpen) { + refreshFolders(); + } + }, [isOpen, refreshFolders]); + + useEffect(() => { + if (currentFolder) { + if (currentFolder === -1) { + // For the special "Recent" folder (id: -1), include files not in any folder that are selected + const folder = folders.find((f) => f.id === currentFolder); + const filesInFolder = folder?.files 
|| []; + + // Get selected files that are not in any folder + const selectedFilesNotInFolders = selectedFiles.filter( + (file) => !folders.some((f) => f.id === file.folder_id) + ); + + const combinedFiles = [...filesInFolder, ...selectedFilesNotInFolders]; + + // Sort the files + const sortedFiles = combinedFiles.sort((a, b) => { + let comparison = 0; + + if (sortType === SortType.TimeCreated) { + comparison = + new Date(b.created_at || "").getTime() - + new Date(a.created_at || "").getTime(); + } else if (sortType === SortType.Alphabetical) { + comparison = a.name.localeCompare(b.name); + } + + return sortDirection === SortDirection.Ascending + ? -comparison + : comparison; + }); + + setCurrentFolderFiles(sortedFiles); + } else { + const folder = folders.find( + (f) => f.id === currentFolder && f.name != "Recent Documents" + ); + const files = folder?.files || []; + + // Sort the files + const sortedFiles = [...files].sort((a, b) => { + let comparison = 0; + + if (sortType === SortType.TimeCreated) { + comparison = + new Date(b.created_at || "").getTime() - + new Date(a.created_at || "").getTime(); + } else if (sortType === SortType.Alphabetical) { + comparison = a.name.localeCompare(b.name); + } + + return sortDirection === SortDirection.Ascending + ? -comparison + : comparison; + }); + + setCurrentFolderFiles(sortedFiles); + } + } else { + setCurrentFolderFiles([]); + } + }, [currentFolder, folders, selectedFiles, sortType, sortDirection]); + + useEffect(() => { + if (searchQuery) { + setCurrentFolder(null); + } + }, [searchQuery]); + + // Add a useEffect to check for timed-out uploads + useEffect(() => { + if (isUploadingFile || isCreatingFileFromLink) { + if (!uploadStartTime) { + setUploadStartTime(Date.now()); + } + + const timer = setTimeout(() => { + // If uploads have been going on for too long, reset the state + if (uploadStartTime && Date.now() - uploadStartTime > MAX_UPLOAD_TIME) { + setIsUploadingFile(false); + setIsCreatingFileFromLink(false); + setUploadStartTime(null); + refreshFolders(); // Make sure we have the latest files + } + }, MAX_UPLOAD_TIME + 1000); // Check just after the max time + + return () => clearTimeout(timer); + } else { + // Reset when not uploading + setUploadStartTime(null); + } + }, [ + isUploadingFile, + isCreatingFileFromLink, + uploadStartTime, + refreshFolders, + ]); + + const handleFolderClick = (folderId: number) => { + setCurrentFolder(folderId); + const clickedFolder = folders.find((f) => f.id === folderId); + if (clickedFolder) { + setCurrentFolderFiles(clickedFolder.files || []); + } else { + setCurrentFolderFiles([]); + } + }; + const handleFileClick = (file: FileResponse) => { + if (file.link_url) { + window.open(file.link_url, "_blank"); + } else { + setPresentingDocument({ + document_id: file.document_id, + semantic_identifier: file.name, + }); + } + }; + + const handleFileSelect = ( + e: React.MouseEvent, + file: FileResponse + ) => { + e.stopPropagation(); + setSelectedFileIds((prev) => { + const newSet = new Set(prev); + if (newSet.has(file.id)) { + newSet.delete(file.id); + removeSelectedFile(file); + } else { + newSet.add(file.id); + addSelectedFile(file); + } + return newSet; + }); + // Check if the file's folder should be unselected + if (file.folder_id) { + setSelectedFolderIds((prev) => { + const newSet = new Set(prev); + if (newSet.has(file.folder_id!)) { + const folder = folders.find((f) => f.id === file.folder_id); + if (folder) { + const allFilesSelected = folder.files.every( + (f) => selectedFileIds.has(f.id) || f.id === 
file.id + ); + + if (!allFilesSelected) { + newSet.delete(file.folder_id!); + if (folder) { + removeSelectedFolder(folder); + } + } + } + } + return newSet; + }); + } + }; + + const RECENT_DOCS_FOLDER_ID = -1; + + const isRecentFolder = (folderId: number) => + folderId === RECENT_DOCS_FOLDER_ID; + + const handleFolderSelect = (folder: FolderResponse) => { + // Special handling for the recent folder + const isRecent = isRecentFolder(folder.id); + + setSelectedFolderIds((prev) => { + const newSet = new Set(prev); + if (newSet.has(folder.id)) { + newSet.delete(folder.id); + removeSelectedFolder(folder); + + // For the recent folder, also remove all its files from selection + if (isRecent) { + folder.files.forEach((file) => { + if (selectedFileIds.has(file.id)) { + removeSelectedFile(file); + } + }); + } + } else { + newSet.add(folder.id); + addSelectedFolder(folder); + } + return newSet; + }); + + // Update selectedFileIds based on folder selection + setSelectedFileIds((prev) => { + const newSet = new Set(prev); + + // For the recent folder, we need special handling + if (isRecent) { + // If we're selecting the recent folder, don't automatically select all its files + if (!selectedFolderIds.has(folder.id)) { + return newSet; + } + } + + folder.files.forEach((file) => { + if (selectedFolderIds.has(folder.id)) { + newSet.delete(file.id); + } else { + newSet.add(file.id); + } + }); + return newSet; + }); + }; + + const selectedItems = useMemo(() => { + const items: { + folders: FolderResponse[]; + files: FileResponse[]; + totalTokens: number; + } = { + folders: [], + files: [], + totalTokens: 0, + }; + + // First handle selected files that are not in any folder + selectedFiles.forEach((file) => { + if (!folders.some((f) => f.id === file.folder_id)) { + items.files.push(file); + items.totalTokens += file.token_count || 0; + } + }); + + // Then handle folders and their files + folders.forEach((folder) => { + // For the recent folder, only include it if explicitly selected + if (isRecentFolder(folder.id)) { + if (selectedFolderIds.has(folder.id)) { + items.folders.push(folder); + folder.files.forEach((file) => { + items.totalTokens += file.token_count || 0; + }); + } else { + // For the recent folder, include individually selected files + const selectedFilesInFolder = folder.files.filter((file) => + selectedFileIds.has(file.id) + ); + items.files.push(...selectedFilesInFolder); + selectedFilesInFolder.forEach((file) => { + items.totalTokens += file.token_count || 0; + }); + } + return; + } + + // For regular folders + if (selectedFolderIds.has(folder.id)) { + items.folders.push(folder); + folder.files.forEach((file) => { + items.totalTokens += file.token_count || 0; + }); + } else { + const selectedFilesInFolder = folder.files.filter((file) => + selectedFileIds.has(file.id) + ); + if ( + selectedFilesInFolder.length === folder.files.length && + folder.files.length > 0 + ) { + items.folders.push(folder); + folder.files.forEach((file) => { + items.totalTokens += file.token_count || 0; + }); + } else { + items.files.push(...selectedFilesInFolder); + selectedFilesInFolder.forEach((file) => { + items.totalTokens += file.token_count || 0; + }); + } + } + }); + + return items; + }, [folders, selectedFileIds, selectedFolderIds, selectedFiles]); + + // Add these new functions for tracking upload progress + const updateFileProgress = (fileName: string, progress: number) => { + setUploadingFiles((prev) => + prev.map((file) => + file.name === fileName ? 
{ ...file, progress } : file + ) + ); + }; + + const markFileComplete = (fileName: string) => { + setUploadingFiles((prev) => prev.filter((file) => file.name !== fileName)); + }; + + const startRefreshInterval = () => { + if (refreshInterval) { + clearInterval(refreshInterval); + } + + // Add a timestamp to track when we started refreshing + const startTime = Date.now(); + const MAX_REFRESH_TIME = 30000; // 30 seconds max for any upload to complete + + const interval = setInterval(() => { + // Check if we've been waiting too long, if so, clear uploading state + if (Date.now() - startTime > MAX_REFRESH_TIME) { + setUploadingFiles([]); + setCompletedFiles([]); + clearInterval(interval); + setRefreshInterval(null); + return; + } + + // Simulate progress for files that don't have real progress tracking yet + setUploadingFiles((prev) => + prev.map((file) => { + // Don't update files that are already complete + if (completedFiles.includes(file.name) || file.progress >= 100) { + return file; + } + + // Slow down progress as it approaches completion for more realistic feel + let increment; + if (file.progress < 70) { + // Normal increment for first 70% + increment = Math.floor(Math.random() * 10) + 5; + } else if (file.progress < 90) { + // Slower increment between 70-90% + increment = Math.floor(Math.random() * 5) + 2; + } else { + // Very slow for final 10% + increment = Math.floor(Math.random() * 2) + 1; + } + + const newProgress = Math.min(file.progress + increment, 99); // Cap at 99% until confirmed + return { ...file, progress: newProgress }; + }) + ); + + const allFilesUploaded = uploadingFiles.every((uploadingFile) => { + // Skip files already marked as complete + if (completedFiles.includes(uploadingFile.name)) { + return true; + } + + if (uploadingFile.name.startsWith("http")) { + // For URL uploads, extract the domain and check for files containing it + try { + // Get the hostname (domain) from the URL + const url = new URL(uploadingFile.name); + const hostname = url.hostname; + + // Look for recently added files that might match this URL + const isUploaded = folders.some((folder) => + folder.files.some( + (file) => + file.name.toLowerCase().includes(hostname.toLowerCase()) || + (file.lastModified && + new Date(file.lastModified).getTime() > startTime - 60000) + ) + ); + + if (isUploaded) { + // Mark as complete if found in files list + markFileComplete(uploadingFile.name); + } + return isUploaded; + } catch (e) { + console.error("Failed to parse URL:", e); + return false; // Force continued checking + } + } + + // For regular file uploads, check if filename exists in the folders + const isUploaded = folders.some((folder) => + folder.files.some((file) => file.name === uploadingFile.name) + ); + + if (isUploaded) { + // Mark as complete if found in files list + markFileComplete(uploadingFile.name); + } + return isUploaded; + }); + + if ( + allFilesUploaded && + uploadingFiles.length > 0 && + completedFiles.length === uploadingFiles.length + ) { + // If all files are marked complete and no new uploads are happening, clean up + setTimeout(() => { + setUploadingFiles([]); + setCompletedFiles([]); + clearInterval(interval); + setRefreshInterval(null); + }, 2000); + } + }, 1000); // Update every second for smoother animation + + setRefreshInterval(interval); + }; + + // Cleanup interval on component unmount + useEffect(() => { + return () => { + if (refreshInterval) { + clearInterval(refreshInterval); + } + }; + }, [refreshInterval]); + + const addUploadedFileToContext = async (files: 
FileList) => { + for (let i = 0; i < files.length; i++) { + const file = files[i]; + // Add file to uploading files state + setUploadingFiles((prev) => [...prev, { name: file.name, progress: 0 }]); + const formData = new FormData(); + formData.append("files", file); + const response: FileResponse[] = await uploadFile(formData, null); + + if (response.length > 0) { + const uploadedFile = response[0]; + addSelectedFile(uploadedFile); + markFileComplete(file.name); + } + } + }; + + const handleFileUpload = async (e: React.ChangeEvent) => { + const files = e.target.files; + if (files) { + setIsUploadingFile(true); + try { + await addUploadedFileToContext(files); + await refreshFolders(); + } catch (error) { + console.error("Error uploading file:", error); + } finally { + setIsUploadingFile(false); + } + } + }; + + const handleDragStart = (event: DragStartEvent) => { + setActiveId(event.active.id.toString()); + }; + + const handleDragMove = (event: DragMoveEvent) => {}; + + const handleDragEnd = (event: DragEndEvent) => { + const { active, over } = event; + setActiveId(null); + setIsHoveringRight(false); + }; + + const handleDragCancel = () => { + setActiveId(null); + setIsHoveringRight(false); + }; + + const handleSortChange = (newSortType: SortType) => { + if (sortType === newSortType) { + setSortDirection( + sortDirection === SortDirection.Ascending + ? SortDirection.Descending + : SortDirection.Ascending + ); + } else { + setSortType(newSortType); + setSortDirection(SortDirection.Descending); + } + }; + + const renderSortIndicator = (columnType: SortType) => { + if (sortType !== columnType) return null; + + return sortDirection === SortDirection.Ascending ? ( + + ) : ( + + ); + }; + + const renderHoverIndicator = (columnType: SortType) => { + if (sortType === columnType || hoveredColumn !== columnType) return null; + + return ; + }; + + const filteredFolders = folders + .filter(function (folder) { + return folder.name.toLowerCase().includes(searchQuery.toLowerCase()); + }) + .sort((a, b) => { + let comparison = 0; + + if (sortType === SortType.TimeCreated) { + comparison = + new Date(b.created_at).getTime() - new Date(a.created_at).getTime(); + } else if (sortType === SortType.Alphabetical) { + comparison = a.name.localeCompare(b.name); + } else if (sortType === SortType.Files) { + comparison = b.files.length - a.files.length; + } + + return sortDirection === SortDirection.Ascending + ? -comparison + : comparison; + }); + + const renderNavigation = () => { + if (currentFolder !== null) { + return ( +
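The sorting above re-derives a comparator from `sortType` and negates it for ascending order, a pattern the modal repeats for both folders and files. One way to factor it, assuming only the `FolderResponse` fields the diff actually sorts on (`name`, `created_at`, `files`):

```ts
type Direction = "asc" | "desc";
type SortColumn = "time" | "name" | "files";

interface Sortable {
  name: string;
  created_at?: string;
  files?: unknown[];
}

// Descending comparison for one column; callers flip the sign for ascending.
// Note: as in the diff, "desc" on the name column yields A -> Z.
const baseCompare = (a: Sortable, b: Sortable, column: SortColumn): number => {
  switch (column) {
    case "time":
      return (
        new Date(b.created_at ?? 0).getTime() -
        new Date(a.created_at ?? 0).getTime()
      );
    case "name":
      return a.name.localeCompare(b.name);
    case "files":
      return (b.files?.length ?? 0) - (a.files?.length ?? 0);
    default:
      return 0;
  }
};

export const sortItems = <T extends Sortable>(
  items: T[],
  column: SortColumn,
  direction: Direction
): T[] =>
  [...items].sort((a, b) =>
    direction === "asc" ? -baseCompare(a, b, column) : baseCompare(a, b, column)
  );
```

With this, the two near-identical sorting branches in the folder-files effect collapse into single `sortItems` calls.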
setCurrentFolder(null)} + > + + + + Back to My Documents +
+ ); + } + return null; + }; + + const isAllFilesInFolderSelected = (folder: FolderResponse) => { + return folder.files.every((file) => selectedFileIds.has(file.id)); + }; + + const handleRenameItem = async ( + itemId: number, + currentName: string, + isFolder: boolean + ) => { + const newName = prompt( + `Enter new name for ${isFolder ? "folder" : "file"}:`, + currentName + ); + if (newName && newName !== currentName) { + try { + await renameItem(itemId, newName, isFolder); + setPopup({ + message: `${isFolder ? "Folder" : "File"} renamed successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error renaming item:", error); + setPopup({ + message: `Failed to rename ${isFolder ? "folder" : "file"}`, + type: "error", + }); + } + } + }; + + const handleDeleteItem = async (itemId: number, isFolder: boolean) => { + const itemType = isFolder ? "folder" : "file"; + const confirmDelete = window.confirm( + `Are you sure you want to delete this ${itemType}?` + ); + + if (confirmDelete) { + try { + await deleteItem(itemId, isFolder); + setPopup({ + message: `${itemType} deleted successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error deleting item:", error); + setPopup({ + message: `Failed to delete ${itemType}`, + type: "error", + }); + } + } + }; + + const handleMoveItem = async ( + itemId: number, + currentFolderId: number | null, + isFolder: boolean + ) => { + const availableFolders = folders + .filter((folder) => folder.id !== itemId) + .map((folder) => `${folder.id}: ${folder.name}`) + .join("\n"); + + const promptMessage = `Enter the ID of the destination folder:\n\nAvailable folders:\n${availableFolders}\n\nEnter 0 to move to the root folder.`; + const destinationFolderId = prompt(promptMessage); + + if (destinationFolderId !== null) { + const newFolderId = parseInt(destinationFolderId, 10); + if (isNaN(newFolderId)) { + setPopup({ + message: "Invalid folder ID", + type: "error", + }); + return; + } + + try { + await moveItem( + itemId, + newFolderId === 0 ? null : newFolderId, + isFolder + ); + setPopup({ + message: `${isFolder ? "Folder" : "File"} moved successfully`, + type: "success", + }); + await refreshFolders(); + } catch (error) { + console.error("Error moving item:", error); + setPopup({ + message: "Failed to move item", + type: "error", + }); + } + } + }; + + // Add these new functions for removing files and groups + const handleRemoveFile = (file: FileResponse) => { + setSelectedFileIds((prev) => { + const newSet = new Set(prev); + newSet.delete(file.id); + return newSet; + }); + removeSelectedFile(file); + }; + + const handleRemoveFolder = (folder: FolderResponse) => { + // Special handling for the recent folder + if (isRecentFolder(folder.id)) { + // Also remove all files in the recent folder from selection + folder.files.forEach((file) => { + if (selectedFileIds.has(file.id)) { + setSelectedFileIds((prev) => { + const newSet = new Set(prev); + newSet.delete(file.id); + return newSet; + }); + removeSelectedFile(file); + } + }); + } + + setSelectedFolderIds((prev) => { + const newSet = new Set(prev); + newSet.delete(folder.id); + return newSet; + }); + removeSelectedFolder(folder); + }; + + return ( + folder.id === currentFolder)?.name + : "My Documents" + } + > +
+
+
+
+
+ setSearchQuery(e.target.value)} + /> + +
+ + + +
+
+ {renderNavigation()} +
+ + {filteredFolders.length + currentFolderFiles.length > 0 ? ( +
+
+
+ +
+
+ +
+
+
+        <SortableContext
+          items={[
+            ...filteredFolders.map((f) => `folder-${f.id}`),
+            ...currentFolderFiles.map((f) => `file-${f.id}`),
+          ]}
+          strategy={verticalListSortingStrategy}
+        >
+ {currentFolder === null + ? filteredFolders.map((folder) => ( + handleFolderClick(folder.id)} + onSelect={() => handleFolderSelect(folder)} + isSelected={selectedFolderIds.has(folder.id)} + allFilesSelected={isAllFilesInFolderSelected( + folder + )} + /> + )) + : currentFolderFiles.map((file) => ( + handleFileClick(file)} + onSelect={(e: React.MouseEvent) => + handleFileSelect(e, file) + } + isSelected={selectedFileIds.has(file.id)} + /> + ))} + {/* Add uploading files visualization */} +
+
+ + + {activeId ? ( + + f.id === parseInt(activeId.split("-")[1], 10) + )! + : currentFolderFiles.find( + (f) => + f.id === parseInt(activeId.split("-")[1], 10) + )! + } + isSelected={ + activeId.startsWith("folder") + ? selectedFolderIds.has( + parseInt(activeId.split("-")[1], 10) + ) + : selectedFileIds.has( + parseInt(activeId.split("-")[1], 10) + ) + } + /> + ) : null} + +
+
+ ) : folders.length > 0 ? ( +
+

+ No groups found +

+
+ ) : ( +
+

+ No groups found +

+ + + Create folder in My Documents + +
+ )} +
+
setIsHoveringRight(true)} + onDragLeave={() => setIsHoveringRight(false)} + > +
+
+ +
+ +
+
+ { + setIsUploadingFile(true); + setUploadStartTime(Date.now()); // Record start time + + // Add files to uploading files state + + // Start the refresh interval to simulate progress + startRefreshInterval(); + + // Convert File[] to FileList for addUploadedFileToContext + const fileListArray = Array.from(files); + const fileList = new DataTransfer(); + fileListArray.forEach((file) => fileList.items.add(file)); + + addUploadedFileToContext(fileList.files) + .then(() => refreshFolders()) + .finally(() => { + setIsUploadingFile(false); + }); + }} + onUrlUpload={async (url: string) => { + setIsCreatingFileFromLink(true); + setUploadStartTime(Date.now()); // Record start time + + // Add URL to uploading files + setUploadingFiles((prev) => [ + ...prev, + { name: url, progress: 0 }, + ]); + + // Start the refresh interval to simulate progress + startRefreshInterval(); + + try { + const response: FileResponse[] = + await createFileFromLink(url, -1); + + if (response.length > 0) { + // Extract domain from URL to help with detection + const urlObj = new URL(url); + + const createdFile: FileResponse = response[0]; + addSelectedFile(createdFile); + // Make sure to remove the uploading file indicator when done + markFileComplete(url); + } + + await refreshFolders(); + } catch (e) { + console.error("Error creating file from link:", e); + // Also remove the uploading indicator on error + markFileComplete(url); + } finally { + setIsCreatingFileFromLink(false); + } + }} + isUploading={isUploadingFile || isCreatingFileFromLink} + /> +
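The `onUpload` handler above converts the `File[]` delivered by `FileUploadSection` back into a `FileList` by round-tripping through `new DataTransfer()`. That works in current browsers, but `DataTransfer` is not constructible in some test environments (jsdom, notably), so a hedged sketch of the two options — skipping the conversion where possible, and isolating it where an API strictly requires a `FileList`:

```ts
// Accept either shape so callers can pass File[] straight through.
export async function uploadAll(
  files: File[] | FileList,
  uploadOne: (file: File) => Promise<void>
): Promise<void> {
  for (const file of Array.from(files)) {
    await uploadOne(file);
  }
}

// Only build a FileList when an API strictly requires one.
// Throws in environments without a DataTransfer constructor.
export function toFileList(files: File[]): FileList {
  const dt = new DataTransfer();
  files.forEach((f) => dt.items.add(f));
  return dt.files;
}
```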
+
+
+
+
+
+
+
+ + Selected context: + + +
+ + + +
+ +
+
+ {(isUploadingFile || + isCreatingFileFromLink || + uploadingFiles.length > 0) && ( + +

Please wait for all files to finish uploading

+
+ )} +
+
+
+
+
+
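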
+ ); +}; diff --git a/web/src/app/chat/my-documents/components/SearchResultItem.tsx b/web/src/app/chat/my-documents/components/SearchResultItem.tsx new file mode 100644 index 0000000000..4d5ccf1b5d --- /dev/null +++ b/web/src/app/chat/my-documents/components/SearchResultItem.tsx @@ -0,0 +1,117 @@ +import React from "react"; +import { File, Link as LinkIcon, Folder } from "lucide-react"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; + +interface SearchResultItemProps { + item: { + id: number; + name: string; + document_id: string; + }; + view: "grid" | "list"; + onClick: (documentId: string, name: string) => void; + isLink?: boolean; + lastUpdated?: string; + onRename: () => void; + onDelete: () => void; + onMove: () => void; + parentFolder?: { + id: number; + name: string; + }; + onParentFolderClick?: (folderId: number) => void; + fileSize?: FileSize; +} +export enum FileSize { + SMALL = "Small", + MEDIUM = "Medium", + LARGE = "Large", +} +export const fileSizeToDescription = { + [FileSize.SMALL]: "Small", + [FileSize.MEDIUM]: "Medium", + [FileSize.LARGE]: "Large", +}; + +export const SearchResultItem: React.FC = ({ + item, + view, + onClick, + isLink = false, + lastUpdated, + onRename, + onDelete, + onMove, + parentFolder, + onParentFolderClick, + fileSize = FileSize.SMALL, +}) => { + const Icon = isLink ? LinkIcon : File; + + return ( + + ); +}; diff --git a/web/src/app/chat/my-documents/components/SelectedItemsList.tsx b/web/src/app/chat/my-documents/components/SelectedItemsList.tsx new file mode 100644 index 0000000000..9256cbc3ba --- /dev/null +++ b/web/src/app/chat/my-documents/components/SelectedItemsList.tsx @@ -0,0 +1,220 @@ +import React from "react"; +import { cn, truncateString } from "@/lib/utils"; +import { Button } from "@/components/ui/button"; +import { X, Folder, File, FolderIcon, Loader2 } from "lucide-react"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { Separator } from "@/components/ui/separator"; +import { Badge } from "@/components/ui/badge"; +import { + FolderResponse, + FileResponse, + useDocumentsContext, +} from "../DocumentsContext"; +import { useDocumentSelection } from "../../useDocumentSelection"; +import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils"; +import { MinimalOnyxDocument } from "@/lib/search/interfaces"; +import { UploadingFile } from "./FilePicker"; +import { CircularProgress } from "../[id]/components/upload/CircularProgress"; + +interface SelectedItemsListProps { + folders: FolderResponse[]; + files: FileResponse[]; + uploadingFiles: UploadingFile[]; + onRemoveFile: (file: FileResponse) => void; + onRemoveFolder: (folder: FolderResponse) => void; + setPresentingDocument: (onyxDocument: MinimalOnyxDocument) => void; +} + +export const SelectedItemsList: React.FC = ({ + folders, + files, + uploadingFiles, + onRemoveFile, + onRemoveFolder, + setPresentingDocument, +}) => { + const hasItems = folders.length > 0 || files.length > 0; + const openFile = (file: FileResponse) => { + if (file.link_url) { + window.open(file.link_url, "_blank"); + } else { + setPresentingDocument({ + semantic_identifier: file.name, + document_id: file.document_id, + }); + } + }; + + return ( +
+
+

+ Selected Items +

+
+ + +
+ {folders.length > 0 && ( +
+ {folders.map((folder: FolderResponse) => ( +
+
+
+ + + + {truncateString(folder.name, 34)} + +
+
+ + +
+ ))} +
+ )} + + {files.length > 0 && ( +
+ {files.map((file: FileResponse) => ( +
+
openFile(file)} + > +
+ {getFileIconFromFileNameAndLink(file.name, file.link_url)} + + {truncateString(file.name, 34)} + +
+
+ +
+ ))} +
+ )} +
+ {uploadingFiles + .filter( + (uploadingFile) => + !files.map((file) => file.name).includes(uploadingFile.name) + ) + .map((uploadingFile, index) => ( +
+
+
+
+ {uploadingFile.name.startsWith("http") ? ( + + ) : ( + + )} + + {uploadingFile.name.startsWith("http") + ? `${uploadingFile.name.substring(0, 30)}${ + uploadingFile.name.length > 30 ? "..." : "" + }` + : truncateString(uploadingFile.name, 34)} + +
+
+ +
+
+ ))} +
+ {!hasItems && ( +
+ No items selected +
+ )} +
+
+
+ ); +}; diff --git a/web/src/app/chat/my-documents/components/SharedFolderItem.tsx b/web/src/app/chat/my-documents/components/SharedFolderItem.tsx new file mode 100644 index 0000000000..3ebda026e1 --- /dev/null +++ b/web/src/app/chat/my-documents/components/SharedFolderItem.tsx @@ -0,0 +1,143 @@ +import React, { useState } from "react"; +import { FolderIcon, MoreHorizontal } from "lucide-react"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; +import { getFormattedDateTime, getTimeAgoString } from "@/lib/dateUtils"; +import { Button } from "@/components/ui/button"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { FiArrowDown, FiEdit, FiTrash } from "react-icons/fi"; +import { DeleteEntityModal } from "@/components/DeleteEntityModal"; +import { useDocumentsContext } from "../DocumentsContext"; +import { TruncatedText } from "@/components/ui/truncatedText"; +import { truncateString } from "@/lib/utils"; + +interface SharedFolderItemProps { + folder: { + id: number; + name: string; + tokens?: number; + }; + onClick: (folderId: number) => void; + description?: string; + lastUpdated?: string; + onRename: () => void; + onDelete: () => void; + onMove: () => void; +} + +export const SharedFolderItem: React.FC = ({ + folder, + onClick, + description, + lastUpdated, + onRename, + onDelete, + onMove, +}) => { + const [isDeleteModalOpen, setIsDeleteModalOpen] = useState(false); + + const handleDeleteClick = () => { + setIsDeleteModalOpen(true); + }; + + return ( + <> +
{ + if (!(e.target as HTMLElement).closest(".action-menu")) { + e.preventDefault(); + onClick(folder.id); + } + }} + > +
+
+ + {folder.name.length > 60 ? ( + + + + {truncateString(folder.name, 60)} + + +

{folder.name}

+ {description && ( +

{description}

+ )} +
+
+
+ ) : ( + + {folder.name} + + )} +
+ +
+ {lastUpdated && getFormattedDateTime(new Date(lastUpdated))} +
+ +
+ {folder.tokens !== undefined + ? `${folder.tokens.toLocaleString()} tokens` + : "-"} +
+
+ +
e.stopPropagation()}> + + + + + +
+ {/* + */} + +
+
+
+
+
+ + setIsDeleteModalOpen(false)} + onConfirm={() => { + setIsDeleteModalOpen(false); + onDelete(); + }} + entityType="folder" + entityName={folder.name} + /> + + ); +}; diff --git a/web/src/app/chat/my-documents/components/types.ts b/web/src/app/chat/my-documents/components/types.ts new file mode 100644 index 0000000000..78a2be1a5c --- /dev/null +++ b/web/src/app/chat/my-documents/components/types.ts @@ -0,0 +1,32 @@ +import { FileResponse } from "../DocumentsContext"; + +export interface UserFolder { + id: number; + name: string; + parent_id: number | null; + token_count: number | null; +} + +export interface UserFile { + id: number; + name: string; + parent_folder_id: number | null; + token_count: number | null; + link_url: string | null; +} + +export interface FolderNode extends UserFolder { + children: FolderNode[]; + files: UserFolder[]; +} + +export interface FilePickerModalProps { + isOpen: boolean; + onClose: () => void; + onSave: (selectedItems: { files: number[]; folders: number[] }) => void; + title: string; + buttonContent: string; + selectedFiles: FileResponse[]; + addSelectedFile: (file: FileResponse) => void; + removeSelectedFile: (file: FileResponse) => void; +} diff --git a/web/src/app/chat/my-documents/page.tsx b/web/src/app/chat/my-documents/page.tsx new file mode 100644 index 0000000000..5f443e9501 --- /dev/null +++ b/web/src/app/chat/my-documents/page.tsx @@ -0,0 +1,13 @@ +import WrappedDocuments from "./WrappedDocuments"; +import { DocumentsProvider } from "./DocumentsContext"; +import { UserProvider } from "@/components/user/UserProvider"; + +export default async function GalleryPage(props: { + searchParams: Promise<{ [key: string]: string }>; +}) { + return ( + + + + ); +} diff --git a/web/src/app/chat/my-documents/useDocuments.ts b/web/src/app/chat/my-documents/useDocuments.ts new file mode 100644 index 0000000000..7f222b65c4 --- /dev/null +++ b/web/src/app/chat/my-documents/useDocuments.ts @@ -0,0 +1,64 @@ +import { useState, useEffect, useCallback } from "react"; + +// API functions +const fetchDocuments = async (): Promise => { + const response = await fetch("/api/manage/admin/documents"); + if (!response.ok) { + throw new Error("Failed to fetch documents"); + } + return response.json(); +}; + +const deleteDocument = async (documentId: number): Promise => { + const response = await fetch(`/api/manage/admin/documents/${documentId}`, { + method: "DELETE", + }); + if (!response.ok) { + throw new Error("Failed to delete document"); + } +}; + +export interface Document { + id: number; + document_id: string; +} +// Custom hook +export const useDocuments = () => { + const [documents, setDocuments] = useState([]); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = useState(null); + + const loadDocuments = useCallback(async () => { + setIsLoading(true); + setError(null); + try { + const fetchedDocuments = await fetchDocuments(); + setDocuments(fetchedDocuments); + } catch (err) { + setError("Failed to load documents err: " + err); + } finally { + setIsLoading(false); + } + }, []); + + const handleDeleteDocument = async (documentId: number) => { + try { + await deleteDocument(documentId); + await loadDocuments(); + } catch (err) { + setError("Failed to delete document"); + } + }; + + useEffect(() => { + loadDocuments(); + }, [loadDocuments]); + + return { + documents, + isLoading, + error, + loadDocuments, + handleDeleteDocument, + }; +}; diff --git a/web/src/app/chat/page.tsx b/web/src/app/chat/page.tsx index 582b2c53e7..80c3ef69c2 100644 --- 
a/web/src/app/chat/page.tsx +++ b/web/src/app/chat/page.tsx @@ -1,3 +1,4 @@ +import { DocumentsProvider } from "./my-documents/DocumentsContext"; import { SEARCH_PARAMS } from "@/lib/extension/constants"; import WrappedChat from "./WrappedChat"; @@ -10,9 +11,11 @@ export default async function Page(props: { searchParams[SEARCH_PARAMS.DEFAULT_SIDEBAR_OFF] === "true"; return ( - + + + ); } diff --git a/web/src/app/chat/searchParams.ts b/web/src/app/chat/searchParams.ts index b32390aa4c..1cf0421302 100644 --- a/web/src/app/chat/searchParams.ts +++ b/web/src/app/chat/searchParams.ts @@ -5,6 +5,8 @@ export const SEARCH_PARAM_NAMES = { CHAT_ID: "chatId", SEARCH_ID: "searchId", PERSONA_ID: "assistantId", + USER_FOLDER_ID: "userFolderId", + ALL_MY_DOCUMENTS: "allMyDocuments", // overrides TEMPERATURE: "temperature", MODEL_VERSION: "model-version", diff --git a/web/src/app/chat/sessionSidebar/HistorySidebar.tsx b/web/src/app/chat/sessionSidebar/HistorySidebar.tsx index e348f1878d..7662a72d63 100644 --- a/web/src/app/chat/sessionSidebar/HistorySidebar.tsx +++ b/web/src/app/chat/sessionSidebar/HistorySidebar.tsx @@ -19,7 +19,11 @@ import { ChatSession } from "../interfaces"; import { Folder } from "../folders/interfaces"; import { SettingsContext } from "@/components/settings/SettingsProvider"; -import { DocumentIcon2, NewChatIcon } from "@/components/icons/icons"; +import { + DocumentIcon2, + KnowledgeGroupIcon, + NewChatIcon, +} from "@/components/icons/icons"; import { PagesTab } from "./PagesTab"; import { pageType } from "./types"; import LogoWithText from "@/components/header/LogoWithText"; @@ -47,7 +51,7 @@ import { } from "@dnd-kit/sortable"; import { useSortable } from "@dnd-kit/sortable"; import { CSS } from "@dnd-kit/utilities"; -import { CircleX, PinIcon } from "lucide-react"; +import { CircleX, FolderIcon, PinIcon } from "lucide-react"; import { restrictToVerticalAxis } from "@dnd-kit/modifiers"; import { TruncatedText } from "@/components/ui/truncatedText"; @@ -302,6 +306,18 @@ export const HistorySidebar = forwardRef( New Chat

+ + +

+ My Documents +

+ {user?.preferences?.shortcut_enabled && ( (null); const [isReady, setIsReady] = useState(false); const [presentingDocument, setPresentingDocument] = - useState(null); + useState(null); const toggleDocumentSidebar = () => { setDocumentSidebarVisible(!documentSidebarVisible); @@ -106,6 +106,7 @@ export function SharedChatDisplay({
); } else if (message.type === "assistant") { diff --git a/web/src/app/chat/useDocumentSelection.ts b/web/src/app/chat/useDocumentSelection.ts index 10da5543d1..0cd9830124 100644 --- a/web/src/app/chat/useDocumentSelection.ts +++ b/web/src/app/chat/useDocumentSelection.ts @@ -1,5 +1,6 @@ import { OnyxDocument } from "@/lib/search/interfaces"; import { useState } from "react"; +import { FileResponse } from "./my-documents/DocumentsContext"; interface DocumentInfo { num_chunks: number; @@ -18,14 +19,32 @@ async function fetchDocumentLength(documentId: string) { } export function useDocumentSelection(): [ + FileResponse[], + (file: FileResponse) => void, + (file: FileResponse) => void, OnyxDocument[], (document: OnyxDocument) => void, () => void, number, ] { + const [selectedFiles, setSelectedFiles] = useState([]); const [selectedDocuments, setSelectedDocuments] = useState( [] ); + const removeSelectedFile = (file: FileResponse) => { + setSelectedFiles(selectedFiles.filter((f) => f.id !== file.id)); + }; + + const addSelectedFile = (file: FileResponse) => { + // Check if file already exists in the array to avoid duplicates + setSelectedFiles((files) => { + // Check if file already exists in the array to avoid duplicates + if (files.some((f) => f.id === file.id)) { + return files; + } + return [...files, file]; + }); + }; const [totalTokens, setTotalTokens] = useState(0); const selectedDocumentIds = selectedDocuments.map( (document) => document.document_id @@ -61,6 +80,9 @@ export function useDocumentSelection(): [ } return [ + selectedFiles, + addSelectedFile, + removeSelectedFile, selectedDocuments, toggleDocumentSelection, clearDocuments, diff --git a/web/src/app/ee/assistants/stats/[id]/WrappedAssistantsStats.tsx b/web/src/app/ee/assistants/stats/[id]/WrappedAssistantsStats.tsx index 63eb34cfda..edd06d21d0 100644 --- a/web/src/app/ee/assistants/stats/[id]/WrappedAssistantsStats.tsx +++ b/web/src/app/ee/assistants/stats/[id]/WrappedAssistantsStats.tsx @@ -1,16 +1,15 @@ "use client"; -import SidebarWrapper from "../../../../assistants/SidebarWrapper"; + +import SidebarWrapper from "@/app/assistants/SidebarWrapper"; import { AssistantStats } from "./AssistantStats"; export default function WrappedAssistantsStats({ - initiallyToggled, assistantId, }: { - initiallyToggled: boolean; assistantId: number; }) { return ( - + ); diff --git a/web/src/app/globals.css b/web/src/app/globals.css index 655b3532f6..713d2ec9d1 100644 --- a/web/src/app/globals.css +++ b/web/src/app/globals.css @@ -658,3 +658,19 @@ ul > li > p { display: inline; /* Make paragraphs inline to reduce vertical space */ } + +/* Custom animations */ +@keyframes fadeIn { + from { + opacity: 0; + transform: scale(0.95); + } + to { + opacity: 1; + transform: scale(1); + } +} + +.animate-fadeIn { + animation: fadeIn 0.2s ease-out forwards; +} diff --git a/web/src/app/layout.tsx b/web/src/app/layout.tsx index d8665531ca..707367a459 100644 --- a/web/src/app/layout.tsx +++ b/web/src/app/layout.tsx @@ -27,6 +27,7 @@ import Script from "next/script"; import { Hanken_Grotesk } from "next/font/google"; import { WebVitals } from "./web-vitals"; import { ThemeProvider } from "next-themes"; +import { DocumentsProvider } from "./chat/my-documents/DocumentsContext"; import CloudError from "@/components/errorPages/CloudErrorPage"; import Error from "@/components/errorPages/ErrorPage"; import AccessRestrictedPage from "@/components/errorPages/AccessRestrictedPage"; @@ -156,11 +157,13 @@ export default async function RootLayout({ 
diff --git a/web/src/app/layout.tsx b/web/src/app/layout.tsx
index d8665531ca..707367a459 100644
--- a/web/src/app/layout.tsx
+++ b/web/src/app/layout.tsx
@@ -27,6 +27,7 @@ import Script from "next/script";
 import { Hanken_Grotesk } from "next/font/google";
 import { WebVitals } from "./web-vitals";
 import { ThemeProvider } from "next-themes";
+import { DocumentsProvider } from "./chat/my-documents/DocumentsContext";
 import CloudError from "@/components/errorPages/CloudErrorPage";
 import Error from "@/components/errorPages/ErrorPage";
 import AccessRestrictedPage from "@/components/errorPages/AccessRestrictedPage";
@@ -156,11 +157,13 @@ export default async function RootLayout({
       hasAnyConnectors={hasAnyConnectors}
       hasImageCompatibleModel={hasImageCompatibleModel}
     >
-      <ThemeProvider>
-        <WebVitals />
-        {children}
-        {process.env.NEXT_PUBLIC_POSTHOG_KEY && <PostHogPageview />}
-      </ThemeProvider>
+      <DocumentsProvider>
+        <ThemeProvider>
+          <WebVitals />
+          {children}
+          {process.env.NEXT_PUBLIC_POSTHOG_KEY && <PostHogPageview />}
+        </ThemeProvider>
+      </DocumentsProvider>
   );
 }
diff --git a/web/src/app/search/components/SearchResultItem.tsx b/web/src/app/search/components/SearchResultItem.tsx
new file mode 100644
index 0000000000..9ebd44ea7a
--- /dev/null
+++ b/web/src/app/search/components/SearchResultItem.tsx
@@ -0,0 +1,82 @@
+import React from "react";
+import { OnyxDocument } from "@/lib/search/interfaces";
+import { ResultIcon } from "@/components/chat/sources/SourceCard";
+import { getTimeAgoString } from "@/lib/dateUtils";
+import { FiThumbsUp, FiClock } from "react-icons/fi";
+
+interface SearchResultItemProps {
+  document: OnyxDocument;
+  onClick: (document: OnyxDocument) => void;
+}
+
+export function SearchResultItem({ document, onClick }: SearchResultItemProps) {
+  const handleClick = (e: React.MouseEvent) => {
+    e.preventDefault();
+    onClick(document);
+  };
+
+  // Format the date if available
+  const formattedDate = document.updated_at
+    ? getTimeAgoString(new Date(document.updated_at))
+    : "";
+
+  return (
+    <div onClick={handleClick}>
+      <ResultIcon doc={document} size={18} />
+      <div>
+        <div>{document.semantic_identifier || "Untitled Document"}</div>
+        <div>
+          {document.boost > 1 && <span>Matched</span>}
+          {formattedDate && (
+            <span>
+              <FiClock />
+              {formattedDate}
+            </span>
+          )}
+          {document.metadata?.helpful && (
+            <span>
+              <FiThumbsUp />
+              Helpful
+            </span>
+          )}
+        </div>
+        <p>{document.blurb || "No description available"}</p>
+      </div>
+    </div>
+  );
+}
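// --- Illustrative sketch (not part of the diff): rendering SearchResultItem
// --- over a result list. `results` and the open-in-new-tab handler are
// --- hypothetical stand-ins for the search page's own state.
import { OnyxDocument } from "@/lib/search/interfaces";
import { SearchResultItem } from "@/app/search/components/SearchResultItem";

function SearchResultsList({ results }: { results: OnyxDocument[] }) {
  return (
    <div>
      {results.map((doc) => (
        <SearchResultItem
          key={doc.document_id}
          document={doc}
          onClick={(d) => window.open(d.link, "_blank")}
        />
      ))}
    </div>
  );
}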
diff --git a/web/src/components/CleanupModal.tsx b/web/src/components/CleanupModal.tsx
new file mode 100644
index 0000000000..70507e4e39
--- /dev/null
+++ b/web/src/components/CleanupModal.tsx
@@ -0,0 +1,226 @@
+import React, { useState } from "react";
+import { Button } from "@/components/ui/button";
+import { AlertCircle, Calendar, Clock, Loader2, Trash, X } from "lucide-react";
+
+export enum CleanupPeriod {
+  Day = "day",
+  Week = "week",
+  Month = "month",
+  All = "all",
+}
+
+interface CleanupModalProps {
+  isOpen: boolean;
+  onClose: () => void;
+  onConfirm: (period: CleanupPeriod, value: number) => void;
+}
+
+export const CleanupModal: React.FC<CleanupModalProps> = ({
+  isOpen,
+  onClose,
+  onConfirm,
+}) => {
+  const [selectedPeriod, setSelectedPeriod] = useState<CleanupPeriod | null>(
+    null
+  );
+  const [isLoading, setIsLoading] = useState(false);
+
+  if (!isOpen) return null;
+
+  const handleOptionSelect = (period: CleanupPeriod) => {
+    setSelectedPeriod(period);
+  };
+
+  const handleConfirm = async () => {
+    if (!selectedPeriod) return;
+
+    setIsLoading(true);
+    try {
+      // Value is always 1 for the fixed options, or 0 for "All"
+      const value = selectedPeriod === CleanupPeriod.All ? 0 : 1;
+      await onConfirm(selectedPeriod, value);
+      // Modal will be closed by parent component after onConfirm
+    } catch (error) {
+      console.error("Cleanup error:", error);
+      setIsLoading(false);
+      // Let parent handle the error, keep modal open with loading state reset
+    }
+  };
+
+  const getDeleteButtonText = () => {
+    if (!selectedPeriod) return "Select an option first";
+
+    switch (selectedPeriod) {
+      case CleanupPeriod.Day:
+        return "Delete files older than 1 day";
+      case CleanupPeriod.Week:
+        return "Delete files older than 1 week";
+      case CleanupPeriod.Month:
+        return "Delete files older than 1 month";
+      case CleanupPeriod.All:
+        return "Delete all files";
+    }
+  };
+
+  // Helper to get the appropriate variant based on period and selection state
+  const getButtonVariant = (period: CleanupPeriod) => {
+    if (selectedPeriod === period) {
+      return `time-${period}-selected` as const;
+    }
+    return `time-${period}` as const;
+  };
+
+  // Helper to get icon styling based on selection state
+  const getIconClass = (period: CleanupPeriod) => {
+    const isSelected = selectedPeriod === period;
+
+    switch (period) {
+      case CleanupPeriod.Day:
+        return `h-4 w-4 mb-1 ${
+          isSelected
+            ? "text-blue-600 dark:text-blue-300"
+            : "text-blue-500 dark:text-blue-400"
+        }`;
+      case CleanupPeriod.Week:
+        return `h-4 w-4 mb-1 ${
+          isSelected
+            ? "text-green-600 dark:text-green-300"
+            : "text-green-500 dark:text-green-400"
+        }`;
+      case CleanupPeriod.Month:
+        return `h-4 w-4 mb-1 ${
+          isSelected
+            ? "text-purple-600 dark:text-purple-300"
+            : "text-purple-500 dark:text-purple-400"
+        }`;
+      case CleanupPeriod.All:
+        return `h-4 w-4 mb-1 ${
+          isSelected
+            ? "text-red-600 dark:text-red-300"
+            : "text-red-500 dark:text-red-400"
+        }`;
+    }
+  };
+
+  return (
+    <div onClick={onClose}>
+      <div onClick={(e) => e.stopPropagation()}>
+        <div>
+          <h2>Cleanup Documents</h2>
+          <Button variant="ghost" size="icon" onClick={onClose}>
+            <X className="h-4 w-4" />
+          </Button>
+        </div>
+
+        <p>First, select how far back to keep your documents:</p>
+
+        <div>
+          <Button
+            variant={getButtonVariant(CleanupPeriod.Day)}
+            onClick={() => handleOptionSelect(CleanupPeriod.Day)}
+          >
+            <Clock className={getIconClass(CleanupPeriod.Day)} />
+            1 Day
+          </Button>
+          <Button
+            variant={getButtonVariant(CleanupPeriod.Week)}
+            onClick={() => handleOptionSelect(CleanupPeriod.Week)}
+          >
+            <Calendar className={getIconClass(CleanupPeriod.Week)} />
+            1 Week
+          </Button>
+          <Button
+            variant={getButtonVariant(CleanupPeriod.Month)}
+            onClick={() => handleOptionSelect(CleanupPeriod.Month)}
+          >
+            <Calendar className={getIconClass(CleanupPeriod.Month)} />
+            1 Month
+          </Button>
+          <Button
+            variant={getButtonVariant(CleanupPeriod.All)}
+            onClick={() => handleOptionSelect(CleanupPeriod.All)}
+          >
+            <Trash className={getIconClass(CleanupPeriod.All)} />
+            All
+          </Button>
+        </div>
+
+        {selectedPeriod === CleanupPeriod.All && (
+          <div>
+            <AlertCircle />
+            <div>
+              <p>Warning: This will delete ALL documents</p>
+              <p>
+                This action cannot be undone. Deleted documents cannot be
+                recovered.
+              </p>
+            </div>
+          </div>
+        )}
+
+        <p>Note: This action cannot be undone.</p>
+
+        <div>
+          <Button variant="outline" onClick={onClose} disabled={isLoading}>
+            Cancel
+          </Button>
+          <Button
+            onClick={handleConfirm}
+            disabled={!selectedPeriod || isLoading}
+          >
+            {isLoading ? (
+              <Loader2 className="h-4 w-4 animate-spin" />
+            ) : (
+              getDeleteButtonText()
+            )}
+          </Button>
+        </div>
+      </div>
+    </div>
+  );
+};
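// --- Illustrative sketch (not part of the diff): wiring CleanupModal to a
// --- delete handler. The `/api/user/file/cleanup` endpoint shown here is a
// --- hypothetical placeholder; only the component's props are taken from the
// --- diff above.
import { useState } from "react";
import { CleanupModal, CleanupPeriod } from "@/components/CleanupModal";

function CleanupButtonExample() {
  const [open, setOpen] = useState(false);

  const handleConfirm = async (period: CleanupPeriod, value: number) => {
    // The parent owns the API call and is responsible for closing the modal.
    await fetch("/api/user/file/cleanup", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ period, value }),
    });
    setOpen(false);
  };

  return (
    <>
      <button onClick={() => setOpen(true)}>Cleanup…</button>
      <CleanupModal
        isOpen={open}
        onClose={() => setOpen(false)}
        onConfirm={handleConfirm}
      />
    </>
  );
}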
diff --git a/web/src/components/DeleteEntityModal.tsx b/web/src/components/DeleteEntityModal.tsx
new file mode 100644
index 0000000000..612d33e0e8
--- /dev/null
+++ b/web/src/components/DeleteEntityModal.tsx
@@ -0,0 +1,51 @@
+import React from "react";
+import { Button } from "@/components/ui/button";
+import { Modal } from "@/components/Modal";
+
+interface DeleteEntityModalProps {
+  isOpen: boolean;
+  onClose: () => void;
+  onConfirm: () => void;
+  entityType: "file" | "folder";
+  entityName: string;
+  additionalWarning?: string;
+}
+
+export const DeleteEntityModal: React.FC<DeleteEntityModalProps> = ({
+  isOpen,
+  onClose,
+  onConfirm,
+  entityType,
+  entityName,
+  additionalWarning,
+}) => {
+  if (!isOpen) return null;
+
+  return (
+    <Modal onOutsideClick={onClose}>
+      <>
+        <div>
+          <h2>Delete {entityType}</h2>
+          <p>
+            Are you sure you want to delete the {entityType} &quot;{entityName}
+            &quot;? This action cannot be undone.
+            {additionalWarning}
+          </p>
+          <div>
+            <Button variant="outline" onClick={onClose}>
+              Cancel
+            </Button>
+            <Button variant="destructive" onClick={onConfirm}>
+              Delete
+            </Button>
+          </div>
+        </div>
+      </>
+    </Modal>
+  );
+};
diff --git a/web/src/components/Modal.tsx b/web/src/components/Modal.tsx
index 3ea1225c11..7276b8f012 100644
--- a/web/src/components/Modal.tsx
+++ b/web/src/components/Modal.tsx
@@ -23,6 +23,7 @@ interface ModalProps {
   heightOverride?: string;
   removeBottomPadding?: boolean;
   removePadding?: boolean;
+  increasedPadding?: boolean;
 }
 
 export function Modal({
@@ -41,6 +42,7 @@ export function Modal({
   heightOverride,
   removeBottomPadding,
   removePadding,
+  increasedPadding,
 }: ModalProps) {
   const modalRef = useRef<HTMLDivElement>(null);
   const [isMounted, setIsMounted] = useState(false);
@@ -111,7 +113,7 @@ export function Modal({
-
+
           {title}
           {icon && icon({ size: 30 })}
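// --- Illustrative sketch (not part of the diff): opting into Modal's new
// --- increasedPadding prop. The title/body content is arbitrary, and the
// --- onOutsideClick prop is assumed from the component's existing API.
import { Modal } from "@/components/Modal";

function PaddedModalExample({ onClose }: { onClose: () => void }) {
  return (
    <Modal title="Example" increasedPadding onOutsideClick={onClose}>
      <p>Content rendered with the roomier padding variant.</p>
    </Modal>
  );
}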
diff --git a/web/src/components/MoveFolderModal.tsx b/web/src/components/MoveFolderModal.tsx
new file mode 100644
index 0000000000..281ac5b56d
--- /dev/null
+++ b/web/src/components/MoveFolderModal.tsx
@@ -0,0 +1,53 @@
+import React from "react";
+import { Button } from "@/components/ui/button";
+
+interface Folder {
+  id: number;
+  name: string;
+}
+
+interface MoveFolderModalProps {
+  isOpen: boolean;
+  onClose: () => void;
+  onMove: (targetFolderId: number) => void;
+  folders: Folder[];
+  currentFolderId: number;
+}
+
+export const MoveFolderModal: React.FC<MoveFolderModalProps> = ({
+  isOpen,
+  onClose,
+  onMove,
+  folders,
+  currentFolderId,
+}) => {
+  if (!isOpen) return null;
+
+  return (
+    <div onClick={onClose}>
+      <div onClick={(e) => e.stopPropagation()}>
+        <h2>Move Folder</h2>
+        <p>Select a destination folder:</p>
+        <div>
+          {folders
+            .filter((folder) => folder.id !== currentFolderId)
+            .map((folder) => (
+              <Button
+                key={folder.id}
+                variant="outline"
+                onClick={() => onMove(folder.id)}
+              >
+                {folder.name}
+              </Button>
+            ))}
+        </div>
+        <div>
+          <Button variant="outline" onClick={onClose}>
+            Cancel
+          </Button>
+        </div>
+      </div>
+    </div>
+  );
+};
diff --git a/web/src/components/SearchResultIcon.tsx b/web/src/components/SearchResultIcon.tsx
index 00bc31dc33..8175ee1bc5 100644
--- a/web/src/components/SearchResultIcon.tsx
+++ b/web/src/components/SearchResultIcon.tsx
@@ -1,3 +1,4 @@
+"use client";
 import { useState, useEffect } from "react";
 import faviconFetch from "favicon-fetch";
 import { SourceIcon } from "./SourceIcon";
diff --git a/web/src/components/Status.tsx b/web/src/components/Status.tsx
index 9ac38534aa..12e235cea7 100644
--- a/web/src/components/Status.tsx
+++ b/web/src/components/Status.tsx
@@ -11,6 +11,12 @@ import {
 } from "react-icons/fi";
 import { HoverPopup } from "./HoverPopup";
 import { ConnectorCredentialPairStatus } from "@/app/admin/connector/[ccPairId]/types";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
 
 export function IndexAttemptStatus({
   status,
@@ -29,14 +35,14 @@ export function IndexAttemptStatus({
   );
 
   if (errorMsg) {
     badge = (
-      <HoverPopup
-        mainContent={<div>{icon}</div>}
-        popupContent={
-          <div>
-            {errorMsg}
-          </div>
-        }
-      />
+      <TooltipProvider>
+        <Tooltip>
+          <TooltipTrigger asChild>
+            <div>{icon}</div>
+          </TooltipTrigger>
+          <TooltipContent>
+            {errorMsg}
+          </TooltipContent>
+        </Tooltip>
+      </TooltipProvider>
     );
   } else {
     badge = icon;
diff --git a/web/src/components/TokenDisplay.tsx b/web/src/components/TokenDisplay.tsx
new file mode 100644
index 0000000000..66a9474624
--- /dev/null
+++ b/web/src/components/TokenDisplay.tsx
@@ -0,0 +1,60 @@
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { getDisplayNameForModel } from "@/lib/hooks";
+
+interface TokenDisplayProps {
+  totalTokens: number;
+  maxTokens: number;
+  tokenPercentage: number;
+  selectedModel: {
+    modelName: string;
+  };
+}
+
+export function TokenDisplay({
+  totalTokens,
+  maxTokens,
+  tokenPercentage,
+  selectedModel,
+}: TokenDisplayProps) {
+  return (
+    <TooltipProvider>
+      <Tooltip>
+        <TooltipTrigger asChild>
+          <div>
+            <div>
+              <div
+                className={
+                  tokenPercentage >= 100
+                    ? "bg-yellow-500 dark:bg-yellow-600"
+                    : "bg-green-500 dark:bg-green-600"
+                }
+                style={{
+                  width: `${Math.min(tokenPercentage, 100)}%`,
+                }}
+              />
+            </div>
+            <span>
+              {totalTokens.toLocaleString()} / {maxTokens.toLocaleString()}{" "}
+              LLM tokens
+            </span>
+          </div>
+        </TooltipTrigger>
+        <TooltipContent>
+          <p>
+            Maximum tokens for the default model{" "}
+            {getDisplayNameForModel(selectedModel.modelName)}. If exceeded,
+            chat will run a search over the documents rather than including
+            all of their contents.
+          </p>
+        </TooltipContent>
+      </Tooltip>
+    </TooltipProvider>
+  );
+}
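// --- Illustrative sketch (not part of the diff): feeding TokenDisplay from a
// --- token count. `maxTokens` would come from the model's context window;
// --- the constant and model name here are placeholders.
import { TokenDisplay } from "@/components/TokenDisplay";

function TokenBudgetExample({ totalTokens }: { totalTokens: number }) {
  const maxTokens = 128_000; // hypothetical context window
  return (
    <TokenDisplay
      totalTokens={totalTokens}
      maxTokens={maxTokens}
      tokenPercentage={(totalTokens / maxTokens) * 100}
      selectedModel={{ modelName: "gpt-4o" }}
    />
  );
}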
diff --git a/web/src/components/admin/ClientLayout.tsx b/web/src/components/admin/ClientLayout.tsx
index 8ce7d98d35..a1da8662dd 100644
--- a/web/src/components/admin/ClientLayout.tsx
+++ b/web/src/components/admin/ClientLayout.tsx
@@ -449,7 +449,7 @@ export function ClientLayout({
         ]}
       />
-
+
diff --git a/web/src/components/chat/Header.tsx b/web/src/components/chat/Header.tsx
index 52e7a49e4e..b5a03c393f 100644
--- a/web/src/components/chat/Header.tsx
+++ b/web/src/components/chat/Header.tsx
@@ -14,6 +14,7 @@ import { ModeToggle } from "@/app/chat/modal/ThemeToggle";
 
 export default function FunctionalHeader({
   page,
+  removeHeight,
   currentChatSession,
   setSharingModalVisible,
   toggleSidebar = () => null,
@@ -23,6 +24,7 @@ export default function FunctionalHeader({
   toggleUserSettings,
   hideUserDropdown,
 }: {
+  removeHeight?: boolean;
   reset?: () => void;
   page: pageType;
   sidebarToggled?: boolean;
@@ -68,7 +70,11 @@ export default function FunctionalHeader({
     router.push(newChatUrl);
   };
   return (
-
+
diff --git a/web/src/components/chat/TextView.tsx b/web/src/components/chat/TextView.tsx
--- a/web/src/components/chat/TextView.tsx
+++ b/web/src/components/chat/TextView.tsx
@@ -?,? +?,? @@ interface TextViewProps {
-  presentingDocument: OnyxDocument;
+  presentingDocument: MinimalOnyxDocument;
   onClose: () => void;
 }
@@ -64,7 +64,9 @@ export default function TextView({
 
   const fetchFile = useCallback(async () => {
     setIsLoading(true);
-    const fileId = presentingDocument.document_id.split("__")[1];
+    const fileId =
+      presentingDocument.document_id.split("__")[1] ||
+      presentingDocument.document_id;
 
     try {
       const response = await fetch(
@@ -116,7 +118,7 @@ export default function TextView({
   const handleDownload = () => {
     const link = document.createElement("a");
     link.href = fileUrl;
-    link.download = fileName;
+    link.download = presentingDocument.document_id || fileName;
     document.body.appendChild(link);
     link.click();
     document.body.removeChild(link);
@@ -129,7 +131,7 @@ export default function TextView({
-
+
@@ -156,7 +158,6 @@ export default function TextView({
-
           {isLoading ? (
diff --git a/web/src/components/chat/sources/SourceCard.tsx b/web/src/components/chat/sources/SourceCard.tsx
index 7609e508b9..9d57d7faeb 100644
--- a/web/src/components/chat/sources/SourceCard.tsx
+++ b/web/src/components/chat/sources/SourceCard.tsx
@@ -4,8 +4,12 @@ import { OnyxDocument } from "@/lib/search/interfaces";
 import { truncateString } from "@/lib/utils";
 import { openDocument } from "@/lib/search/utils";
 import { ValidSources } from "@/lib/types";
-import React from "react";
+import React, { useEffect, useState } from "react";
 import { SearchResultIcon } from "@/components/SearchResultIcon";
+import { FileDescriptor } from "@/app/chat/interfaces";
+import { FiFileText } from "react-icons/fi";
+import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils";
+import { FileResponse } from "@/app/chat/my-documents/DocumentsContext";
 
 export const ResultIcon = ({
   doc,
@@ -136,3 +140,82 @@ export function SeeMoreBlock({
   );
 }
+
+export function getUniqueFileIcons(files: FileResponse[]): JSX.Element[] {
+  const uniqueIcons: JSX.Element[] = [];
+  const seenExtensions = new Set<string>();
+
+  // Helper function to get a styled icon
+  const getStyledIcon = (
+    fileName: string,
+    fileId: number,
+    link_url?: string | null
+  ) => {
+    return React.cloneElement(
+      getFileIconFromFileNameAndLink(fileName, link_url),
+      {
+        key: `file-${fileId}`,
+      }
+    );
+  };
+
+  for (const file of files) {
+    uniqueIcons.push(getStyledIcon(file.name, file.id, file.link_url));
+  }
+
+  // If we have zero icons, use a fallback
+  if (uniqueIcons.length === 0) {
+    return [
+      getFileIconFromFileNameAndLink("fallback1.txt"),
+      getFileIconFromFileNameAndLink("fallback2.txt"),
+      getFileIconFromFileNameAndLink("fallback3.txt"),
+    ];
+  }
+
+  // Duplicate the last icon if fewer than 3 icons
+  while (uniqueIcons.length < 3) {
+    // The last icon in the array
+    const lastIcon = uniqueIcons[uniqueIcons.length - 1];
+    // Clone it with a new key
+    uniqueIcons.push(
+      React.cloneElement(lastIcon, {
+        key: `${lastIcon.key}-dup-${uniqueIcons.length}`,
+      })
+    );
+  }
+
+  // Slice to just the first 3 if there are more than 3
+  return uniqueIcons.slice(0, 3);
+}
+
+export function FilesSeeMoreBlock({
+  toggleDocumentSelection,
+  files,
+  toggled,
+  fullWidth = false,
+}: {
+  toggleDocumentSelection: () => void;
+  files: FileResponse[];
+  toggled: boolean;
+  fullWidth?: boolean;
+}) {
+  const [iconsToRender, setIconsToRender] = useState<JSX.Element[]>([]);
+  useEffect(() => {
+    setIconsToRender(files.length > 2 ? getUniqueFileIcons(files) : []);
+  }, [files]);
+
+  return (
+    <button
+      onClick={toggleDocumentSelection}
+      className={fullWidth ? "w-full" : undefined}
+    >
+      {iconsToRender}
+      <span>{toggled ? "Hide" : "See more"}</span>
+    </button>
+  );
+}
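// --- Illustrative sketch (not part of the diff): FilesSeeMoreBlock beside a
// --- file source list; `files` and the toggle state are the caller's own.
import { useState } from "react";
import { FilesSeeMoreBlock } from "@/components/chat/sources/SourceCard";
import { FileResponse } from "@/app/chat/my-documents/DocumentsContext";

function FileSourcesExample({ files }: { files: FileResponse[] }) {
  const [expanded, setExpanded] = useState(false);
  return (
    <FilesSeeMoreBlock
      files={files}
      toggled={expanded}
      toggleDocumentSelection={() => setExpanded((v) => !v)}
    />
  );
}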
diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx
index e7f0a73901..3f3d1a0fc8 100644
--- a/web/src/components/icons/icons.tsx
+++ b/web/src/components/icons/icons.tsx
@@ -1,8 +1,6 @@
 "use client";
 
 import {
-  Notebook,
-  Key,
   Trash,
   XSquare,
   LinkBreak,
@@ -35,7 +33,7 @@ import {
   FiBarChart2,
 } from "react-icons/fi";
 import { SiBookstack } from "react-icons/si";
-import Image, { StaticImageData } from "next/image";
+import { StaticImageData } from "next/image";
 import jiraSVG from "../../../public/Jira.svg";
 import confluenceSVG from "../../../public/Confluence.svg";
 import deepseekSVG from "../../../public/Deepseek.svg";
@@ -46,8 +44,6 @@ import metaSVG from "../../../public/Meta.svg";
 import mistralSVG from "../../../public/Mistral.svg";
 import openSourceIcon from "../../../public/OpenSource.png";
 import litellmIcon from "../../../public/litellm.png";
-
-import awsWEBP from "../../../public/Amazon.webp";
 import azureIcon from "../../../public/Azure.png";
 import asanaIcon from "../../../public/Asana.png";
 import anthropicSVG from "../../../public/Anthropic.svg";
@@ -55,7 +51,6 @@ import nomicSVG from "../../../public/nomic.svg";
 import microsoftIcon from "../../../public/microsoft.png";
 import microsoftSVG from "../../../public/Microsoft.svg";
 import mixedBreadSVG from "../../../public/Mixedbread.png";
-
 import OCIStorageSVG from "../../../public/OCI.svg";
 import googleCloudStorageIcon from "../../../public/GoogleCloudStorage.png";
 import guruIcon from "../../../public/Guru.svg";
@@ -69,9 +64,8 @@ import zendeskIcon from "../../../public/Zendesk.svg";
 import dropboxIcon from "../../../public/Dropbox.png";
 import egnyteIcon from "../../../public/Egnyte.png";
 import slackIcon from "../../../public/Slack.png";
-import discordIcon from "../../../public/discord.webp";
+import discordIcon from "../../../public/discord.png";
 import airtableIcon from "../../../public/Airtable.svg";
-
 import s3Icon from "../../../public/S3.png";
 import r2Icon from "../../../public/r2.png";
 import salesforceIcon from "../../../public/Salesforce.png";
@@ -86,12 +80,11 @@ import wikipediaIcon from "../../../public/Wikipedia.png";
 import discourseIcon from "../../../public/Discourse.png";
 import clickupIcon from "../../../public/Clickup.svg";
 import cohereIcon from "../../../public/Cohere.svg";
-import voyageIcon from "../../../public/Voyage.png";
-import googleIcon from "../../../public/Google.webp";
+import googleIcon from "../../../public/Google.png";
 import xenforoIcon from "../../../public/Xenforo.svg";
 import highspotIcon from "../../../public/Highspot.png";
 import { FaGithub, FaRobot } from "react-icons/fa";
-
+import Image from "next/image";
 import { cn } from "@/lib/utils";
 
 export interface IconProps {
@@ -1314,11 +1307,6 @@ export const ProductboardIcon = ({
   );
 
-export const AWSIcon = ({
-  size = 16,
-  className = defaultTailwindCSS,
-}: IconProps) => <Image ... />;
-
 export const AzureIcon = ({
   size = 16,
   className = defaultTailwindCSS,
@@ -3151,8 +3139,8 @@ export const WebSearchIcon = ({
-
+
   );
 
@@ -3236,6 +3224,28 @@ export const SearchAssistantIcon = ({
   );
 };
 
+export const SortIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
 export const CirclingArrowIcon = ({
   size = 24,
   className = defaultTailwindCSS,
@@ -3276,7 +3286,7 @@ export const CirclingArrowIcon = ({
   );
 };
 
-export const SortIcon = ({
+export const KnowledgeGroupIcon = ({
   size = 24,
   className = defaultTailwindCSS,
 }: IconProps) => {
@@ -3284,14 +3294,200 @@
   );
 };
+
+export const FileOptionIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const PDFIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const DOCIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const ImagesIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const XMLIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const TXTIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const HTMLIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const JSONIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
+
+export const FolderMoveIcon = ({
+  size = 24,
+  className = defaultTailwindCSS,
+}: IconProps) => {
+  return (
+    <svg ... />
+  );
+};
diff --git a/web/src/components/modals/CreateEntityModal.tsx b/web/src/components/modals/CreateEntityModal.tsx
new file mode 100644
index 0000000000..467c4ece32
--- /dev/null
+++ b/web/src/components/modals/CreateEntityModal.tsx
@@ -0,0 +1,78 @@
+import React, { useState } from "react";
+import {
+  Dialog,
+  DialogContent,
+  DialogHeader,
+  DialogTitle,
+  DialogTrigger,
+} from "@/components/ui/dialog";
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { Label } from "@/components/ui/label";
+
+interface CreateEntityModalProps {
+  title: string;
+  entityName: string;
+  onSubmit: (name: string) => void;
+  placeholder?: string;
+  trigger: React.ReactNode;
+  open: boolean;
+  setOpen: (open: boolean) => void;
+  hideLabel?: boolean;
+  type?: string;
+}
+
+export default function CreateEntityModal({
+  title,
+  entityName,
+  onSubmit,
+  trigger,
+  placeholder,
+  open,
+  setOpen,
+  hideLabel = false,
+  type = "text",
+}: CreateEntityModalProps) {
+  const [name, setName] = useState("");
+
+  const handleSubmit = (e: React.FormEvent) => {
+    e.preventDefault();
+    if (name.trim()) {
+      onSubmit(name.trim());
+    }
+  };
+
+  return (
+    <Dialog open={open} onOpenChange={setOpen}>
+      <DialogTrigger asChild>{trigger}</DialogTrigger>
+      <DialogContent>
+        <DialogHeader>
+          <DialogTitle>{title}</DialogTitle>
+        </DialogHeader>
+        <form onSubmit={handleSubmit}>
+          <div>
+            {!hideLabel && <Label htmlFor="name">{entityName} name</Label>}
+            <Input
+              id="name"
+              type={type}
+              value={name}
+              onChange={(e) => setName(e.target.value)}
+              placeholder={
+                placeholder || `Enter ${entityName.toLowerCase()} name`
+              }
+              required
+              className="w-full focus-visible:border focus-visible:border-neutral-200 focus-visible:ring-0 !focus:ring-offset-0 !focus:ring-0 !focus:border-0 !focus:ring-transparent !focus:outline-none"
+            />
+          </div>
+          <Button type="submit">Create</Button>
+        </form>
+      </DialogContent>
+    </Dialog>
+  );
+}
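// --- Illustrative sketch (not part of the diff): CreateEntityModal driving
// --- folder creation; createNewFolder comes from the documentsService module
// --- added at the end of this diff, and the button labels are arbitrary.
import { useState } from "react";
import CreateEntityModal from "@/components/modals/CreateEntityModal";
import { Button } from "@/components/ui/button";
import { createNewFolder } from "@/services/documentsService";

function NewFolderButton() {
  const [open, setOpen] = useState(false);
  return (
    <CreateEntityModal
      title="Create New Folder"
      entityName="Folder"
      open={open}
      setOpen={setOpen}
      trigger={<Button onClick={() => setOpen(true)}>New Folder</Button>}
      onSubmit={async (name) => {
        await createNewFolder(name, "");
        setOpen(false);
      }}
    />
  );
}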
diff --git a/web/src/components/ui/TabToggle.tsx b/web/src/components/ui/TabToggle.tsx
new file mode 100644
index 0000000000..2ef4214d58
--- /dev/null
+++ b/web/src/components/ui/TabToggle.tsx
@@ -0,0 +1,53 @@
+import React from "react";
+import { cn } from "@/lib/utils";
+
+export interface TabOption {
+  id: string;
+  label: string;
+  icon?: React.ReactNode;
+}
+
+interface TabToggleProps {
+  options: TabOption[];
+  value: string;
+  onChange: (value: string) => void;
+  className?: string;
+}
+
+export function TabToggle({
+  options,
+  value,
+  onChange,
+  className,
+}: TabToggleProps) {
+  return (
+    <div className={cn("flex items-center", className)}>
+      {options.map((option) => (
+        <button
+          key={option.id}
+          type="button"
+          onClick={() => onChange(option.id)}
+          className={cn(option.id === value && "font-medium")}
+        >
+          {option.icon}
+          <span>{option.label}</span>
+        </button>
+      ))}
+    </div>
+  );
+}
diff --git a/web/src/components/ui/button.tsx b/web/src/components/ui/button.tsx
index 3a8b690830..907b32391b 100644
--- a/web/src/components/ui/button.tsx
+++ b/web/src/components/ui/button.tsx
@@ -11,6 +11,7 @@ const buttonVariants = cva(
     variant: {
       agent:
         "bg-agent text-white hover:bg-agent-hovered dark:bg-agent dark:text-white dark:hover:bg-agent/90",
+      menu: "w-full justify-start text-neutral-500 !gap-x-2 !py-0 hover:bg-neutral-100 hover:text-neutral-700 dark:hover:bg-neutral-800 dark:hover:text-neutral-50",
       success:
         "bg-green-100 text-green-600 hover:bg-green-500/90 dark:bg-green-700 dark:text-green-100 dark:hover:bg-green-600/90",
       "success-reverse":
@@ -55,11 +56,30 @@ const buttonVariants = cva(
       next: "bg-neutral-700 text-neutral-50 hover:bg-neutral-700/90 dark:bg-neutral-300 dark:text-neutral-900 dark:hover:bg-neutral-400",
       "next-reverse":
         "bg-neutral-50 text-neutral-700 hover:bg-neutral-50/90 dark:bg-neutral-800 dark:text-neutral-200 dark:hover:bg-neutral-700",
+      "time-day":
+        "border border-neutral-200 bg-blue-50/70 text-blue-700 hover:bg-blue-50 hover:border-blue-200 dark:border-blue-900/40 dark:bg-blue-950/30 dark:text-blue-300 dark:hover:bg-blue-950/40 dark:hover:border-blue-900/50",
+      "time-day-selected":
+        "border border-neutral-200 bg-blue-50 text-blue-700 dark:border-blue-900/40 dark:bg-blue-950/40 dark:text-blue-300",
+
+      "time-week":
+        "border border-neutral-200 bg-green-50/70 text-green-700 hover:bg-green-50 hover:border-green-200 dark:border-green-900/40 dark:bg-green-950/30 dark:text-green-300 dark:hover:bg-green-950/40 dark:hover:border-green-900/50",
+      "time-week-selected":
+        "border border-neutral-200 bg-green-50 text-green-700 dark:border-green-900/40 dark:bg-green-950/40 dark:text-green-300",
+
+      "time-month":
+        "border border-neutral-200 bg-purple-50/70 text-purple-700 hover:bg-purple-50 hover:border-purple-200 dark:border-purple-900/40 dark:bg-purple-950/30 dark:text-purple-300 dark:hover:bg-purple-950/40 dark:hover:border-purple-900/50",
+      "time-month-selected":
+        "border border-neutral-200 bg-purple-50 text-purple-700 dark:border-purple-900/40 dark:bg-purple-950/40 dark:text-purple-300",
+
+      "time-all":
+        "border border-neutral-200 bg-red-50/70 text-red-700 hover:bg-red-50 hover:border-red-200 dark:border-red-900/40 dark:bg-red-950/30 dark:text-red-300 dark:hover:bg-red-950/40 dark:hover:border-red-900/50",
+      "time-all-selected":
+        "border border-neutral-200 bg-red-50 text-red-700 dark:border-red-900/40 dark:bg-red-950/40 dark:text-red-300",
     },
     size: {
       default: "h-10 px-4 py-2",
-      xs: "h-8 px-3 py-1",
       sm: "h-9 px-3",
+      xs: "h-7 rounded-md px-2",
       lg: "h-11 px-8",
       icon: "h-10 w-10",
     },
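// --- Illustrative sketch (not part of the diff): a two-tab TabToggle; the
// --- option ids and state handling are arbitrary.
import { useState } from "react";
import { TabToggle } from "@/components/ui/TabToggle";

function ViewSwitcher() {
  const [view, setView] = useState("files");
  return (
    <TabToggle
      options={[
        { id: "files", label: "Files" },
        { id: "folders", label: "Folders" },
      ]}
      value={view}
      onChange={setView}
    />
  );
}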
"" - : "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-neutral-950 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm dark:border-neutral-800 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:file:text-neutral-50 dark:placeholder:text-neutral-400 dark:focus-visible:ring-neutral-300", + : "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50 md:text-sm dark:border-neutral-800 dark:bg-neutral-950 dark:ring-offset-neutral-950 dark:file:text-neutral-50 dark:placeholder:text-neutral-400 dark:focus-visible:ring-neutral-300", + "!focus:ring-0 !focus-visible:ring-transparent !focus-visible:ring-0 !focus:outline-none", + "flex h-10 w-full rounded-md border border-border bg-background/75 focus:border-border-dark focus:ring-none focus:outline-none px-3 py-2 text-base ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground disabled:cursor-not-allowed disabled:opacity-50 md:text-sm", className )} ref={ref} diff --git a/web/src/components/ui/select.tsx b/web/src/components/ui/select.tsx index f0691aa305..935e3a6b8c 100644 --- a/web/src/components/ui/select.tsx +++ b/web/src/components/ui/select.tsx @@ -150,6 +150,25 @@ const SelectItem = React.forwardRef< {children} + + // + // {!selected && Icon && ( + // + // + // + // )} + + // + // {children} + // + // )); SelectItem.displayName = SelectPrimitive.Item.displayName; diff --git a/web/src/components/ui/tooltip.tsx b/web/src/components/ui/tooltip.tsx index 0c04f5f0dd..eaa05a1cef 100644 --- a/web/src/components/ui/tooltip.tsx +++ b/web/src/components/ui/tooltip.tsx @@ -24,6 +24,7 @@ const TooltipContent = React.forwardRef< backgroundColor?: string; showTick?: boolean; tickSide?: "top" | "bottom" | "left" | "right"; + side?: "top" | "bottom" | "left" | "right"; } >( ( @@ -34,6 +35,7 @@ const TooltipContent = React.forwardRef< backgroundColor, showTick = false, tickSide = "bottom", + side = "top", ...props }, ref @@ -41,13 +43,14 @@ const TooltipContent = React.forwardRef< { + if (linkUrl) { + return ; + } + const extension = fileName.split(".").pop()?.toLowerCase(); + if (extension === "pdf") { + return ; + } else if (extension === "txt") { + return ; + } else if (extension === "doc" || extension === "docx") { + return ; + } else if (extension === "html" || extension === "htm") { + return ; + } else if (extension === "json") { + return ; + } else if ( + ["jpg", "jpeg", "png", "gif", "svg", "webp"].includes(extension || "") + ) { + return ; + } else if (extension === "xml") { + return ; + } else { + if (fileName.includes(".")) { + try { + // Check if fileName could be a valid domain when prefixed with https:// + const url = new URL(`https://${fileName}`); + if (url.hostname === fileName) { + return ; + } + } catch (e) { + // If URL construction fails, it's not a valid domain + } + return ; + } else { + return ; + } + } +}; diff --git a/web/src/lib/dateUtils.ts b/web/src/lib/dateUtils.ts index 5cdf574970..e4765bcd1f 100644 --- a/web/src/lib/dateUtils.ts +++ b/web/src/lib/dateUtils.ts @@ -118,14 +118,38 @@ export const getDateRangeString = (from: Date | null, to: Date | null) => { export const getTimeAgoString = (date: Date | null) => { if (!date) return null; - const diffMs = new Date().getTime() - date.getTime(); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 
24)); const diffWeeks = Math.floor(diffDays / 7); const diffMonths = Math.floor(diffDays / 30); - if (buildDateString(date).includes("Today")) return "Today"; + if (now.toDateString() === date.toDateString()) return "Today"; if (diffDays === 1) return "Yesterday"; if (diffDays < 7) return `${diffDays}d ago`; if (diffDays < 30) return `${diffWeeks}w ago`; return `${diffMonths}mo ago`; }; + +export const getFormattedDateTime = (date: Date | null) => { + if (!date) return null; + + const now = new Date(); + const isToday = now.toDateString() === date.toDateString(); + + if (isToday) { + // If it's today, return the time in format like "3:45 PM" + return date.toLocaleTimeString("en-US", { + hour: "numeric", + minute: "2-digit", + hour12: true, + }); + } else { + // Otherwise return the date in format like "Jan 15, 2023" + return date.toLocaleDateString("en-US", { + month: "short", + day: "numeric", + year: "numeric", + }); + } +}; diff --git a/web/src/lib/search/interfaces.ts b/web/src/lib/search/interfaces.ts index cad836d2d0..39413725aa 100644 --- a/web/src/lib/search/interfaces.ts +++ b/web/src/lib/search/interfaces.ts @@ -95,13 +95,15 @@ export interface Quote { export interface QuotesInfoPacket { quotes: Quote[]; } - -export interface OnyxDocument { +export interface MinimalOnyxDocument { document_id: string; + semantic_identifier: string | null; +} + +export interface OnyxDocument extends MinimalOnyxDocument { link: string; source_type: ValidSources; blurb: string; - semantic_identifier: string | null; boost: number; hidden: boolean; score: number; @@ -188,6 +190,8 @@ export interface Filters { source_type: string[] | null; document_set: string[] | null; time_cutoff: Date | null; + user_file_ids: number[] | null; + // user_folder_ids: number[] | null; } export interface SearchRequestArgs { diff --git a/web/src/lib/search/utils.ts b/web/src/lib/search/utils.ts index 55601f9f8d..c205214171 100644 --- a/web/src/lib/search/utils.ts +++ b/web/src/lib/search/utils.ts @@ -6,7 +6,9 @@ export const buildFilters = ( sources: SourceMetadata[], documentSets: string[], timeRange: DateRangePickerValue | null, - tags: Tag[] + tags: Tag[], + userFileIds?: number[] | null, + userFolderIds?: number[] | null ): Filters => { const filters = { source_type: @@ -14,6 +16,8 @@ export const buildFilters = ( document_set: documentSets.length > 0 ? documentSets : null, time_cutoff: timeRange?.from ? 
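// --- Illustrative sketch (not part of the diff): mapping file names and
// --- links to icons with the getFileIconFromFileNameAndLink helper added
// --- above; the expected icons follow the reconstructed extension mapping.
import { getFileIconFromFileNameAndLink } from "@/lib/assistantIconUtils";

const icons = [
  getFileIconFromFileNameAndLink("report.pdf"), // extension match -> PDF icon
  getFileIconFromFileNameAndLink("notes.txt"), // extension match -> TXT icon
  getFileIconFromFileNameAndLink("example.com"), // bare domain -> favicon icon
  getFileIconFromFileNameAndLink("doc.docx", "https://example.com/doc"), // link wins
];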
diff --git a/web/src/lib/search/utils.ts b/web/src/lib/search/utils.ts
index 55601f9f8d..c205214171 100644
--- a/web/src/lib/search/utils.ts
+++ b/web/src/lib/search/utils.ts
@@ -6,7 +6,9 @@ export const buildFilters = (
   sources: SourceMetadata[],
   documentSets: string[],
   timeRange: DateRangePickerValue | null,
-  tags: Tag[]
+  tags: Tag[],
+  userFileIds?: number[] | null,
+  userFolderIds?: number[] | null
 ): Filters => {
   const filters = {
     source_type:
@@ -14,6 +16,8 @@ export const buildFilters = (
     document_set: documentSets.length > 0 ? documentSets : null,
     time_cutoff: timeRange?.from ? timeRange.from : null,
     tags: tags,
+    user_file_ids: userFileIds || null,
+    // user_folder_ids: userFolderIds || null,
   };
 
   return filters;
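// --- Illustrative sketch (not part of the diff): passing selected user file
// --- ids through buildFilters. The sources/documentSets/tags values are
// --- placeholders.
import { buildFilters } from "@/lib/search/utils";

const filters = buildFilters(
  [], // sources
  [], // document sets
  null, // time range
  [], // tags
  [42, 43], // user_file_ids from the My Documents selection
  null // user_folder_ids (still commented out in the Filters interface)
);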
diff --git a/web/src/services/documentsService.ts b/web/src/services/documentsService.ts
new file mode 100644
index 0000000000..9e613fc327
--- /dev/null
+++ b/web/src/services/documentsService.ts
@@ -0,0 +1,144 @@
+import {
+  FileResponse,
+  FolderResponse,
+  FileUploadResponse,
+} from "@/app/chat/my-documents/DocumentsContext";
+
+export async function fetchFolders(): Promise<FolderResponse[]> {
+  const response = await fetch("/api/user/folder");
+  if (!response.ok) {
+    throw new Error("Failed to fetch folders");
+  }
+  return response.json();
+}
+
+export async function createNewFolder(
+  name: string,
+  description: string
+): Promise<FolderResponse> {
+  const response = await fetch("/api/user/folder", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ name, description }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json();
+    throw new Error(errorData.detail || "Failed to create folder");
+  }
+  return response.json();
+}
+
+export async function deleteFolder(folderId: number): Promise<void> {
+  const response = await fetch(`/api/user/folder/${folderId}`, {
+    method: "DELETE",
+  });
+  if (!response.ok) {
+    throw new Error("Failed to delete folder");
+  }
+}
+
+export async function deleteFile(fileId: number): Promise<void> {
+  const response = await fetch(`/api/user/file/${fileId}`, {
+    method: "DELETE",
+  });
+  if (!response.ok) {
+    throw new Error("Failed to delete file");
+  }
+}
+
+export async function createFileFromLinkRequest(
+  url: string,
+  folderId: number | null
+): Promise<FileUploadResponse> {
+  const response = await fetch("/api/user/file/create-from-link", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ url, folder_id: folderId }),
+  });
+  if (!response.ok) {
+    const errorData = await response.json();
+    throw new Error(errorData.detail || "Failed to create file from link");
+  }
+  return response.json();
+}
+
+export async function getFolderDetails(
+  folderId: number
+): Promise<FolderResponse> {
+  const response = await fetch(`/api/user/folder/${folderId}`);
+  if (!response.ok) {
+    throw new Error("Failed to fetch folder details");
+  }
+  return response.json();
+}
+
+export async function updateFolderDetails(
+  folderId: number,
+  name: string,
+  description: string
+): Promise<void> {
+  const response = await fetch(`/api/user/folder/${folderId}`, {
+    method: "PUT",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ name, description }),
+  });
+  if (!response.ok) {
+    throw new Error("Failed to update folder details");
+  }
+}
+
+export async function moveItem(
+  itemId: number,
+  newFolderId: number | null,
+  isFolder: boolean
+): Promise<void> {
+  const endpoint = isFolder
+    ? `/api/user/folder/${itemId}/move`
+    : `/api/user/file/${itemId}/move`;
+  const response = await fetch(endpoint, {
+    method: "PUT",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ new_folder_id: newFolderId }),
+  });
+  if (!response.ok) {
+    throw new Error("Failed to move item");
+  }
+}
+
+export async function renameItem(
+  itemId: number,
+  newName: string,
+  isFolder: boolean
+): Promise<void> {
+  if (isFolder) {
+    const response = await fetch(`/api/user/folder/${itemId}`, {
+      method: "PUT",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ name: newName }),
+    });
+    if (!response.ok) {
+      throw new Error("Failed to rename folder");
+    }
+  } else {
+    const endpoint = `/api/user/file/${itemId}/rename?name=${encodeURIComponent(
+      newName
+    )}`;
+    const response = await fetch(endpoint, { method: "PUT" });
+    if (!response.ok) {
+      throw new Error("Failed to rename file");
+    }
+  }
+}
+
+export async function downloadItem(documentId: string): Promise<Blob> {
+  const response = await fetch(
+    `/api/chat/file/${encodeURIComponent(documentId)}`,
+    {
+      method: "GET",
+    }
+  );
+  if (!response.ok) {
+    throw new Error("Failed to fetch file");
+  }
+  return response.blob();
+}
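// --- Illustrative sketch (not part of the diff): a create → rename → move →
// --- delete round trip with the new documentsService helpers, assuming
// --- FolderResponse carries a numeric `id`.
import {
  createNewFolder,
  renameItem,
  moveItem,
  deleteFolder,
} from "@/services/documentsService";

async function folderLifecycleExample() {
  const folder = await createNewFolder("Research", "Papers to index");
  await renameItem(folder.id, "Research 2024", true); // isFolder = true
  await moveItem(folder.id, null, true); // move the folder back to the root
  await deleteFolder(folder.id);
}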