Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-03-17 21:32:36 +01:00)

reworked drive+confluence frontend and implied backend changes (#3143)

* reworked drive+confluence frontend and implied backend changes
* fixed oauth admin tests
* fixed service account tests
* frontend cleanup
* copy change
* details!
* added key
* so good
* whoops!
* fixed mnore treljsertjoslijt
* has issue with boolean form
* should be done

This commit is contained in:
parent 259fc049b7
commit 6e83fe3a39
@@ -20,6 +20,7 @@ env:
  JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
  # Google
  GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }}
  GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1 }}
  GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }}
  GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }}
  GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }}
@@ -81,15 +81,15 @@ class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector):
        if cql_query:
            # if a cql_query is provided, we will use it to fetch the pages
            cql_page_query = cql_query
        elif space:
            # if no cql_query is provided, we will use the space to fetch the pages
            cql_page_query += f" and space='{quote(space)}'"
        elif page_id:
            # if a cql_query is not provided, we will use the page_id to fetch the page
            if index_recursively:
                cql_page_query += f" and ancestor='{page_id}'"
            else:
                # if neither a space nor a cql_query is provided, we will use the page_id to fetch the page
                cql_page_query += f" and id='{page_id}'"
        elif space:
            # if no cql_query or page_id is provided, we will use the space to fetch the pages
            cql_page_query += f" and space='{quote(space)}'"

        self.cql_page_query = cql_page_query
        self.cql_time_filter = ""
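For orientation, a minimal standalone sketch of how the reworked branch order above composes a CQL page query; the "type=page" base string is an assumed starting value, not taken from this diff.

from urllib.parse import quote

def build_cql_page_query(
    cql_query: str | None = None,
    space: str | None = None,
    page_id: str | None = None,
    index_recursively: bool = False,
) -> str:
    cql_page_query = "type=page"  # assumed base query, for illustration only
    if cql_query:
        # an explicit CQL query wins outright
        cql_page_query = cql_query
    elif page_id:
        # scope to a single page, or to everything underneath it
        if index_recursively:
            cql_page_query += f" and ancestor='{page_id}'"
        else:
            cql_page_query += f" and id='{page_id}'"
    elif space:
        # otherwise scope to the requested space
        cql_page_query += f" and space='{quote(space)}'"
    return cql_page_query

# build_cql_page_query(space="ENG") -> "type=page and space='ENG'"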
@@ -15,6 +15,7 @@ from danswer.connectors.google_drive.doc_conversion import (
    convert_drive_item_to_document,
)
from danswer.connectors.google_drive.file_retrieval import crawl_folders_for_files
from danswer.connectors.google_drive.file_retrieval import get_all_files_for_oauth
from danswer.connectors.google_drive.file_retrieval import get_all_files_in_my_drive
from danswer.connectors.google_drive.file_retrieval import get_files_in_shared_drive
from danswer.connectors.google_drive.models import GoogleDriveFileType
@@ -82,12 +83,31 @@ def _process_files_batch(
        yield doc_batch


def _clean_requested_drive_ids(
    requested_drive_ids: set[str],
    requested_folder_ids: set[str],
    all_drive_ids_available: set[str],
) -> tuple[set[str], set[str]]:
    invalid_requested_drive_ids = requested_drive_ids - all_drive_ids_available
    filtered_folder_ids = requested_folder_ids - all_drive_ids_available
    if invalid_requested_drive_ids:
        logger.warning(
            f"Some shared drive IDs were not found. IDs: {invalid_requested_drive_ids}"
        )
        logger.warning("Checking for folder access instead...")
        filtered_folder_ids.update(invalid_requested_drive_ids)

    valid_requested_drive_ids = requested_drive_ids - invalid_requested_drive_ids
    return valid_requested_drive_ids, filtered_folder_ids
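A quick usage sketch of _clean_requested_drive_ids with made-up IDs, showing how an unrecognized drive ID gets demoted to a folder candidate:

# Hypothetical IDs, for illustration only.
valid_drives, folder_candidates = _clean_requested_drive_ids(
    requested_drive_ids={"drive_a", "not_really_a_drive"},
    requested_folder_ids={"folder_x"},
    all_drive_ids_available={"drive_a", "drive_b"},
)
# valid_drives == {"drive_a"}
# folder_candidates == {"folder_x", "not_really_a_drive"}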
class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
    def __init__(
        self,
        include_shared_drives: bool = True,
        include_shared_drives: bool = False,
        include_my_drives: bool = False,
        include_files_shared_with_me: bool = False,
        shared_drive_urls: str | None = None,
        include_my_drives: bool = True,
        my_drive_emails: str | None = None,
        shared_folder_urls: str | None = None,
        batch_size: int = INDEX_BATCH_SIZE,
@@ -120,22 +140,36 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
        if (
            not include_shared_drives
            and not include_my_drives
            and not include_files_shared_with_me
            and not shared_folder_urls
            and not my_drive_emails
            and not shared_drive_urls
        ):
            raise ValueError(
                "At least one of include_shared_drives, include_my_drives,"
                " or shared_folder_urls must be true"
                "Nothing to index. Please specify at least one of the following: "
                "include_shared_drives, include_my_drives, include_files_shared_with_me, "
                "shared_folder_urls, or my_drive_emails"
            )

        self.batch_size = batch_size

        self.include_shared_drives = include_shared_drives
        specific_requests_made = False
        if bool(shared_drive_urls) or bool(my_drive_emails) or bool(shared_folder_urls):
            specific_requests_made = True

        self.include_files_shared_with_me = (
            False if specific_requests_made else include_files_shared_with_me
        )
        self.include_my_drives = False if specific_requests_made else include_my_drives
        self.include_shared_drives = (
            False if specific_requests_made else include_shared_drives
        )

        shared_drive_url_list = _extract_str_list_from_comma_str(shared_drive_urls)
        self._requested_shared_drive_ids = set(
            _extract_ids_from_urls(shared_drive_url_list)
        )

        self.include_my_drives = include_my_drives
        self._requested_my_drive_emails = set(
            _extract_str_list_from_comma_str(my_drive_emails)
        )
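In short, any explicit URL or email selection switches the broad include flags off. A small standalone sketch of that gating, with hypothetical inputs:

def resolve_include_flags(
    include_shared_drives: bool,
    include_my_drives: bool,
    include_files_shared_with_me: bool,
    shared_drive_urls: str | None,
    my_drive_emails: str | None,
    shared_folder_urls: str | None,
) -> dict[str, bool]:
    # Any specific request narrows indexing to just those targets.
    specific = bool(shared_drive_urls or my_drive_emails or shared_folder_urls)
    return {
        "include_shared_drives": False if specific else include_shared_drives,
        "include_my_drives": False if specific else include_my_drives,
        "include_files_shared_with_me": False if specific else include_files_shared_with_me,
    }

# resolve_include_flags(True, True, True, "https://drive.google.com/drive/folders/xyz", None, None)
# -> every flag comes back False, because a specific drive URL was supplied (URL is made up)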
@@ -225,26 +259,20 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
            creds=self.creds,
            user_email=self.primary_admin_email,
        )
        is_service_account = isinstance(self.creds, ServiceAccountCredentials)
        all_drive_ids = set()
        # We don't want to fail if we're using OAuth because you can
        # access your my drive as a non admin user in an org still
        ignore_fetch_failure = isinstance(self.creds, OAuthCredentials)
        for drive in execute_paginated_retrieval(
            retrieval_function=primary_drive_service.drives().list,
            list_key="drives",
            continue_on_404_or_403=ignore_fetch_failure,
            useDomainAdminAccess=True,
            useDomainAdminAccess=is_service_account,
            fields="drives(id)",
        ):
            all_drive_ids.add(drive["id"])

        if not all_drive_ids:
            logger.warning(
                "No drives found. This is likely because oauth user "
                "is not an admin and cannot view all drive IDs. "
                "Continuing with only the shared drive IDs specified in the config."
                "No drives found even though we are indexing shared drives was requested."
            )
            all_drive_ids = set(self._requested_shared_drive_ids)

        return all_drive_ids

@@ -261,14 +289,9 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):

        # if we are including my drives, try to get the current user's my
        # drive if any of the following are true:
        # - no specific emails were requested
        # - include_my_drives is true
        # - the current user's email is in the requested emails
        # - we are using OAuth (in which case we assume that is the only email we will try)
        if self.include_my_drives and (
            not self._requested_my_drive_emails
            or user_email in self._requested_my_drive_emails
            or isinstance(self.creds, OAuthCredentials)
        ):
        if self.include_my_drives or user_email in self._requested_my_drive_emails:
            yield from get_all_files_in_my_drive(
                service=drive_service,
                update_traversed_ids_func=self._update_traversed_parent_ids,
@@ -299,7 +322,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
            end=end,
        )

    def _fetch_drive_items(
    def _manage_service_account_retrieval(
        self,
        is_slim: bool,
        start: SecondsSinceUnixEpoch | None = None,
@@ -309,29 +332,16 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):

        all_drive_ids: set[str] = self._get_all_drive_ids()

        # remove drive ids from the folder ids because they are queried differently
        filtered_folder_ids = self._requested_folder_ids - all_drive_ids

        # Remove drive_ids that are not in the all_drive_ids and check them as folders instead
        invalid_drive_ids = self._requested_shared_drive_ids - all_drive_ids
        if invalid_drive_ids:
            logger.warning(
                f"Some shared drive IDs were not found. IDs: {invalid_drive_ids}"
        drive_ids_to_retrieve: set[str] = set()
        folder_ids_to_retrieve: set[str] = set()
        if self._requested_shared_drive_ids or self._requested_folder_ids:
            drive_ids_to_retrieve, folder_ids_to_retrieve = _clean_requested_drive_ids(
                requested_drive_ids=self._requested_shared_drive_ids,
                requested_folder_ids=self._requested_folder_ids,
                all_drive_ids_available=all_drive_ids,
            )
            logger.warning("Checking for folder access instead...")
            filtered_folder_ids.update(invalid_drive_ids)

        # If including shared drives, use the requested IDs if provided,
        # otherwise use all drive IDs
        filtered_drive_ids = set()
        if self.include_shared_drives:
            if self._requested_shared_drive_ids:
                # Remove invalid drive IDs from requested IDs
                filtered_drive_ids = (
                    self._requested_shared_drive_ids - invalid_drive_ids
                )
            else:
                filtered_drive_ids = all_drive_ids
        elif self.include_shared_drives:
            drive_ids_to_retrieve = all_drive_ids

        # Process users in parallel using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=10) as executor:
@@ -340,8 +350,8 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
                    self._impersonate_user_for_retrieval,
                    email,
                    is_slim,
                    filtered_drive_ids,
                    filtered_folder_ids,
                    drive_ids_to_retrieve,
                    folder_ids_to_retrieve,
                    start,
                    end,
                ): email
@@ -353,13 +363,101 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
                yield from future.result()

        remaining_folders = (
            filtered_drive_ids | filtered_folder_ids
            drive_ids_to_retrieve | folder_ids_to_retrieve
        ) - self._retrieved_ids
        if remaining_folders:
            logger.warning(
                f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
            )

    def _manage_oauth_retrieval(
        self,
        is_slim: bool,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[GoogleDriveFileType]:
        drive_service = get_drive_service(self.creds, self.primary_admin_email)

        if self.include_files_shared_with_me or self.include_my_drives:
            yield from get_all_files_for_oauth(
                service=drive_service,
                include_files_shared_with_me=self.include_files_shared_with_me,
                include_my_drives=self.include_my_drives,
                include_shared_drives=self.include_shared_drives,
                is_slim=is_slim,
                start=start,
                end=end,
            )

        all_requested = (
            self.include_files_shared_with_me
            and self.include_my_drives
            and self.include_shared_drives
        )
        if all_requested:
            # If all 3 are true, we already yielded from get_all_files_for_oauth
            return

        all_drive_ids = self._get_all_drive_ids()
        drive_ids_to_retrieve: set[str] = set()
        folder_ids_to_retrieve: set[str] = set()
        if self._requested_shared_drive_ids or self._requested_folder_ids:
            drive_ids_to_retrieve, folder_ids_to_retrieve = _clean_requested_drive_ids(
                requested_drive_ids=self._requested_shared_drive_ids,
                requested_folder_ids=self._requested_folder_ids,
                all_drive_ids_available=all_drive_ids,
            )
        elif self.include_shared_drives:
            drive_ids_to_retrieve = all_drive_ids

        for drive_id in drive_ids_to_retrieve:
            yield from get_files_in_shared_drive(
                service=drive_service,
                drive_id=drive_id,
                is_slim=is_slim,
                update_traversed_ids_func=self._update_traversed_parent_ids,
                start=start,
                end=end,
            )

        # Even if no folders were requested, we still check if any drives were requested
        # that could be folders.
        remaining_folders = folder_ids_to_retrieve - self._retrieved_ids
        for folder_id in remaining_folders:
            yield from crawl_folders_for_files(
                service=drive_service,
                parent_id=folder_id,
                traversed_parent_ids=self._retrieved_ids,
                update_traversed_ids_func=self._update_traversed_parent_ids,
                start=start,
                end=end,
            )

        remaining_folders = (
            drive_ids_to_retrieve | folder_ids_to_retrieve
        ) - self._retrieved_ids
        if remaining_folders:
            logger.warning(
                f"Some folders/drives were not retrieved. IDs: {remaining_folders}"
            )

    def _fetch_drive_items(
        self,
        is_slim: bool,
        start: SecondsSinceUnixEpoch | None = None,
        end: SecondsSinceUnixEpoch | None = None,
    ) -> Iterator[GoogleDriveFileType]:
        retrieval_method = (
            self._manage_service_account_retrieval
            if isinstance(self.creds, ServiceAccountCredentials)
            else self._manage_oauth_retrieval
        )
        return retrieval_method(
            is_slim=is_slim,
            start=start,
            end=end,
        )

    def _extract_docs_from_google_drive(
        self,
        start: SecondsSinceUnixEpoch | None = None,
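The _fetch_drive_items dispatch shown in the hunk above just picks one of two generator methods by credential type. A stripped-down standalone sketch of the same pattern; the credential classes here are stand-ins, not the real google-auth types:

from collections.abc import Iterator

class ServiceAccountCreds:  # stand-in for the service account credential type
    pass

class OAuthCreds:  # stand-in for the OAuth credential type
    pass

class DriveFetcher:
    def __init__(self, creds: object) -> None:
        self.creds = creds

    def _service_account_retrieval(self) -> Iterator[str]:
        # the real method impersonates each user and fans out with a thread pool
        yield "files fetched per impersonated user"

    def _oauth_retrieval(self) -> Iterator[str]:
        # the real method fetches as the single authorized user
        yield "files fetched as the authorized user"

    def fetch(self) -> Iterator[str]:
        retrieval_method = (
            self._service_account_retrieval
            if isinstance(self.creds, ServiceAccountCreds)
            else self._oauth_retrieval
        )
        return retrieval_method()

# list(DriveFetcher(OAuthCreds()).fetch()) -> ["files fetched as the authorized user"]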
@@ -140,8 +140,8 @@ def get_files_in_shared_drive(
) -> Iterator[GoogleDriveFileType]:
    # If we know we are going to folder crawl later, we can cache the folders here
    # Get all folders being queried and add them to the traversed set
    query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
    query += " and trashed = false"
    folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
    folder_query += " and trashed = false"
    found_folders = False
    for file in execute_paginated_retrieval(
        retrieval_function=service.files().list,
@@ -152,7 +152,7 @@ def get_files_in_shared_drive(
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        fields="nextPageToken, files(id)",
        q=query,
        q=folder_query,
    ):
        update_traversed_ids_func(file["id"])
        found_folders = True
@@ -160,9 +160,9 @@ def get_files_in_shared_drive(
        update_traversed_ids_func(drive_id)

    # Get all files in the shared drive
    query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    query += " and trashed = false"
    query += _generate_time_range_filter(start, end)
    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    file_query += _generate_time_range_filter(start, end)
    yield from execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
@@ -172,7 +172,7 @@ def get_files_in_shared_drive(
        supportsAllDrives=True,
        includeItemsFromAllDrives=True,
        fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
        q=query,
        q=file_query,
    )


@@ -185,14 +185,16 @@ def get_all_files_in_my_drive(
) -> Iterator[GoogleDriveFileType]:
    # If we know we are going to folder crawl later, we can cache the folders here
    # Get all folders being queried and add them to the traversed set
    query = "trashed = false and 'me' in owners"
    folder_query = f"mimeType = '{DRIVE_FOLDER_TYPE}'"
    folder_query += " and trashed = false"
    folder_query += " and 'me' in owners"
    found_folders = False
    for file in execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        corpora="user",
        fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
        q=query,
        q=folder_query,
    ):
        update_traversed_ids_func(file["id"])
        found_folders = True
@@ -200,18 +202,52 @@ def get_all_files_in_my_drive(
        update_traversed_ids_func(get_root_folder_id(service))

    # Then get the files
    query = "trashed = false and 'me' in owners"
    query += _generate_time_range_filter(start, end)
    fields = "files(id, name, mimeType, webViewLink, modifiedTime, createdTime)"
    if not is_slim:
        fields += ", files(permissions, permissionIds, owners)"

    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    file_query += " and 'me' in owners"
    file_query += _generate_time_range_filter(start, end)
    yield from execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        corpora="user",
        fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
        q=query,
        q=file_query,
    )


def get_all_files_for_oauth(
    service: Any,
    include_files_shared_with_me: bool,
    include_my_drives: bool,
    # One of the above 2 should be true
    include_shared_drives: bool,
    is_slim: bool = False,
    start: SecondsSinceUnixEpoch | None = None,
    end: SecondsSinceUnixEpoch | None = None,
) -> Iterator[GoogleDriveFileType]:
    should_get_all = (
        include_shared_drives and include_my_drives and include_files_shared_with_me
    )
    corpora = "allDrives" if should_get_all else "user"

    file_query = f"mimeType != '{DRIVE_FOLDER_TYPE}'"
    file_query += " and trashed = false"
    file_query += _generate_time_range_filter(start, end)

    if not should_get_all:
        if include_files_shared_with_me and not include_my_drives:
            file_query += " and not 'me' in owners"
        if not include_files_shared_with_me and include_my_drives:
            file_query += " and 'me' in owners"

    yield from execute_paginated_retrieval(
        retrieval_function=service.files().list,
        list_key="files",
        corpora=corpora,
        includeItemsFromAllDrives=should_get_all,
        supportsAllDrives=should_get_all,
        fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS,
        q=file_query,
    )
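For reference, a self-contained sketch of the corpora and query-string logic in get_all_files_for_oauth above; the folder MIME type and the simplified time filter are assumptions for illustration, not values pulled from this diff.

FOLDER_MIME = "application/vnd.google-apps.folder"  # assumed value of DRIVE_FOLDER_TYPE

def build_oauth_file_query(
    include_files_shared_with_me: bool,
    include_my_drives: bool,
    include_shared_drives: bool,
    modified_after: str | None = None,  # RFC 3339 timestamp; simplified stand-in for the time filter
) -> tuple[str, str]:
    should_get_all = (
        include_shared_drives and include_my_drives and include_files_shared_with_me
    )
    corpora = "allDrives" if should_get_all else "user"

    file_query = f"mimeType != '{FOLDER_MIME}' and trashed = false"
    if modified_after:
        file_query += f" and modifiedTime >= '{modified_after}'"

    if not should_get_all:
        # Narrow by ownership when only one of the two "user" sources is wanted.
        if include_files_shared_with_me and not include_my_drives:
            file_query += " and not 'me' in owners"
        if not include_files_shared_with_me and include_my_drives:
            file_query += " and 'me' in owners"
    return corpora, file_query

# build_oauth_file_query(True, False, False)
# -> ("user", "mimeType != 'application/vnd.google-apps.folder' and trashed = false and not 'me' in owners")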
@@ -21,6 +21,16 @@ from tests.load_env_vars import load_env_vars
load_env_vars()


_USER_TO_OAUTH_CREDENTIALS_MAP = {
    "admin@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR",
    "test_user_1@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1",
}

_USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP = {
    "admin@onyx-test.com": "GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR",
}


def parse_credentials(env_str: str) -> dict:
    """
    Parse a double-escaped JSON string from environment variables into a Python dictionary.
@@ -48,23 +58,25 @@ def parse_credentials(env_str: str) -> dict:
@pytest.fixture
def google_drive_oauth_connector_factory() -> Callable[..., GoogleDriveConnector]:
    def _connector_factory(
        primary_admin_email: str = "admin@onyx-test.com",
        include_shared_drives: bool = True,
        shared_drive_urls: str | None = None,
        include_my_drives: bool = True,
        my_drive_emails: str | None = None,
        shared_folder_urls: str | None = None,
        primary_admin_email: str,
        include_shared_drives: bool,
        shared_drive_urls: str | None,
        include_my_drives: bool,
        my_drive_emails: str | None,
        shared_folder_urls: str | None,
        include_files_shared_with_me: bool,
    ) -> GoogleDriveConnector:
        print("Creating GoogleDriveConnector with OAuth credentials")
        connector = GoogleDriveConnector(
            include_shared_drives=include_shared_drives,
            shared_drive_urls=shared_drive_urls,
            include_my_drives=include_my_drives,
            include_files_shared_with_me=include_files_shared_with_me,
            my_drive_emails=my_drive_emails,
            shared_folder_urls=shared_folder_urls,
        )

        json_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"]
        json_string = os.environ[_USER_TO_OAUTH_CREDENTIALS_MAP[primary_admin_email]]
        refried_json_string = json.dumps(parse_credentials(json_string))

        credentials_json = {
@@ -82,12 +94,13 @@ def google_drive_service_acct_connector_factory() -> (
    Callable[..., GoogleDriveConnector]
):
    def _connector_factory(
        primary_admin_email: str = "admin@onyx-test.com",
        include_shared_drives: bool = True,
        shared_drive_urls: str | None = None,
        include_my_drives: bool = True,
        my_drive_emails: str | None = None,
        shared_folder_urls: str | None = None,
        primary_admin_email: str,
        include_shared_drives: bool,
        shared_drive_urls: str | None,
        include_my_drives: bool,
        my_drive_emails: str | None,
        shared_folder_urls: str | None,
        include_files_shared_with_me: bool,
    ) -> GoogleDriveConnector:
        print("Creating GoogleDriveConnector with service account credentials")
        connector = GoogleDriveConnector(
@@ -96,9 +109,12 @@ def google_drive_service_acct_connector_factory() -> (
            include_my_drives=include_my_drives,
            my_drive_emails=my_drive_emails,
            shared_folder_urls=shared_folder_urls,
            include_files_shared_with_me=include_files_shared_with_me,
        )

        json_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"]
        json_string = os.environ[
            _USER_TO_SERVICE_ACCOUNT_CREDENTIALS_MAP[primary_admin_email]
        ]
        refried_json_string = json.dumps(parse_credentials(json_string))

        # Load Service Account Credentials
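A brief sketch of how a test resolves OAuth credentials for a given admin email via the map above; the env var is expected to be populated from the matching GitHub secret in CI, and a plain json.loads stands in here for parse_credentials (which also handles the double-escaping):

import json
import os

_USER_TO_OAUTH_CREDENTIALS_MAP = {
    "admin@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR",
    "test_user_1@onyx-test.com": "GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR_TEST_USER_1",
}

def load_oauth_credentials(primary_admin_email: str) -> dict:
    # Pick the env var that holds this user's OAuth credentials.
    env_var = _USER_TO_OAUTH_CREDENTIALS_MAP[primary_admin_email]
    env_str = os.environ[env_var]
    return json.loads(env_str)  # simplified; the real fixture calls parse_credentials()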
@@ -7,14 +7,14 @@ SHARED_DRIVE_FILES = list(range(20, 25))


ADMIN_FILE_IDS = list(range(0, 5))
ADMIN_FOLDER_3_FILE_IDS = list(range(65, 70))
ADMIN_FOLDER_3_FILE_IDS = list(range(65, 70))  # This folder is shared with test_user_1
TEST_USER_1_FILE_IDS = list(range(5, 10))
TEST_USER_2_FILE_IDS = list(range(10, 15))
TEST_USER_3_FILE_IDS = list(range(15, 20))
SHARED_DRIVE_1_FILE_IDS = list(range(20, 25))
FOLDER_1_FILE_IDS = list(range(25, 30))
FOLDER_1_1_FILE_IDS = list(range(30, 35))
FOLDER_1_2_FILE_IDS = list(range(35, 40))
FOLDER_1_2_FILE_IDS = list(range(35, 40))  # This folder is public
SHARED_DRIVE_2_FILE_IDS = list(range(40, 45))
FOLDER_2_FILE_IDS = list(range(45, 50))
FOLDER_2_1_FILE_IDS = list(range(50, 55))
@@ -75,26 +75,29 @@ ACCESS_MAPPING: dict[str, list[int]] = {
        + FOLDER_2_2_FILE_IDS
        + SECTIONS_FILE_IDS
    ),
    # This user has access to drive 1
    # This user has redundant access to folder 1 because of group access
    # This user has been given individual access to files in Admin's My Drive
    TEST_USER_1_EMAIL: (
        TEST_USER_1_FILE_IDS
        # This user has access to drive 1
        + SHARED_DRIVE_1_FILE_IDS
        # This user has redundant access to folder 1 because of group access
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        # This user has been given shared access to folder 3 in Admin's My Drive
        + ADMIN_FOLDER_3_FILE_IDS
        # This user has been given shared access to files 0 and 1 in Admin's My Drive
        + list(range(0, 2))
    ),
    # Group 1 includes this user, giving access to folder 1
    # This user has also been given access to folder 2-1
    # This user has also been given individual access to files in folder 2
    TEST_USER_2_EMAIL: (
        TEST_USER_2_FILE_IDS
        # Group 1 includes this user, giving access to folder 1
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        # This folder is public
        + FOLDER_1_2_FILE_IDS
        # Folder 2-1 is shared with this user
        + FOLDER_2_1_FILE_IDS
        # This user has been given shared access to files 45 and 46 in folder 2
        + list(range(45, 47))
    ),
    # This user can only see his own files and public files
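The tests below assemble their expected document sets by concatenating these ranges; for example, test_user_1's fully shared view works out as follows (an illustration only, using the constants above):

expected_for_test_user_1 = (
    TEST_USER_1_FILE_IDS          # 5..9,   own My Drive
    + SHARED_DRIVE_1_FILE_IDS     # 20..24, shared drive 1
    + FOLDER_1_FILE_IDS           # 25..29
    + FOLDER_1_1_FILE_IDS         # 30..34
    + FOLDER_1_2_FILE_IDS         # 35..39, public folder
    + ADMIN_FOLDER_3_FILE_IDS     # 65..69, shared from Admin's My Drive
    + list(range(0, 2))           # files 0 and 1 shared individually
)
# 5 + 5 + 5 + 5 + 5 + 5 + 2 = 32 file IDs in total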
@@ -5,6 +5,7 @@ from unittest.mock import patch

from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
@@ -26,8 +27,6 @@ from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_I
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_3_EMAIL


@patch(
@@ -40,8 +39,13 @@ def test_include_all(
) -> None:
    print("\n\nRunning test_include_all")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -77,8 +81,13 @@ def test_include_shared_drives_only(
) -> None:
    print("\n\nRunning test_include_shared_drives_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -112,8 +121,13 @@ def test_include_my_drives_only(
) -> None:
    print("\n\nRunning test_include_my_drives_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -138,8 +152,12 @@ def test_drive_one_only(
    print("\n\nRunning test_drive_one_only")
    drive_urls = [SHARED_DRIVE_1_URL]
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
    )
    retrieved_docs: list[Document] = []
@@ -170,19 +188,20 @@ def test_folder_and_shared_drive(
    drive_urls = [SHARED_DRIVE_1_URL]
    folder_urls = [FOLDER_2_URL]
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
@@ -216,10 +235,13 @@ def test_folders_only(
        FOLDER_1_1_URL,
    ]
    connector = google_drive_oauth_connector_factory(
        include_shared_drives=False,
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -238,37 +260,6 @@ def test_folders_only(
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_specific_emails(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_specific_emails")
    my_drive_emails = [
        TEST_USER_1_EMAIL,
        TEST_USER_3_EMAIL,
    ]
    connector = google_drive_oauth_connector_factory(
        include_shared_drives=False,
        include_my_drives=True,
        my_drive_emails=",".join([str(email) for email in my_drive_emails]),
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    # No matter who is specified, when using oauth, if include_my_drives is True,
    # we will get all the files from the admin's My Drive
    expected_file_ids = ADMIN_FILE_IDS + ADMIN_FOLDER_3_FILE_IDS
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
@@ -282,9 +273,13 @@ def test_personal_folders_only(
        FOLDER_3_URL,
    ]
    connector = google_drive_oauth_connector_factory(
        include_shared_drives=False,
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
        shared_drive_urls=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -5,6 +5,7 @@ from unittest.mock import patch

from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FOLDER_URL


@@ -18,14 +19,22 @@ def test_google_drive_sections(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    oauth_connector = google_drive_oauth_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=SECTIONS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    service_acct_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=SECTIONS_FOLDER_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    for connector in [oauth_connector, service_acct_connector]:
        retrieved_docs: list[Document] = []
@@ -5,6 +5,7 @@ from unittest.mock import patch

from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
@@ -43,8 +44,13 @@ def test_include_all(
) -> None:
    print("\n\nRunning test_include_all")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -83,8 +89,13 @@ def test_include_shared_drives_only(
) -> None:
    print("\n\nRunning test_include_shared_drives_only")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -118,8 +129,13 @@ def test_include_my_drives_only(
) -> None:
    print("\n\nRunning test_include_my_drives_only")
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -150,9 +166,13 @@ def test_drive_one_only(
    print("\n\nRunning test_drive_one_only")
    urls = [SHARED_DRIVE_1_URL]
    connector = google_drive_service_acct_connector_factory(
        include_shared_drives=True,
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=",".join([str(url) for url in urls]),
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -183,10 +203,13 @@ def test_folder_and_shared_drive(
    drive_urls = [SHARED_DRIVE_1_URL]
    folder_urls = [FOLDER_2_URL]
    connector = google_drive_service_acct_connector_factory(
        include_shared_drives=True,
        include_my_drives=True,
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=",".join([str(url) for url in drive_urls]),
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -194,12 +217,7 @@ def test_folder_and_shared_drive(

    # Should get everything except for the top level files in drive 2
    expected_file_ids = (
        ADMIN_FILE_IDS
        + ADMIN_FOLDER_3_FILE_IDS
        + TEST_USER_1_FILE_IDS
        + TEST_USER_2_FILE_IDS
        + TEST_USER_3_FILE_IDS
        + SHARED_DRIVE_1_FILE_IDS
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
@@ -233,10 +251,13 @@ def test_folders_only(
        FOLDER_1_1_URL,
    ]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_drive_urls=",".join([str(url) for url in shared_drive_urls]),
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -269,8 +290,12 @@ def test_specific_emails(
        TEST_USER_3_EMAIL,
    ]
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=True,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=",".join([str(email) for email in my_drive_emails]),
    )
    retrieved_docs: list[Document] = []
@@ -293,42 +318,17 @@ def get_specific_folders_in_my_drive(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning get_specific_folders_in_my_drive")
    my_drive_emails = [
        TEST_USER_1_EMAIL,
        TEST_USER_3_EMAIL,
    ]
    connector = google_drive_service_acct_connector_factory(
        include_shared_drives=False,
        include_my_drives=True,
        my_drive_emails=",".join([str(email) for email in my_drive_emails]),
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = TEST_USER_1_FILE_IDS + TEST_USER_3_FILE_IDS
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_personal_folders_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_personal_folders_only")
    folder_urls = [
        FOLDER_3_URL,
    ]
    connector = google_drive_oauth_connector_factory(
    connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=False,
        include_my_drives=False,
        include_files_shared_with_me=False,
        shared_folder_urls=",".join([str(url) for url in folder_urls]),
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
@@ -126,8 +126,13 @@ def test_all_permissions(
    google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    google_drive_connector = google_drive_service_acct_connector_factory(
        primary_admin_email=ADMIN_EMAIL,
        include_shared_drives=True,
        include_my_drives=True,
        include_files_shared_with_me=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )

    access_map: dict[str, ExternalAccess] = {}
backend/tests/daily/connectors/google_drive/test_user_1_oauth.py (new file, 218 lines)
@@ -0,0 +1,218 @@
import time
from collections.abc import Callable
from unittest.mock import MagicMock
from unittest.mock import patch

from danswer.connectors.google_drive.connector import GoogleDriveConnector
from danswer.connectors.models import Document
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import (
    assert_retrieved_docs_match_expected,
)
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_URL
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_3_URL
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_EMAIL
from tests.daily.connectors.google_drive.consts_and_utils import TEST_USER_1_FILE_IDS


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_all(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_all")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=True,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = (
        # These are the files from my drive
        TEST_USER_1_FILE_IDS
        # These are the files from shared drives
        + SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
        # These are the files shared with me from admin
        + ADMIN_FOLDER_3_FILE_IDS
        + list(range(0, 2))
    )
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drives_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_drives_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=True,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = (
        # These are the files from shared drives
        SHARED_DRIVE_1_FILE_IDS
        + FOLDER_1_FILE_IDS
        + FOLDER_1_1_FILE_IDS
        + FOLDER_1_2_FILE_IDS
    )
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_with_me_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_with_me_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=True,
        include_shared_drives=False,
        include_my_drives=False,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = (
        # These are the files shared with me from admin
        ADMIN_FOLDER_3_FILE_IDS
        + list(range(0, 2))
    )
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_my_drive_only(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_my_drive_only")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=None,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    # These are the files from my drive
    expected_file_ids = TEST_USER_1_FILE_IDS
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_my_drive_folder(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_my_drive_folder")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_3_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = (
        # this is a folder from admin's drive that is shared with me
        ADMIN_FOLDER_3_FILE_IDS
    )
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )


@patch(
    "danswer.file_processing.extract_file_text.get_unstructured_api_key",
    return_value=None,
)
def test_shared_drive_folder(
    mock_get_api_key: MagicMock,
    google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector],
) -> None:
    print("\n\nRunning test_shared_drive_folder")
    connector = google_drive_oauth_connector_factory(
        primary_admin_email=TEST_USER_1_EMAIL,
        include_files_shared_with_me=False,
        include_shared_drives=False,
        include_my_drives=True,
        shared_folder_urls=FOLDER_1_URL,
        shared_drive_urls=None,
        my_drive_emails=None,
    )
    retrieved_docs: list[Document] = []
    for doc_batch in connector.poll_source(0, time.time()):
        retrieved_docs.extend(doc_batch)

    expected_file_ids = FOLDER_1_FILE_IDS + FOLDER_1_1_FILE_IDS + FOLDER_1_2_FILE_IDS
    assert_retrieved_docs_match_expected(
        retrieved_docs=retrieved_docs,
        expected_file_ids=expected_file_ids,
    )
@@ -1,20 +1,13 @@
import React, { Dispatch, FC, SetStateAction, useState } from "react";
import CredentialSubText, {
  AdminBooleanFormField,
} from "@/components/credentials/CredentialFields";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import CredentialSubText from "@/components/credentials/CredentialFields";
import { ConnectionConfiguration } from "@/lib/connectors/connectors";
import SelectInput from "./ConnectorInput/SelectInput";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle";
import { AccessTypeForm } from "@/components/admin/connectors/AccessTypeForm";
import { AccessTypeGroupSelector } from "@/components/admin/connectors/AccessTypeGroupSelector";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import { RenderField } from "./FieldRendering";

export interface DynamicConnectionFormProps {
  config: ConnectionConfiguration;
@@ -25,105 +18,6 @@ export interface DynamicConnectionFormProps {
  currentCredential: Credential<any> | null;
}

interface RenderFieldProps {
  field: any;
  values: any;
  selectedFiles: File[];
  setSelectedFiles: Dispatch<SetStateAction<File[]>>;
  connector: ConfigurableSources;
  currentCredential: Credential<any> | null;
}

const RenderField: FC<RenderFieldProps> = ({
  field,
  values,
  selectedFiles,
  setSelectedFiles,
  connector,
  currentCredential,
}) => {
  if (
    field.visibleCondition &&
    !field.visibleCondition(values, currentCredential)
  ) {
    return null;
  }

  const label =
    typeof field.label === "function"
      ? field.label(currentCredential)
      : field.label;
  const description =
    typeof field.description === "function"
      ? field.description(currentCredential)
      : field.description;

  const fieldContent = (
    <>
      {field.type === "file" ? (
        <FileUpload
          name={field.name}
          selectedFiles={selectedFiles}
          setSelectedFiles={setSelectedFiles}
        />
      ) : field.type === "zip" ? (
        <FileInput
          name={field.name}
          label={label}
          optional={field.optional}
          description={description}
        />
      ) : field.type === "list" ? (
        <ListInput name={field.name} label={label} description={description} />
      ) : field.type === "select" ? (
        <SelectInput
          name={field.name}
          optional={field.optional}
          description={description}
          options={field.options || []}
          label={label}
        />
      ) : field.type === "number" ? (
        <NumberInput
          label={label}
          optional={field.optional}
          description={description}
          name={field.name}
        />
      ) : field.type === "checkbox" ? (
        <AdminBooleanFormField
          checked={values[field.name]}
          subtext={description}
          name={field.name}
          label={label}
        />
      ) : (
        <TextFormField
          subtext={description}
          optional={field.optional}
          type={field.type}
          label={label}
          name={field.name}
          isTextArea={true}
        />
      )}
    </>
  );

  if (
    field.visibleCondition &&
    field.visibleCondition(values, currentCredential)
  ) {
    return (
      <CollapsibleSection prompt={label} key={field.name}>
        {fieldContent}
      </CollapsibleSection>
    );
  } else {
    return <div key={field.name}>{fieldContent}</div>;
  }
};

const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
  config,
  selectedFiles,
@@ -136,14 +30,12 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({

  return (
    <>
      <h2 className="text-2xl font-bold text-text-800">{config.description}</h2>

      {config.subtext && (
        <CredentialSubText>{config.subtext}</CredentialSubText>
      )}

      <TextFormField
        subtext="A descriptive name for the connector. This will be used to identify the connector in the Admin UI."
        subtext="A descriptive name for the connector."
        type={"text"}
        label={"Connector Name"}
        name={"name"}
@@ -174,17 +66,20 @@ const DynamicConnectionForm: FC<DynamicConnectionFormProps> = ({
            setShowAdvancedOptions={setShowAdvancedOptions}
          />
          {showAdvancedOptions &&
            config.advanced_values.map((field) => (
              <RenderField
                key={field.name}
                field={field}
                values={values}
                selectedFiles={selectedFiles}
                setSelectedFiles={setSelectedFiles}
                connector={connector}
                currentCredential={currentCredential}
              />
            ))}
            config.advanced_values.map(
              (field) =>
                !field.hidden && (
                  <RenderField
                    key={field.name}
                    field={field}
                    values={values}
                    selectedFiles={selectedFiles}
                    setSelectedFiles={setSelectedFiles}
                    connector={connector}
                    currentCredential={currentCredential}
                  />
                )
            )}
        </>
      )}
    </>
@ -0,0 +1,224 @@
import React, { Dispatch, FC, SetStateAction } from "react";
import { AdminBooleanFormField } from "@/components/credentials/CredentialFields";
import { FileUpload } from "@/components/admin/connectors/FileUpload";
import { TabOption } from "@/lib/connectors/connectors";
import SelectInput from "./ConnectorInput/SelectInput";
import NumberInput from "./ConnectorInput/NumberInput";
import { TextFormField } from "@/components/admin/connectors/Field";
import ListInput from "./ConnectorInput/ListInput";
import FileInput from "./ConnectorInput/FileInput";
import { ConfigurableSources } from "@/lib/types";
import { Credential } from "@/lib/connectors/credentials";
import CollapsibleSection from "@/app/admin/assistants/CollapsibleSection";
import {
Tabs,
TabsContent,
TabsList,
TabsTrigger,
} from "@/components/ui/fully_wrapped_tabs";

interface TabsFieldProps {
tabField: TabOption;
values: any;
selectedFiles: File[];
setSelectedFiles: Dispatch<SetStateAction<File[]>>;
connector: ConfigurableSources;
currentCredential: Credential<any> | null;
}

const TabsField: FC<TabsFieldProps> = ({
tabField,
values,
selectedFiles,
setSelectedFiles,
connector,
currentCredential,
}) => {
return (
<div className="w-full">
{tabField.label && (
<div className="mb-4">
<h3 className="text-lg font-semibold">
{typeof tabField.label === "function"
? tabField.label(currentCredential)
: tabField.label}
</h3>
{tabField.description && (
<p className="text-sm text-muted-foreground mt-1">
{typeof tabField.description === "function"
? tabField.description(currentCredential)
: tabField.description}
</p>
)}
</div>
)}

<Tabs
defaultValue={tabField.tabs[0].value}
className="w-full"
onValueChange={(newTab) => {
// Clear values from other tabs but preserve defaults
tabField.tabs.forEach((tab) => {
if (tab.value !== newTab) {
tab.fields.forEach((field) => {
// Only clear if not default value
if (values[field.name] !== field.default) {
values[field.name] = field.default;
}
});
}
});
}}
>
<TabsList>
{tabField.tabs.map((tab) => (
<TabsTrigger key={tab.value} value={tab.value}>
{tab.label}
</TabsTrigger>
))}
</TabsList>
{tabField.tabs.map((tab) => (
<TabsContent key={tab.value} value={tab.value} className="">
{tab.fields.map((subField, index, array) => {
// Check visibility condition first
if (
subField.visibleCondition &&
!subField.visibleCondition(values, currentCredential)
) {
return null;
}

return (
<div
key={subField.name}
className={
index < array.length - 1 && subField.type !== "string_tab"
? "mb-4"
: ""
}
>
<RenderField
key={subField.name}
field={subField}
values={values}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
connector={connector}
currentCredential={currentCredential}
/>
</div>
);
})}
</TabsContent>
))}
</Tabs>
</div>
);
};

interface RenderFieldProps {
field: any;
values: any;
selectedFiles: File[];
setSelectedFiles: Dispatch<SetStateAction<File[]>>;
connector: ConfigurableSources;
currentCredential: Credential<any> | null;
}

export const RenderField: FC<RenderFieldProps> = ({
field,
values,
selectedFiles,
setSelectedFiles,
connector,
currentCredential,
}) => {
const label =
typeof field.label === "function"
? field.label(currentCredential)
: field.label;
const description =
typeof field.description === "function"
? field.description(currentCredential)
: field.description;

if (field.type === "tab") {
return (
<TabsField
tabField={field}
values={values}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
connector={connector}
currentCredential={currentCredential}
/>
);
}

const fieldContent = (
<>
{field.type === "file" ? (
<FileUpload
name={field.name}
selectedFiles={selectedFiles}
setSelectedFiles={setSelectedFiles}
/>
) : field.type === "zip" ? (
<FileInput
name={field.name}
label={label}
optional={field.optional}
description={description}
/>
) : field.type === "list" ? (
<ListInput name={field.name} label={label} description={description} />
) : field.type === "select" ? (
<SelectInput
name={field.name}
optional={field.optional}
description={description}
options={field.options || []}
label={label}
/>
) : field.type === "number" ? (
<NumberInput
label={label}
optional={field.optional}
description={description}
name={field.name}
/>
) : field.type === "checkbox" ? (
<AdminBooleanFormField
checked={values[field.name]}
subtext={description}
name={field.name}
label={label}
/>
) : field.type === "text" ? (
<TextFormField
subtext={description}
optional={field.optional}
type={field.type}
label={label}
name={field.name}
isTextArea={field.isTextArea || false}
defaultHeight={"h-15"}
/>
) : field.type === "string_tab" ? (
<div className="text-center">{description}</div>
) : (
<>INVALID FIELD TYPE</>
)}
</>
);

if (field.wrapInCollapsible) {
return (
<CollapsibleSection prompt={label} key={field.name}>
{fieldContent}
</CollapsibleSection>
);
}

return <div key={field.name}>{fieldContent}</div>;
};
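The new RenderField component above dispatches on field.type to pick a concrete input. As a rough usage sketch that is not part of this commit, the snippet below renders a single text option inside a Formik form; the import path and the field object are assumptions, since the new file's path is not visible in this hunk.

// Usage sketch, not part of the diff. Import path and field definition are assumed.
import React from "react";
import { Formik, Form } from "formik";
import { RenderField } from "@/components/admin/connectors/FieldRendering"; // path assumed

const wikiBaseField = {
  type: "text",
  label: "Wiki Base URL",
  name: "wiki_base",
  optional: false,
  description: "The base URL of your Confluence instance",
};

export function ExampleConnectorForm() {
  return (
    <Formik initialValues={{ wiki_base: "" }} onSubmit={() => {}}>
      {({ values }) => (
        <Form>
          <RenderField
            field={wikiBaseField}
            values={values}
            selectedFiles={[]}
            setSelectedFiles={() => {}}
            connector={"confluence" as any} // one of the ConfigurableSources values
            currentCredential={null}
          />
        </Form>
      )}
    </Formik>
  );
}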
@ -1,4 +1,4 @@
import { ErrorMessage, Field } from "formik";
import { ErrorMessage, Field, useField } from "formik";

import {
ExplanationText,
@ -96,18 +96,18 @@ export function AdminTextField({
name={name}
id={name}
className={`
${small && "text-sm"}
border
border-border
rounded
w-full
bg-input
py-2
px-3
mt-1
${heightString}
${fontSize}
${isCode ? " font-mono" : ""}
${small && "text-sm"}
border
border-border
rounded
w-full
bg-input
py-2
px-3
mt-1
${heightString}
${fontSize}
${isCode ? " font-mono" : ""}
`}
disabled={disabled}
placeholder={placeholder}
@ -143,13 +143,18 @@ export const AdminBooleanFormField = ({
alignTop,
onChange,
}: BooleanFormFieldProps) => {
const [field, meta, helpers] = useField(name);

return (
<div>
<label className={`flex text-sm`}>
<Field
name={name}
checked={checked}
<input
type="checkbox"
{...field}
checked={Boolean(field.value)}
onChange={(e) => {
helpers.setValue(e.target.checked);
}}
className={`mr-3 bg-white px-5 w-3.5 h-3.5 ${
alignTop ? "mt-1" : "my-auto"
}`}
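The AdminBooleanFormField change above swaps the Formik Field for a controlled checkbox driven by useField. A stripped-down sketch of that pattern, with illustrative component and prop names that are not from the diff, would look like this:

// Illustrative sketch of the useField-backed checkbox pattern; names are made up.
import React from "react";
import { useField } from "formik";

export function SketchCheckboxField({
  name,
  label,
}: {
  name: string;
  label: string;
}) {
  const [field, , helpers] = useField<boolean>(name);
  return (
    <label className="flex text-sm">
      <input
        type="checkbox"
        {...field}
        checked={Boolean(field.value)}
        onChange={(e) => helpers.setValue(e.target.checked)}
        className="mr-3"
      />
      {label}
    </label>
  );
}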
web/src/components/ui/fully_wrapped_tabs.tsx (new file, 94 lines)
@ -0,0 +1,94 @@
"use client";

import * as React from "react";
import * as TabsPrimitive from "@radix-ui/react-tabs";

import { cn } from "@/lib/utils";

const Tabs = TabsPrimitive.Root;

const TabsList = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.List>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
>(({ className, ...props }, ref) => (
<TabsPrimitive.List
ref={ref}
className={cn(
[
"inline-flex",
"flex w-full",
"items-center",
"justify-center",
"bg-background-150",
"text-text-500",
"dark:bg-background-800",
"dark:text-text-400",
"rounded-t-lg",
].join(" "),
className
)}
{...props}
/>
));
TabsList.displayName = TabsPrimitive.List.displayName;

const TabsTrigger = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.Trigger>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
>(({ className, ...props }, ref) => (
<TabsPrimitive.Trigger
ref={ref}
className={cn(
[
"relative",
"justify-center",
"flex w-full",
"border-b",
"data-[state=active]:border-t",
"data-[state=active]:border-l",
"data-[state=active]:border-r",
"data-[state=active]:border-b-0",
"p-2",
"data-[state=active]:bg-white",
"data-[state=active]:rounded-t-lg",
"data-[state=active]:shadow-[3px_-3px_6px_-3px_rgba(0,0,0,0.15)]",
].join(" "),
className
)}
{...props}
/>
));
TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;

const TabsContent = React.forwardRef<
React.ElementRef<typeof TabsPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
>(({ className, ...props }, ref) => (
<TabsPrimitive.Content
ref={ref}
className={cn(
[
"mt-2",
"ring-offset-background",
"focus-visible:outline-none",
"focus-visible:ring-2",
"focus-visible:ring-text-950",
"focus-visible:ring-offset-2",
"dark:ring-offset-background-950",
"dark:focus-visible:ring-text-300",
"border-l",
"border-r",
"border-b",
"px-6 pt-6 pb-3",
"-mt-px",
"rounded-b-lg",
"shadow-[3px_-4px_6px_-3px_rgba(0,0,0,0.15)]",
].join(" "),
className
)}
{...props}
/>
));
TabsContent.displayName = TabsPrimitive.Content.displayName;

export { Tabs, TabsList, TabsTrigger, TabsContent };
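For reference, the wrapped Radix tab primitives exported above compose in the usual way; a minimal usage sketch (the tab values and content here are illustrative, not from this commit):

// Usage sketch for the wrapped tabs; values and content are illustrative.
import React from "react";
import {
  Tabs,
  TabsContent,
  TabsList,
  TabsTrigger,
} from "@/components/ui/fully_wrapped_tabs";

export function IndexingScopeTabs() {
  return (
    <Tabs defaultValue="general" className="w-full">
      <TabsList>
        <TabsTrigger value="general">General</TabsTrigger>
        <TabsTrigger value="specific">Specific</TabsTrigger>
      </TabsList>
      <TabsContent value="general">General options go here.</TabsContent>
      <TabsContent value="specific">Specific options go here.</TabsContent>
    </Tabs>
  );
}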
@ -42,6 +42,7 @@ export interface Option {
values: any,
currentCredential: Credential<any> | null
) => boolean;
wrapInCollapsible?: boolean;
}

export interface SelectOption extends Option {
@ -59,6 +60,7 @@ export interface ListOption extends Option {
export interface TextOption extends Option {
type: "text";
default?: string;
isTextArea?: boolean;
}

export interface NumberOption extends Option {
@ -81,6 +83,31 @@ export interface ZipOption extends Option {
default?: string;
}

export interface StringTabOption extends Option {
type: "string_tab";
default?: string;
}

export interface TabOption extends Option {
type: "tab";
defaultTab?: string;
tabs: {
label: string;
value: string;
fields: (
| BooleanOption
| ListOption
| TextOption
| NumberOption
| SelectOption
| FileOption
| ZipOption
| StringTabOption
)[];
}[];
default?: [];
}

export interface ConnectionConfiguration {
description: string;
subtext?: string;
@ -92,6 +119,7 @@ export interface ConnectionConfiguration {
| SelectOption
| FileOption
| ZipOption
| TabOption
)[];
advanced_values: (
| BooleanOption
@ -101,6 +129,7 @@ export interface ConnectionConfiguration {
| SelectOption
| FileOption
| ZipOption
| TabOption
)[];
overrideDefaultFreq?: number;
}
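To make the new TabOption shape concrete, here is an illustrative value that type-checks against the interfaces above; it is not one of the configs added in this commit, and the names and strings are made up.

// Illustrative only: a TabOption value conforming to the new interface.
import { TabOption } from "@/lib/connectors/connectors";

const scopeTab: TabOption = {
  type: "tab",
  name: "indexing_scope",
  label: "How should we index this source?",
  optional: true,
  defaultTab: "everything",
  tabs: [
    {
      value: "everything",
      label: "Everything",
      fields: [
        {
          type: "string_tab",
          name: "everything",
          label: "Everything",
          description: "Index everything the provided credentials can access.",
        },
      ],
    },
  ],
};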
@ -211,64 +240,96 @@ export const connectorConfigs: Record<
description: "Configure Google Drive connector",
values: [
{
type: "checkbox",
label: "Include shared drives?",
description:
"This will allow Danswer to index everything in your shared drives.",
name: "include_shared_drives",
optional: true,
default: true,
},
{
type: "text",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "If you are a non-admin user authenticated using Google Oauth, you will need to specify the URLs for the shared drives you would like to index. Leaving this blank will NOT index any shared drives."
: "Enter a comma separated list of the URLs for the shared drive you would like to index. Leave this blank to index all shared drives.";
},
label: "Shared Drive URLs",
name: "shared_drive_urls",
visibleCondition: (values) => values.include_shared_drives,
optional: true,
},
{
type: "checkbox",
label: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Include My Drive?"
: "Include Everyone's My Drive?";
},
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in your My Drive."
: "This will allow Danswer to index everything in everyone's My Drives.";
},
name: "include_my_drives",
optional: true,
default: true,
},
{
type: "text",
description:
"Enter a comma separated list of the emails of the users whose MyDrive you want to index. Leave blank to index all MyDrives.",
label: "My Drive Emails",
name: "my_drive_emails",
visibleCondition: (values, currentCredential) =>
values.include_my_drives &&
!currentCredential?.credential_json?.google_tokens,
optional: true,
},
],
advanced_values: [
{
type: "text",
description:
"Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed. Note: This will be in addition to whatever settings you have selected above, so leave those blank if you only want to index the folders specified here.",
label: "Folder URLs",
name: "shared_folder_urls",
type: "tab",
name: "indexing_scope",
label: "How should we index your Google Drive?",
optional: true,
tabs: [
{
value: "general",
label: "General",
fields: [
{
type: "checkbox",
label: "Include shared drives?",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in the shared drives you have access to."
: "This will allow Danswer to index everything in your Organization's shared drives.";
},
name: "include_shared_drives",
default: false,
},
{
type: "checkbox",
label: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Include My Drive?"
: "Include Everyone's My Drive?";
},
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "This will allow Danswer to index everything in your My Drive."
: "This will allow Danswer to index everything in everyone's My Drives.";
},
name: "include_my_drives",
default: false,
},
{
type: "checkbox",
description:
"This will allow Danswer to index all files shared with you.",
label: "Include All Files Shared With You?",
name: "include_files_shared_with_me",
visibleCondition: (values, currentCredential) =>
currentCredential?.credential_json?.google_tokens,
default: false,
},
],
},
{
value: "specific",
label: "Specific",
fields: [
{
type: "text",
description: (currentCredential) => {
return currentCredential?.credential_json?.google_tokens
? "Enter a comma separated list of the URLs for the shared drive you would like to index. You must have access to these shared drives."
: "Enter a comma separated list of the URLs for the shared drive you would like to index.";
},
label: "Shared Drive URLs",
name: "shared_drive_urls",
default: "",
isTextArea: true,
},
{
type: "text",
description:
"Enter a comma separated list of the URLs of any folders you would like to index. The files located in these folders (and all subfolders) will be indexed.",
label: "Folder URLs",
name: "shared_folder_urls",
default: "",
isTextArea: true,
},
{
type: "text",
description:
"Enter a comma separated list of the emails of the users whose MyDrive you want to index.",
label: "My Drive Emails",
name: "my_drive_emails",
visibleCondition: (values, currentCredential) =>
!currentCredential?.credential_json?.google_tokens,
default: "",
isTextArea: true,
},
],
},
],
defaultTab: "space",
},
],
advanced_values: [],
},
gmail: {
description: "Configure Gmail connector",
@ -282,26 +343,7 @@ export const connectorConfigs: Record<
},
confluence: {
description: "Configure Confluence connector",
subtext: `Specify the base URL of your Confluence instance, the space name, and optionally a specific page ID to index. If no page ID is provided, the entire space will be indexed. If no space is specified, all available Confluence spaces will be indexed.`,
values: [
{
type: "text",
query: "Enter the wiki base URL:",
label: "Wiki Base URL",
name: "wiki_base",
optional: false,
description:
"The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)",
},
{
type: "text",
query: "Enter the space:",
label: "Space",
name: "space",
optional: true,
description:
"The Confluence space name to index (e.g. `KB`). If no space is specified, all available Confluence spaces will be indexed.",
},
{
type: "checkbox",
query: "Is this a Confluence Cloud instance?",
@ -312,36 +354,92 @@ export const connectorConfigs: Record<
description:
"Check if this is a Confluence Cloud instance, uncheck for Confluence Server/Data Center",
},
],
advanced_values: [
{
type: "text",
query: "Enter the page ID (optional):",
label: "Page ID",
name: "page_id",
optional: true,
description:
"Specific page ID to index - leave empty to index the entire space (e.g. `131368`)",
},
{
type: "checkbox",
query: "Should index pages recursively?",
label: "Index Recursively",
name: "index_recursively",
description:
"If this is set and the Wiki Page URL leads to a page, we will index the page and all of its children instead of just the page. This is set by default for Confluence connectors without a page ID specified.",
query: "Enter the wiki base URL:",
label: "Wiki Base URL",
name: "wiki_base",
optional: false,
description:
"The base URL of your Confluence instance (e.g., https://your-domain.atlassian.net/wiki)",
},
{
type: "text",
query: "Enter the CQL query (optional):",
label: "CQL Query",
name: "cql_query",
type: "tab",
name: "indexing_scope",
label: "How Should We Index Your Confluence?",
optional: true,
description:
"IMPORTANT: This will overwrite all other selected connector settings (besides Wiki Base URL). We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
tabs: [
{
value: "everything",
label: "Everything",
fields: [
{
type: "string_tab",
label: "Everything",
name: "everything",
description:
"This connector will index all pages the provided credentials have access to!",
},
],
},
{
value: "space",
label: "Space",
fields: [
{
type: "text",
query: "Enter the space:",
label: "Space Key",
name: "space",
default: "",
description: "The Confluence space key to index (e.g. `KB`).",
},
],
},
{
value: "page",
label: "Page",
fields: [
{
type: "text",
query: "Enter the page ID:",
label: "Page ID",
name: "page_id",
default: "",
description: "Specific page ID to index (e.g. `131368`)",
},
{
type: "checkbox",
query: "Should index pages recursively?",
label: "Index Recursively",
name: "index_recursively",
description:
"If this is set, we will index the page indicated by the Page ID as well as all of its children.",
optional: false,
default: true,
},
],
},
{
value: "cql",
label: "CQL Query",
fields: [
{
type: "text",
query: "Enter the CQL query (optional):",
label: "CQL Query",
name: "cql_query",
default: "",
description:
"IMPORTANT: We currently only support CQL queries that return objects of type 'page'. This means all CQL queries must contain 'type=page' as the only type filter. It is also important that no filters for 'lastModified' are used as it will cause issues with our connector polling logic. We will still get all attachments and comments for the pages returned by the CQL query. Any 'lastmodified' filters will be overwritten. See https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/ for more details.",
},
],
},
],
defaultTab: "space",
},
],
advanced_values: [],
},
jira: {
description: "Configure Jira connector",
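As a rough illustration of what the reworked Google Drive and Confluence configurations above collect, the sketch below shows hedged example values for the Drive "Specific" tab and the Confluence "Page" tab; the URLs, IDs, and email are placeholders, and fields not shown in this diff are omitted.

// Illustrative only: example values for the tabbed configs above.
// <drive-id>, <folder-id>, the email, and the Atlassian domain are placeholders.
const exampleDriveSpecificScope = {
  include_shared_drives: false,
  include_my_drives: false,
  shared_drive_urls: "https://drive.google.com/drive/folders/<drive-id>",
  shared_folder_urls: "https://drive.google.com/drive/folders/<folder-id>",
  my_drive_emails: "someone@example.com",
};

const exampleConfluencePageScope = {
  wiki_base: "https://your-domain.atlassian.net/wiki",
  page_id: "131368",
  index_recursively: true,
};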