mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-07 11:28:09 +02:00
prevent other tests from interfering with existing google drive tests (#3624)
* prevent other tests from interfering with existing google drive tests * cleanup gdrive tests * finished * done
This commit is contained in:
parent
a95f4298ad
commit
4ce24d68f7
@ -123,8 +123,19 @@ SPECIAL_FILE_ID_TO_CONTENT_MAP: dict[int, str] = {
|
||||
file_name_template = "file_{}.txt"
|
||||
file_text_template = "This is file {}"
|
||||
|
||||
# Retrieved files are filtered by name prefix to prevent different tests from interfering with each other
|
||||
# So each test type should have its own valid prefix
|
||||
_VALID_PREFIX = "file_"
|
||||
|
||||
def print_discrepencies(expected: set[str], retrieved: set[str]) -> None:
|
||||
|
||||
def filter_invalid_prefixes(names: set[str]) -> set[str]:
    """Return only the names that start with ``_VALID_PREFIX``.

    Names without the prefix are dropped so that files belonging to other
    tests do not show up in this test type's comparisons.

    Args:
        names: Candidate file names.

    Returns:
        A new set holding the names that carry the valid prefix; the input
        set is not modified.
    """
    valid_names: set[str] = set()
    for candidate in names:
        if candidate.startswith(_VALID_PREFIX):
            valid_names.add(candidate)
    return valid_names
|
||||
|
||||
|
||||
def print_discrepencies(
|
||||
expected: set[str],
|
||||
retrieved: set[str],
|
||||
) -> None:
|
||||
if expected != retrieved:
|
||||
print(expected)
|
||||
print(retrieved)
|
||||
@ -134,7 +145,7 @@ def print_discrepencies(expected: set[str], retrieved: set[str]) -> None:
|
||||
print(expected - retrieved)
|
||||
|
||||
|
||||
def get_file_content(file_id: int) -> str:
|
||||
def _get_expected_file_content(file_id: int) -> str:
|
||||
if file_id in SPECIAL_FILE_ID_TO_CONTENT_MAP:
|
||||
return SPECIAL_FILE_ID_TO_CONTENT_MAP[file_id]
|
||||
|
||||
@ -142,25 +153,42 @@ def get_file_content(file_id: int) -> str:
|
||||
|
||||
|
||||
def assert_retrieved_docs_match_expected(
    retrieved_docs: list[Document],
    expected_file_ids: Sequence[int],
) -> None:
    """Assert that the retrieved documents are exactly the expected files.

    Only documents whose ``semantic_identifier`` starts with ``_VALID_PREFIX``
    are compared; everything else is ignored so that different tests do not
    interfere with each other.

    Args:
        retrieved_docs: Documents returned by the connector under test.
        expected_file_ids: IDs of the files that should have been retrieved.

    Raises:
        AssertionError: If the set of file names or the set of file texts of
            the prefix-filtered documents differs from the expected set.
    """
    expected_file_names = {
        file_name_template.format(file_id) for file_id in expected_file_ids
    }
    expected_file_texts = {
        _get_expected_file_content(file_id) for file_id in expected_file_ids
    }

    # Filter out invalid prefixes to prevent different tests from interfering with each other
    valid_retrieved_docs = [
        doc
        for doc in retrieved_docs
        if doc.semantic_identifier.startswith(_VALID_PREFIX)
    ]
    valid_retrieved_file_names = {
        doc.semantic_identifier for doc in valid_retrieved_docs
    }
    # A document's text is the " - "-joined text of all of its sections.
    valid_retrieved_texts = {
        " - ".join(section.text for section in doc.sections)
        for doc in valid_retrieved_docs
    }

    # Check file names
    print_discrepencies(
        expected=expected_file_names,
        retrieved=valid_retrieved_file_names,
    )
    assert expected_file_names == valid_retrieved_file_names

    # Check file texts
    print_discrepencies(
        expected=expected_file_texts,
        retrieved=valid_retrieved_texts,
    )
    assert expected_file_texts == valid_retrieved_texts
|
||||
|
@ -15,6 +15,7 @@ from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_EMAIL
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import ADMIN_FOLDER_3_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import file_name_template
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import filter_invalid_prefixes
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_1_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_2_FILE_IDS
|
||||
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_IDS
|
||||
@ -81,9 +82,10 @@ def assert_correct_access_for_user(
|
||||
all_accessible_ids = expected_access_ids + PUBLIC_RANGE
|
||||
expected_file_names = {file_name_template.format(i) for i in all_accessible_ids}
|
||||
|
||||
print_discrepencies(expected_file_names, retrieved_file_names)
|
||||
filtered_retrieved_file_names = filter_invalid_prefixes(retrieved_file_names)
|
||||
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
|
||||
|
||||
assert expected_file_names == retrieved_file_names
|
||||
assert expected_file_names == filtered_retrieved_file_names
|
||||
|
||||
|
||||
# This function is supposed to map to the group_sync.py file for the google drive connector
|
||||
@ -172,8 +174,9 @@ def test_all_permissions(
|
||||
}
|
||||
|
||||
# Should get everything
|
||||
print_discrepencies(expected_file_names, found_file_names)
|
||||
assert expected_file_names == found_file_names
|
||||
filtered_retrieved_file_names = filter_invalid_prefixes(found_file_names)
|
||||
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
|
||||
assert expected_file_names == filtered_retrieved_file_names
|
||||
|
||||
group_map = get_group_map(google_drive_connector)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user