WIP, testing theory

This commit is contained in:
Evan Lohn 2025-03-13 14:03:45 -07:00
parent 2b38b0c53b
commit ebb92dc6d7
3 changed files with 13 additions and 9 deletions

View File

@ -122,7 +122,7 @@ def crawl_folders_for_files(
This function starts crawling from any folder. It is slower though.
"""
logger.info("Entered crawl_folders_for_files with parent_id: " + parent_id)
if parent_id not in traversed_parent_ids:
if True: # TODO: temporary for debugging
logger.info("Parent id not in traversed parent ids, getting files")
found_files = False
for file in _get_files_in_parent(

View File

@ -135,13 +135,15 @@ def filter_invalid_prefixes(names: set[str]) -> set[str]:
return {name for name in names if name.startswith(_VALID_PREFIX)}
def print_discrepencies(
def print_discrepancies(
expected: set[str],
retrieved: set[str],
) -> None:
if expected != retrieved:
print(expected)
print(retrieved)
expected_list = sorted(list(expected), key=lambda x: int(x.split("_")[-1]))
retrieved_list = sorted(list(retrieved), key=lambda x: int(x.split("_")[-1]))
print(expected_list)
print(retrieved_list)
print("Extra:")
print(retrieved - expected)
print("Missing:")
@ -166,6 +168,8 @@ def assert_retrieved_docs_match_expected(
_get_expected_file_content(file_id) for file_id in expected_file_ids
}
retrieved_docs.sort(key=lambda x: x.semantic_identifier)
for doc in retrieved_docs:
print(f"doc.semantic_identifier: {doc.semantic_identifier}")
@ -192,14 +196,14 @@ def assert_retrieved_docs_match_expected(
)
# Check file names
print_discrepencies(
print_discrepancies(
expected=expected_file_names,
retrieved=valid_retrieved_file_names,
)
assert expected_file_names == valid_retrieved_file_names
# Check file texts
print_discrepencies(
print_discrepancies(
expected=expected_file_texts,
retrieved=valid_retrieved_texts,
)

View File

@ -22,7 +22,7 @@ from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_1_FILE_I
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_1_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import FOLDER_2_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import print_discrepencies
from tests.daily.connectors.google_drive.consts_and_utils import print_discrepancies
from tests.daily.connectors.google_drive.consts_and_utils import PUBLIC_RANGE
from tests.daily.connectors.google_drive.consts_and_utils import SECTIONS_FILE_IDS
from tests.daily.connectors.google_drive.consts_and_utils import SHARED_DRIVE_1_FILE_IDS
@ -83,7 +83,7 @@ def assert_correct_access_for_user(
expected_file_names = {file_name_template.format(i) for i in all_accessible_ids}
filtered_retrieved_file_names = filter_invalid_prefixes(retrieved_file_names)
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
print_discrepancies(expected_file_names, filtered_retrieved_file_names)
assert expected_file_names == filtered_retrieved_file_names
@ -175,7 +175,7 @@ def test_all_permissions(
# Should get everything
filtered_retrieved_file_names = filter_invalid_prefixes(found_file_names)
print_discrepencies(expected_file_names, filtered_retrieved_file_names)
print_discrepancies(expected_file_names, filtered_retrieved_file_names)
assert expected_file_names == filtered_retrieved_file_names
group_map = get_group_map(google_drive_connector)