From cb7da6b1d5ddc4899325a37f5ceb9e9d85974551 Mon Sep 17 00:00:00 2001
From: Evan Lohn
Date: Mon, 10 Mar 2025 12:21:24 -0700
Subject: [PATCH] better drive url cleaning

---
 backend/onyx/connectors/google_drive/connector.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Review notes (this region, between the diffstat and the first "diff --git"
line, is commentary and is not part of the applied change):

* _extract_ids_from_urls returns, for each URL, the last "/"-separated
  segment of the URL's path. urlparse() already separates the "?query" and
  "#fragment" parts from .path, so the trailing .split("?")[0] in the
  previous revision of the added line could never change the result and has
  been dropped.
* A URL ending in "/" made that last segment an empty string. The path is
  now rstrip("/")-ed first, so the segment before the trailing slash is
  returned instead.
* Line breaks, indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch using the hunk line counts. The
  two spaces before "# type: ignore" are an assumption (TODO confirm against
  the target file); if the context does not match, apply with
  --ignore-whitespace.
* The post-image blob id on the "index" line was generated for the previous
  revision of the added line and has not been recomputed.

diff --git a/backend/onyx/connectors/google_drive/connector.py b/backend/onyx/connectors/google_drive/connector.py
index c3a085b06..68a586e27 100644
--- a/backend/onyx/connectors/google_drive/connector.py
+++ b/backend/onyx/connectors/google_drive/connector.py
@@ -4,6 +4,7 @@ from concurrent.futures import as_completed
 from concurrent.futures import ThreadPoolExecutor
 from functools import partial
 from typing import Any
+from urllib.parse import urlparse
 
 from google.oauth2.credentials import Credentials as OAuthCredentials  # type: ignore
 from google.oauth2.service_account import Credentials as ServiceAccountCredentials  # type: ignore
@@ -59,7 +60,7 @@ def _extract_str_list_from_comma_str(string: str | None) -> list[str]:
 
 
 def _extract_ids_from_urls(urls: list[str]) -> list[str]:
-    return [url.split("/")[-1] for url in urls]
+    return [urlparse(url).path.rstrip("/").split("/")[-1] for url in urls]
 
 
 def _convert_single_file(