mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-02 17:08:22 +02:00
better drive url cleaning
This commit is contained in:
parent
463340b8a1
commit
cb7da6b1d5
@ -4,6 +4,7 @@ from concurrent.futures import as_completed
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from functools import partial
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore
|
||||
from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore
|
||||
@ -59,7 +60,7 @@ def _extract_str_list_from_comma_str(string: str | None) -> list[str]:
|
||||
|
||||
|
||||
def _extract_ids_from_urls(urls: list[str]) -> list[str]:
|
||||
return [url.split("/")[-1] for url in urls]
|
||||
return [urlparse(url).path.split("/")[-1].split("?")[0] for url in urls]
|
||||
|
||||
|
||||
def _convert_single_file(
|
||||
|
Loading…
x
Reference in New Issue
Block a user