Mirror of https://github.com/danswer-ai/danswer.git (last synced 2025-09-28 04:49:21 +02:00)
Address issue with links for Google Sites connector
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from typing import cast
|
from typing import cast
|
||||||
@@ -29,7 +30,9 @@ def process_link(element: BeautifulSoup | Tag) -> str:
|
|||||||
href = urllib.parse.unquote(href)
|
href = urllib.parse.unquote(href)
|
||||||
href = href.rstrip(".html").lower()
|
href = href.rstrip(".html").lower()
|
||||||
href = href.replace("_", "")
|
href = href.replace("_", "")
|
||||||
href = href.replace(" ", "-")
|
href = re.sub(
|
||||||
|
r"([\s-]+)", "-", href
|
||||||
|
) # replace all whitespace/'-' groups with a single '-'
|
||||||
|
|
||||||
return href
|
return href
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user