refac: web search

This commit is contained in:
Timothy J. Baek 2024-06-01 19:57:00 -07:00
parent 999d2bc21b
commit fbdfb7e4fa
7 changed files with 36 additions and 20 deletions

View File

@ -739,7 +739,11 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
# TODO: add playwright to search the web
if engine == "searxng":
if app.state.config.SEARXNG_QUERY_URL:
return search_searxng(app.state.config.SEARXNG_QUERY_URL, query)
return search_searxng(
app.state.config.SEARXNG_QUERY_URL,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
)
else:
raise Exception("No SEARXNG_QUERY_URL found in environment variables")
elif engine == "google_pse":
@ -751,6 +755,7 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
app.state.config.GOOGLE_PSE_API_KEY,
app.state.config.GOOGLE_PSE_ENGINE_ID,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
)
else:
raise Exception(
@ -758,7 +763,11 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
)
elif engine == "brave":
if app.state.config.BRAVE_SEARCH_API_KEY:
return search_brave(app.state.config.BRAVE_SEARCH_API_KEY, query)
return search_brave(
app.state.config.BRAVE_SEARCH_API_KEY,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
)
else:
raise Exception("No BRAVE_SEARCH_API_KEY found in environment variables")
elif engine == "serpstack":
@ -766,13 +775,18 @@ def search_web(engine: str, query: str) -> list[SearchResult]:
return search_serpstack(
app.state.config.SERPSTACK_API_KEY,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
https_enabled=app.state.config.SERPSTACK_HTTPS,
)
else:
raise Exception("No SERPSTACK_API_KEY found in environment variables")
elif engine == "serper":
if app.state.config.SERPER_API_KEY:
return search_serper(app.state.config.SERPER_API_KEY, query)
return search_serper(
app.state.config.SERPER_API_KEY,
query,
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT,
)
else:
raise Exception("No SERPER_API_KEY found in environment variables")
else:

View File

@ -3,13 +3,13 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_brave(api_key: str, query: str) -> list[SearchResult]:
def search_brave(api_key: str, query: str, count: int) -> list[SearchResult]:
"""Search using Brave's Search API and return the results as a list of SearchResult objects.
Args:
@ -22,7 +22,7 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
"Accept-Encoding": "gzip",
"X-Subscription-Token": api_key,
}
params = {"q": query, "count": RAG_WEB_SEARCH_RESULT_COUNT}
params = {"q": query, "count": count}
response = requests.get(url, headers=headers, params=params)
response.raise_for_status()
@ -33,5 +33,5 @@ def search_brave(api_key: str, query: str) -> list[SearchResult]:
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]

View File

@ -4,14 +4,14 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_google_pse(
api_key: str, search_engine_id: str, query: str
api_key: str, search_engine_id: str, query: str, count: int
) -> list[SearchResult]:
"""Search using Google's Programmable Search Engine API and return the results as a list of SearchResult objects.
@ -27,7 +27,7 @@ def search_google_pse(
"cx": search_engine_id,
"q": query,
"key": api_key,
"num": RAG_WEB_SEARCH_RESULT_COUNT,
"num": count,
}
response = requests.request("GET", url, headers=headers, params=params)

View File

@ -3,13 +3,13 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_searxng(query_url: str, query: str) -> list[SearchResult]:
def search_searxng(query_url: str, query: str, count: int) -> list[SearchResult]:
"""Search a SearXNG instance for a query and return the results as a list of SearchResult objects.
Args:
@ -40,5 +40,5 @@ def search_searxng(query_url: str, query: str) -> list[SearchResult]:
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("content")
)
for result in sorted_results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in sorted_results[:count]
]

View File

@ -4,13 +4,13 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_serper(api_key: str, query: str) -> list[SearchResult]:
def search_serper(api_key: str, query: str, count: int) -> list[SearchResult]:
"""Search using serper.dev's API and return the results as a list of SearchResult objects.
Args:
@ -35,5 +35,5 @@ def search_serper(api_key: str, query: str) -> list[SearchResult]:
title=result.get("title"),
snippet=result.get("description"),
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]

View File

@ -4,14 +4,14 @@ import logging
import requests
from apps.rag.search.main import SearchResult
from config import SRC_LOG_LEVELS, RAG_WEB_SEARCH_RESULT_COUNT
from config import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
def search_serpstack(
api_key: str, query: str, https_enabled: bool = True
api_key: str, query: str, count: int, https_enabled: bool = True
) -> list[SearchResult]:
"""Search using serpstack.com's API and return the results as a list of SearchResult objects.
@ -39,5 +39,5 @@ def search_serpstack(
SearchResult(
link=result["url"], title=result.get("title"), snippet=result.get("snippet")
)
for result in results[:RAG_WEB_SEARCH_RESULT_COUNT]
for result in results[:count]
]

View File

@ -35,7 +35,9 @@
? ''
: 'border-b border-gray-300/30 dark:border-gray-700/50'} group/item justify-between font-normal text-gray-800 dark:text-gray-300"
>
{url}
<div class=" line-clamp-1">
{url}
</div>
<div
class=" ml-1 text-white dark:text-gray-900 group-hover/item:text-gray-600 dark:group-hover/item:text-white transition"