Only keep URLs as sources if their content could actually be retrieved

This commit is contained in:
Jan Kessler 2025-04-06 20:31:12 +02:00
parent 193a927aba
commit a506a1a61e
No known key found for this signature in database
GPG Key ID: FCF0DCB4ADFC53E7

View File

@@ -1478,6 +1478,7 @@ async def process_web_search(
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
)
docs = await loader.aload()
urls = [doc.metadata["source"] for doc in docs] # only keep URLs which could be retrieved
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return {