feat: created new graph-based deep-research agent
This commit is contained in:
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check that the sources cited in the research report are reachable.
|
||||
|
||||
Scans the final report for URLs and DOIs, probes each with a HEAD
|
||||
request, and writes a `source_check` summary into state so the human
|
||||
reviewer sees broken citations at the approval step.
|
||||
|
||||
Times out per request so a slow source cannot stall the graph.
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
DOI_RE = re.compile(r"\b(10\.\d{4,9}/[-._;()/:A-Z0-9]+)", re.IGNORECASE)
|
||||
URL_RE = re.compile(r"https?://[^\s)\]\}\"'>]+")
|
||||
|
||||
|
||||
def load_state():
|
||||
path = os.environ.get("GRAPH_STATE_FILE")
|
||||
if path:
|
||||
with open(path) as f:
|
||||
return json.load(f)
|
||||
return json.loads(os.environ.get("GRAPH_STATE", "{}"))
|
||||
|
||||
|
||||
def reachable(url, timeout=5.0):
|
||||
req = urllib.request.Request(url, method="HEAD")
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return 200 <= resp.status < 400
|
||||
except urllib.error.HTTPError as e:
|
||||
return 200 <= e.code < 400
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
state = load_state()
|
||||
report = state.get("report") or ""
|
||||
|
||||
urls = sorted({u.rstrip(".,;)") for u in URL_RE.findall(report)})
|
||||
dois = sorted(set(DOI_RE.findall(report)))
|
||||
|
||||
results = []
|
||||
for url in urls:
|
||||
ok = reachable(url)
|
||||
results.append(f" {'OK' if ok else 'UNREACHABLE'} {url}")
|
||||
for doi in dois:
|
||||
url = f"https://doi.org/{doi}"
|
||||
if url in urls:
|
||||
continue
|
||||
ok = reachable(url)
|
||||
results.append(f" {'OK' if ok else 'UNREACHABLE'} DOI {doi} ({url})")
|
||||
|
||||
if not results:
|
||||
summary = "No web sources were cited in the report."
|
||||
else:
|
||||
summary = (
|
||||
f"Source reachability ({len(results)} checked):\n"
|
||||
+ "\n".join(results)
|
||||
)
|
||||
|
||||
print(json.dumps({"source_check": summary}))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user