"""Search ADS (api.adsabs.harvard.edu) and save paper metadata as JSON."""

import json
import argparse
import os
import sys

import requests

ADS_API_URL = "https://api.adsabs.harvard.edu/v1/search/query"

# Fields requested from the search endpoint (the "fl" query parameter).
ADS_FIELDS = "bibcode,title,author,year,abstract,citation_count,reference_count,pub,doi"

# Placeholder value that is treated the same as an unset key.
_PLACEHOLDER_TOKEN = "your_api_key_here"

# Seconds to wait for the ADS server before giving up.
_REQUEST_TIMEOUT = 30

# Number of results echoed to the console after saving.
_PREVIEW_COUNT = 5


def _strip_quotes(raw):
    """Return *raw* trimmed of whitespace and one layer of matching quotes."""
    value = raw.strip()
    if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
        value = value[1:-1].strip()
    return value


def _load_token():
    """Return the ADS API token, or "" when none is configured.

    The ``ADS_API_KEY`` environment variable wins. Otherwise the ``.env``
    file four directories above this script is scanned for an
    ``ADS_API_KEY=...`` line. The placeholder value counts as "not set"
    in both places.
    """
    token = os.environ.get("ADS_API_KEY", "")
    if token and token != _PLACEHOLDER_TOKEN:
        return token

    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
    )
    env_path = os.path.join(project_root, ".env")
    if not os.path.isfile(env_path):
        return ""

    # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise become part of the first key name.
    with open(env_path, encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            # Accept shell-style "export KEY=value" lines as well.
            if key.startswith("export "):
                key = key[len("export "):].strip()
            # Quoted values (KEY="abc") must not keep their quotes, or the
            # Authorization header would carry them.
            value = _strip_quotes(value)
            if key == "ADS_API_KEY" and value and value != _PLACEHOLDER_TOKEN:
                return value
    return ""


def _build_query(query, year_range=None):
    """Return *query* with an optional ``year:`` filter appended.

    *year_range* is either a single year ("2020") or "START-END"
    ("2018-2023"). A missing bound ("2018-" or "-2023") becomes ``*`` so the
    range stays syntactically valid instead of producing ``[2018 TO ]``.
    """
    year_range = (year_range or "").strip()
    if not year_range:
        return query
    if "-" in year_range:
        start, end = (part.strip() or "*" for part in year_range.split("-", 1))
        return f"{query} year:[{start} TO {end}]"
    return f"{query} year:{year_range}"


def _fetch_docs(q, rows, token):
    """Run the search and return the list of raw documents.

    Raises ``requests.RequestException`` on transport or HTTP errors and
    ``ValueError`` when the body is not valid JSON.
    """
    params = {"q": q, "rows": rows, "fl": ADS_FIELDS}
    headers = {"Authorization": f"Bearer {token}"}
    resp = requests.get(
        ADS_API_URL, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    resp.raise_for_status()
    data = resp.json()
    return data.get("response", {}).get("docs", [])


def _to_record(doc):
    """Flatten one raw document into the dict that is written to disk."""
    # title and doi are indexed as lists; keep only the first entry.
    # "or []" also covers a field that is present but null.
    titles = doc.get("title") or []
    dois = doc.get("doi") or []
    return {
        "bibcode": doc.get("bibcode", ""),
        "title": titles[0] if titles else "",
        "author": doc.get("author") or [],
        "year": doc.get("year", ""),
        "abstract": doc.get("abstract", ""),
        "citation_count": doc.get("citation_count", 0),
        "reference_count": doc.get("reference_count", 0),
        "pub": doc.get("pub", ""),
        "doi": dois[0] if dois else "",
    }


def _print_preview(results):
    """Print a short summary of the first few records."""
    for i, r in enumerate(results[:_PREVIEW_COUNT], start=1):
        print(f"\n[{i}] {r['title']} ({r['year']})")
        print(f" Bibcode: {r['bibcode']} | Citations: {r['citation_count']}")
        authors = ", ".join(r["author"][:3]) + (" et al." if len(r["author"]) > 3 else "")
        print(f" Authors: {authors}")


def main():
    """Parse CLI arguments, query ADS, and write the results to a JSON file.

    Exits with status 1 when no API token is configured or the query fails.
    """
    parser = argparse.ArgumentParser(description="Search ADS and return metadata")
    parser.add_argument("--query", required=True, help="ADS Search Query")
    parser.add_argument("--output", required=True, help="Output JSON file path")
    parser.add_argument("--rows", type=int, default=10, help="Number of rows to return")
    parser.add_argument("--year_range", help="Year range to filter, e.g. 2018-2023 or 2020")
    args = parser.parse_args()

    token = _load_token()
    if not token:
        print("Error: ADS_API_KEY not configured. Edit .env in project root or set env var.")
        sys.exit(1)

    q = _build_query(args.query, args.year_range)
    print(f"Searching ADS for query: {q}")

    try:
        docs = _fetch_docs(q, args.rows, token)
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON response body.
        print(f"Query Failed: {e}")
        sys.exit(1)

    results = [_to_record(d) for d in docs]

    # Create the destination directory so a fresh output path does not fail.
    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"Found {len(results)} papers. Saved metadata to {args.output}.")
    _print_preview(results)


if __name__ == "__main__":
    main()