Article/skills/ads_metadata_search/scripts/search.py
Asfmq dfd0a980a5 feat: 重写 ADS 搜索脚本为 REST API,新增 Obsidian 转换 skill,修复路径
- ads_metadata_search: 移除 ads 库依赖,改用 requests 直连 ADS REST API;
  移除硬编码 API Key,改为 .env 文件/环境变量加载
- 新增 ads_html_to_obsidian skill:将下载的 HTML 文献批量转换为
  Obsidian Markdown 笔记(BS4 提取正文 + Pandoc 转换 + 清洗后处理)
- 两个 SKILL.md 中的 Windows 绝对路径改为相对路径
2026-05-26 17:30:36 +08:00

96 lines
3.4 KiB
Python

import json
import argparse
import os
import sys
import requests
# Load .env from project root if ADS_API_KEY not already set
def _load_token():
    """Return the ADS API token, or an empty string when none is configured.

    The ``ADS_API_KEY`` environment variable is consulted first. If it is
    unset, empty, or still the ``your_api_key_here`` placeholder, the ``.env``
    file in the directory four levels above this script is scanned for an
    ``ADS_API_KEY=...`` line (optionally prefixed with ``export`` and
    optionally quoted).

    Returns:
        str: The token with surrounding whitespace (and, for ``.env`` values,
        one pair of matching quotes) removed; ``""`` if no usable token exists.
    """
    placeholder = "your_api_key_here"

    # Strip so a stray space/newline from the shell cannot leak into the
    # Authorization header built from this value.
    token = os.environ.get("ADS_API_KEY", "").strip()
    if token and token != placeholder:
        return token

    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
    )
    env_path = os.path.join(project_root, ".env")
    if not os.path.isfile(env_path):
        return ""

    # utf-8-sig tolerates a leading BOM, which would otherwise be glued onto
    # the first key and make it fail the name comparison below.
    with open(env_path, encoding="utf-8-sig") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            # Accept the shell-style "export KEY=value" form.
            if key.startswith("export "):
                key = key[len("export "):].strip()
            if key != "ADS_API_KEY":
                continue
            value = value.strip()
            # Unwrap KEY="value" / KEY='value' so the quotes are not sent
            # as part of the token.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            if value and value != placeholder:
                return value
    return ""
# ADS search endpoint; main() issues its GET request (query params + Bearer token) here.
ADS_API_URL = "https://api.adsabs.harvard.edu/v1/search/query"
def _build_query(query, year_range=None):
    """Return *query* with an optional ``year:`` filter appended.

    ``year_range`` is either a single year (``"2020"``) or a ``"START-END"``
    span (``"2018-2023"``); a missing or blank value leaves *query* unchanged.
    """
    year_range = (year_range or "").strip()
    if not year_range:
        return query
    if "-" in year_range:
        # Split on the first dash only; trim so "2018 - 2023" also works.
        start, end = (part.strip() for part in year_range.split("-", 1))
        return f"{query} year:[{start} TO {end}]"
    return f"{query} year:{year_range}"


def _normalize_doc(doc):
    """Flatten one result document into the record written to the output file."""
    # title and doi are handled as lists; only the first entry is kept.
    # "or []" also covers a field that is present but null.
    titles = doc.get("title") or []
    dois = doc.get("doi") or []
    return {
        "bibcode": doc.get("bibcode", ""),
        "title": titles[0] if titles else "",
        "author": doc.get("author") or [],
        "year": doc.get("year", ""),
        "abstract": doc.get("abstract", ""),
        "citation_count": doc.get("citation_count", 0),
        "reference_count": doc.get("reference_count", 0),
        "pub": doc.get("pub", ""),
        "doi": dois[0] if dois else "",
    }


def main():
    """Search ADS from the command line and save the metadata as JSON.

    Command-line arguments:
        --query       ADS search query (required).
        --output      Path of the JSON file to write (required).
        --rows        Number of rows to request (default 10).
        --year_range  Optional year filter, e.g. ``2018-2023`` or ``2020``.

    Exits with status 1 (message on stderr) when no API token is configured
    or when the HTTP request / JSON decoding fails. On success the normalized
    records are written to ``--output`` and the first five are previewed on
    stdout.
    """
    parser = argparse.ArgumentParser(description="Search ADS and return metadata")
    parser.add_argument("--query", required=True, help="ADS Search Query")
    parser.add_argument("--output", required=True, help="Output JSON file path")
    parser.add_argument("--rows", type=int, default=10, help="Number of rows to return")
    parser.add_argument("--year_range", help="Year range to filter, e.g. 2018-2023 or 2020")
    args = parser.parse_args()

    token = _load_token()
    if not token:
        print(
            "Error: ADS_API_KEY not configured. Edit .env in project root or set env var.",
            file=sys.stderr,
        )
        sys.exit(1)

    q = _build_query(args.query, args.year_range)
    print(f"Searching ADS for query: {q}")

    params = {
        "q": q,
        "rows": args.rows,
        "fl": "bibcode,title,author,year,abstract,citation_count,reference_count,pub,doi",
    }
    headers = {"Authorization": f"Bearer {token}"}
    try:
        resp = requests.get(ADS_API_URL, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException covers network/HTTP failures; ValueError covers a
        # body that is not valid JSON. Other exceptions indicate real bugs
        # and are allowed to surface with a traceback.
        print(f"Query Failed: {e}", file=sys.stderr)
        sys.exit(1)

    docs = data.get("response", {}).get("docs", [])
    results = [_normalize_doc(d) for d in docs]

    # Make sure the destination directory exists so the write cannot fail
    # merely because --output points into a not-yet-created folder.
    out_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(out_dir, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Found {len(results)} papers. Saved metadata to {args.output}.")

    # Short console preview of the first five hits.
    for i, r in enumerate(results[:5], start=1):
        print(f"\n[{i}] {r['title']} ({r['year']})")
        print(f" Bibcode: {r['bibcode']} | Citations: {r['citation_count']}")
        authors = ", ".join(r['author'][:3]) + (" et al." if len(r['author']) > 3 else "")
        print(f" Authors: {authors}")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()