- ads_metadata_search: 移除 ads 库依赖,改用 requests 直连 ADS REST API; 移除硬编码 API Key,改为 .env 文件/环境变量加载 - 新增 ads_html_to_obsidian skill:将下载的 HTML 文献批量转换为 Obsidian Markdown 笔记(BS4 提取正文 + Pandoc 转换 + 清洗后处理) - 两个 SKILL.md 中的 Windows 绝对路径改为相对路径
96 lines
3.4 KiB
Python
96 lines
3.4 KiB
Python
import json
|
|
import argparse
|
|
import os
|
|
import sys
|
|
|
|
import requests
|
|
|
|
# Load .env from project root if ADS_API_KEY not already set
|
|
def _load_token():
    """Return the ADS API token, or an empty string when none is configured.

    Lookup order:

    1. The ``ADS_API_KEY`` environment variable.
    2. The first usable ``ADS_API_KEY=...`` line in the ``.env`` file that
       sits four directory levels above this script.

    The placeholder value ``your_api_key_here`` counts as "not configured"
    in both places.  Inside ``.env``, blank lines, ``#`` comment lines and
    lines without ``=`` are skipped; an optional leading ``export`` and one
    pair of matching surrounding quotes around the value are tolerated.

    Returns:
        str: the token with surrounding whitespace removed, or ``""``.
    """
    placeholder = "your_api_key_here"

    # Strip so stray whitespace/newlines never end up in the HTTP header.
    token = os.environ.get("ADS_API_KEY", "").strip()
    if token and token != placeholder:
        return token

    project_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")
    )
    env_path = os.path.join(project_root, ".env")
    if not os.path.isfile(env_path):
        return ""

    # "utf-8-sig" reads plain UTF-8 too, but also drops a leading BOM, which
    # would otherwise be glued to the key on the first line of the file.
    with open(env_path, encoding="utf-8-sig") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue

            key, _, value = line.partition("=")
            key = key.strip()
            if key.startswith("export "):
                # Shell-style ".env" files often write "export KEY=value".
                key = key[len("export "):].strip()
            if key != "ADS_API_KEY":
                continue

            value = value.strip()
            # Drop one pair of matching quotes: KEY="value" or KEY='value'.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1].strip()
            if value and value != placeholder:
                return value

    return ""
|
|
|
|
# ADS search endpoint; main() sends its GET request to this URL.
ADS_API_URL = "https://api.adsabs.harvard.edu/v1/search/query"
|
|
|
|
def _build_query(query, year_range=None):
    """Return *query* with a ``year:`` filter appended when *year_range* is set.

    ``"2018-2023"`` becomes ``year:[2018 TO 2023]``; a value without a dash
    (``"2020"``) becomes ``year:2020``.
    """
    if not year_range:
        return query
    if "-" in year_range:
        start, end = (part.strip() for part in year_range.split("-", 1))
        return f"{query} year:[{start} TO {end}]"
    return f"{query} year:{year_range.strip()}"


def _doc_to_record(doc):
    """Flatten one result document into the record stored in the output JSON.

    ``title`` and ``doi`` are read as lists and reduced to their first entry
    (``""`` when missing or empty).
    """
    titles = doc.get("title") or []
    dois = doc.get("doi") or []
    return {
        "bibcode": doc.get("bibcode", ""),
        "title": titles[0] if titles else "",
        # "or []" keeps the preview slicing below safe if the field is null.
        "author": doc.get("author") or [],
        "year": doc.get("year", ""),
        "abstract": doc.get("abstract", ""),
        "citation_count": doc.get("citation_count", 0),
        "reference_count": doc.get("reference_count", 0),
        "pub": doc.get("pub", ""),
        "doi": dois[0] if dois else "",
    }


def _print_preview(results, limit=5):
    """Print title, year, bibcode, citations and authors of the first records."""
    for i, r in enumerate(results[:limit]):
        print(f"\n[{i+1}] {r['title']} ({r['year']})")
        print(f" Bibcode: {r['bibcode']} | Citations: {r['citation_count']}")
        authors = ", ".join(r['author'][:3]) + (" et al." if len(r['author']) > 3 else "")
        print(f" Authors: {authors}")


def main(argv=None):
    """Search ADS from the command line and save the metadata as JSON.

    Args:
        argv: optional list of command-line arguments; ``None`` (the
            default) makes argparse read ``sys.argv[1:]`` as before.

    Command-line options:
        --query       ADS search query (required).
        --output      path of the JSON file to write (required).
        --rows        number of rows to request (default 10).
        --year_range  ``START-END`` or a single year, appended as a
                      ``year:`` filter to the query.

    Exits with status 1 when no API token is configured or when the
    request / response decoding fails.
    """
    parser = argparse.ArgumentParser(description="Search ADS and return metadata")
    parser.add_argument("--query", required=True, help="ADS Search Query")
    parser.add_argument("--output", required=True, help="Output JSON file path")
    parser.add_argument("--rows", type=int, default=10, help="Number of rows to return")
    parser.add_argument("--year_range", help="Year range to filter, e.g. 2018-2023 or 2020")
    args = parser.parse_args(argv)

    token = _load_token()
    if not token:
        print("Error: ADS_API_KEY not configured. Edit .env in project root or set env var.")
        sys.exit(1)

    q = _build_query(args.query, args.year_range)
    print(f"Searching ADS for query: {q}")

    params = {
        "q": q,
        "rows": args.rows,
        "fl": "bibcode,title,author,year,abstract,citation_count,reference_count,pub,doi",
    }
    headers = {"Authorization": f"Bearer {token}"}

    try:
        resp = requests.get(ADS_API_URL, params=params, headers=headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
    except Exception as e:
        # Top-level CLI boundary: report any network/HTTP/JSON failure and
        # exit non-zero instead of dumping a traceback.
        print(f"Query Failed: {e}")
        sys.exit(1)

    docs = data.get("response", {}).get("docs", [])
    results = [_doc_to_record(d) for d in docs]

    # Create the target directory first so a missing folder does not throw
    # away the results of a query that already succeeded.
    out_dir = os.path.dirname(os.path.abspath(args.output))
    os.makedirs(out_dir, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"Found {len(results)} papers. Saved metadata to {args.output}.")
    _print_preview(results)
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|