AstroResearch/src/clients/ads.rs
Asfmq cd6af4f995 feat: 重构 PDF/文献检索同步机制、升级引力图交互与控制台 UI 样式
- [后端/PDF解析] 重构 MinerU PDF 解析流程:引入预签名两阶段直传机制,解决大文件 API 传输限制问题;支持轮询机制与本地 images 备用目录存储。
- [后端/同步与下载] 新增经典 ADS SCAN 扫描件 PDF 和 ADS_PDF 直接通道的下载逻辑;新增常用同步检索配置的持久化存储与去重管理 API。
- [后端/日志] 重构日志系统,支持控制台 pretty 输出与每日滚动文件日志(使用上海 +08:00 时区),引入 HTTP 路由请求链路追踪。
- [前端/引力图] 升级引用星系图 canvas 交互:支持平移拖拽与滚轮缩放,添加引力圈轨道装饰及未导入文献的半透明视觉区分。
- [前端/控制台] 统一重构为扁平高对比度浅色纯中文控制台样式;重新设计文献详情弹窗与状态进度条。
- [数据库] 新增 papers 表的 doctype 字段及 sync_queries 检索配置表。
2026-06-10 17:29:07 +08:00

221 lines
7.1 KiB
Rust

// src/clients/ads.rs
use serde::{Deserialize, Serialize};
use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE};
use tracing::{info, error};
/// One document record as returned by the ADS API, in the shape the rest of the
/// crate passes around and serializes.
///
/// Only `bibcode` is required when deserializing; every other field is an
/// `Option` and becomes `None` when the key is absent from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdsPaperDoc {
    /// ADS bibcode of the record.
    pub bibcode: String,
    /// Value of the ADS `title` field.
    pub title: Option<Vec<String>>,
    /// Value of the ADS `author` field.
    pub author: Option<Vec<String>>,
    /// Value of the ADS `year` field, kept as a string.
    pub year: Option<String>,
    /// Value of the ADS `pub` field. `pub` is a Rust keyword, so the field is
    /// renamed and mapped back to the JSON key `pub` in both directions.
    #[serde(rename = "pub")]
    pub pub_journal: Option<String>,
    /// Value of the ADS `keyword` field.
    pub keyword: Option<Vec<String>>,
    /// Abstract text.
    ///
    /// Serialized under the key `abstract_text`. ADS itself sends the key
    /// `abstract`, so that spelling is accepted as an alias on input; without
    /// it, deserializing an ADS payload straight into this struct would
    /// silently leave the abstract as `None`.
    #[serde(alias = "abstract")]
    pub abstract_text: Option<String>,
    /// Value of the ADS `doi` field.
    pub doi: Option<Vec<String>>,
    /// Value of the ADS `citation_count` field.
    pub citation_count: Option<i32>,
    /// Value of the ADS `reference_count` field.
    pub reference_count: Option<i32>,
    /// Value of the ADS `reference` field.
    pub reference: Option<Vec<String>>,
    /// Value of the ADS `citation` field.
    pub citation: Option<Vec<String>>,
    /// Value of the ADS `identifier` field.
    pub identifier: Option<Vec<String>>,
    /// Value of the ADS `doctype` field.
    pub doctype: Option<String>,
}
/// Inner `response` object of an [`AdsSearchResponse`]: the list of returned documents.
#[derive(Debug, Deserialize)]
pub struct AdsResponseDocs {
    /// Documents deserialized from the `docs` array.
    pub docs: Vec<AdsPaperDoc>,
}
/// Top-level envelope of a search reply: a single `response` object wrapping the documents.
#[derive(Debug, Deserialize)]
pub struct AdsSearchResponse {
    /// Payload found under the `response` key.
    pub response: AdsResponseDocs,
}
/// Body of an ADS export reply; `AdsClient::export_bibtex` returns its `export` string.
#[derive(Debug, Deserialize)]
pub struct AdsExportResponse {
    /// Exported text found under the `export` key.
    pub export: String,
}
/// Client for the ADS API service.
///
/// Bundles the API key with the `reqwest::Client` used to issue requests.
#[derive(Clone)]
pub struct AdsClient {
    /// API key sent as a bearer token in the `Authorization` header.
    api_key: String,
    /// HTTP client used for every request made by this value.
    client: reqwest::Client,
}
impl AdsClient {
pub fn new(api_key: String) -> Self {
AdsClient {
api_key,
client: reqwest::Client::new(),
}
}
// 拼装鉴权 Header
fn headers(&self) -> HeaderMap {
let mut headers = HeaderMap::new();
headers.insert(
AUTHORIZATION,
HeaderValue::from_str(&format!("Bearer {}", self.api_key)).unwrap_or_else(|_| HeaderValue::from_static("")),
);
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
headers
}
// 调用 ADS 检索接口获取文献元数据列表,支持分页与排序
pub async fn search(&self, query: &str, start: i32, rows: i32, sort: &str) -> anyhow::Result<Vec<AdsPaperDoc>> {
let url = "https://api.adsabs.harvard.edu/v1/search/query";
let translated = crate::services::query_parser::to_ads_query(query);
// fl 声明返回字段,包括 reference 和 citation 引用关系数组及 identifier 和 doctype
let fl = "bibcode,title,author,year,pub,keyword,abstract,doi,citation_count,reference_count,reference,citation,identifier,doctype";
let ads_sort = match sort {
"date_desc" => "date desc",
"date_asc" => "date asc",
"citations_desc" => "citation_count desc",
_ => "score desc",
};
info!("正在发送检索请求到 ADS 平台: 原始词='{}', 翻译词='{}', 起始={}, 数量={}, 排序='{}'", query, translated, start, rows, ads_sort);
let start_str = start.to_string();
let rows_str = rows.to_string();
let response = self.client
.get(url)
.headers(self.headers())
.query(&[
("q", translated.as_str()),
("start", start_str.as_str()),
("rows", rows_str.as_str()),
("fl", fl),
("sort", ads_sort),
])
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let err_body = response.text().await.unwrap_or_default();
error!("ADS 检索请求失败: 状态码={}, 返回错误={}", status, err_body);
return Err(anyhow::anyhow!("ADS API 接口返回错误码: {}", status));
}
let raw_res: RawSearchResponse = response.json().await?;
let docs = raw_res.response.docs.into_iter().map(|d| {
AdsPaperDoc {
bibcode: d.bibcode,
title: d.title,
author: d.author,
year: d.year,
pub_journal: d.pub_journal,
keyword: d.keyword,
abstract_text: d.abstract_field,
doi: d.doi,
citation_count: d.citation_count,
reference_count: d.reference_count,
reference: d.reference,
citation: d.citation,
identifier: d.identifier,
doctype: d.doctype,
}
}).collect();
Ok(docs)
}
// 调用 ADS Export 接口导出 BibTeX 文本内容
pub async fn export_bibtex(&self, bibcodes: Vec<String>) -> anyhow::Result<String> {
let url = "https://api.adsabs.harvard.edu/v1/export/bibtex";
info!("正在向 ADS 请求导出 {} 篇文献的 BibTeX 数据", bibcodes.len());
let payload = serde_json::json!({
"bibcode": bibcodes
});
let response = self.client
.post(url)
.headers(self.headers())
.json(&payload)
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
let err_body = response.text().await.unwrap_or_default();
error!("ADS 导出 BibTeX 失败: 状态码={}, 返回信息={}", status, err_body);
return Err(anyhow::anyhow!("ADS 导出接口返回错误码: {}", status));
}
let res_data: AdsExportResponse = response.json().await?;
Ok(res_data.export)
}
// 获取某个查询词在 ADS 的匹配文献总量
pub async fn get_total_count(&self, query: &str) -> anyhow::Result<i32> {
let url = "https://api.adsabs.harvard.edu/v1/search/query";
let translated = crate::services::query_parser::to_ads_query(query);
info!("正在向 ADS 查询匹配的总文献数, 原始词: '{}', 翻译词: '{}'", query, translated);
let response = self.client
.get(url)
.headers(self.headers())
.query(&[("q", translated.as_str()), ("rows", "0")])
.send()
.await?;
if !response.status().is_success() {
let status = response.status();
return Err(anyhow::anyhow!("ADS API 接口返回错误码: {}", status));
}
#[derive(Deserialize)]
struct SimpleResponse {
response: SimpleDocs,
}
#[derive(Deserialize)]
struct SimpleDocs {
#[serde(rename = "numFound")]
num_found: i32,
}
let raw: SimpleResponse = response.json().await?;
Ok(raw.response.num_found)
}
}
/// Internal deserialization helper for one ADS document.
///
/// Exists so the ADS keys `abstract` and `pub`, which clash with Rust
/// keywords, can be read without breaking compilation: both are renamed to
/// legal field names here. `AdsClient::search` decodes into this type and then
/// copies the values into `AdsPaperDoc`.
#[derive(Debug, Deserialize)]
struct RawDoc {
    /// ADS bibcode; the only required key.
    bibcode: String,
    /// ADS `title` field.
    title: Option<Vec<String>>,
    /// ADS `author` field.
    author: Option<Vec<String>>,
    /// ADS `year` field, kept as a string.
    year: Option<String>,
    /// ADS `pub` field, read from the JSON key `pub`.
    #[serde(rename = "pub")]
    pub_journal: Option<String>,
    /// ADS `keyword` field.
    keyword: Option<Vec<String>>,
    /// ADS `abstract` field, read from the JSON key `abstract`.
    #[serde(rename = "abstract")]
    abstract_field: Option<String>,
    /// ADS `doi` field.
    doi: Option<Vec<String>>,
    /// ADS `citation_count` field.
    citation_count: Option<i32>,
    /// ADS `reference_count` field.
    reference_count: Option<i32>,
    /// ADS `reference` field.
    reference: Option<Vec<String>>,
    /// ADS `citation` field.
    citation: Option<Vec<String>>,
    /// ADS `identifier` field.
    identifier: Option<Vec<String>>,
    /// ADS `doctype` field.
    doctype: Option<String>,
}
/// Internal top-level envelope of a search reply, decoded by `AdsClient::search`.
#[derive(Debug, Deserialize)]
struct RawSearchResponse {
    /// Payload found under the `response` key.
    response: RawDocs,
}
/// Internal `response` object of a search reply: the raw document list.
#[derive(Debug, Deserialize)]
struct RawDocs {
    /// Documents deserialized from the `docs` array.
    docs: Vec<RawDoc>,
}