SpectraRust/extract_fortran.py
2026-03-19 14:05:33 +08:00

303 lines
10 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
提取 synspec54.f 中的各个子程序/函数到独立文件
"""
import re
import os
import sys
from pathlib import Path
# Optional result-type prefix of a FUNCTION statement, e.g. ``REAL*8``,
# ``DOUBLE PRECISION``, ``CHARACTER*(*)`` or ``INTEGER(KIND=4)``.
_FUNCTION_TYPE_PREFIX = (
    r'(?:INTEGER|REAL|DOUBLE\s*PRECISION|DOUBLE\s*COMPLEX|COMPLEX|LOGICAL|CHARACTER)'
    r'(?:\s*\*\s*(?:\d+|\(\s*\*\s*\))|\s*\([^)]*\))?'
)

# First line of a program unit.  PROGRAM and BLOCK DATA may be unnamed, which
# is why their name groups use ``\w*`` and may capture an empty string.
_UNIT_START_RE = re.compile(
    r'^\s*(?:(?:RECURSIVE|PURE|ELEMENTAL)\s+)*(?:'
    r'SUBROUTINE\s+(?P<sub>\w+)|'
    r'(?:' + _FUNCTION_TYPE_PREFIX + r'\s+)?FUNCTION\s+(?P<func>\w+)|'
    r'PROGRAM\s*(?P<prog>\w*)|'
    r'BLOCK\s*DATA\s*(?P<block>\w*)'
    r')',
    re.IGNORECASE,
)

# Terminating statement of a unit: a bare ``END`` or ``END SUBROUTINE [name]``
# and friends, optionally followed by a ``!`` comment.  ``END IF`` / ``END DO``
# deliberately do not match.
_UNIT_END_RE = re.compile(
    r'^\s*END'
    r'(?:\s*(?:SUBROUTINE|FUNCTION|PROGRAM|BLOCK\s*DATA)(?:\s+\w+)?)?'
    r'\s*(?:!.*)?$',
    re.IGNORECASE,
)


def _classify_unit(match):
    """Return ``(name, unit_type)`` for a ``_UNIT_START_RE`` match.

    ``name`` is an empty string for an unnamed PROGRAM or BLOCK DATA.
    """
    sub, func, prog, block = match.group('sub', 'func', 'prog', 'block')
    if sub:
        return sub, 'SUBROUTINE'
    if func:
        return func, 'FUNCTION'
    if prog is not None:
        return prog, 'PROGRAM'
    if block is not None:
        return block, 'BLOCK DATA'
    return '', 'UNKNOWN'


def extract_units(source_file, output_dir):
    """Split a Fortran source file into one ``.f`` file per program unit.

    A unit starts at a SUBROUTINE / FUNCTION / PROGRAM / BLOCK DATA statement
    (typed functions such as ``REAL*8 FUNCTION F(X)`` included) and ends at the
    last END statement found before the next unit starts.  Lines between that
    END and the next unit, and lines before the first unit, are not copied.

    Args:
        source_file: Path of the Fortran source to split.
        output_dir: Directory that receives the extracted files and
            ``_SUMMARY.txt``; created if missing.

    Returns:
        A list of dicts, one per unit in source order, with the keys
        ``name`` (upper case), ``type``, ``file``, ``start`` (1-based first
        line), ``end`` (1-based last line) and ``lines`` (line count).
    """
    # latin-1 maps every byte to one code point, so sources in any 8-bit or
    # UTF-8 encoding are read without errors and written back byte-for-byte.
    with open(source_file, 'r', encoding='latin-1') as f:
        content = f.read()
    lines = content.split('\n')
    # A trailing newline yields a spurious empty last element; drop it so the
    # reported line counts match the file.
    if lines and lines[-1] == '':
        lines.pop()

    os.makedirs(output_dir, exist_ok=True)

    # Locate the first line of every program unit.
    units = []
    for index, line in enumerate(lines):
        match = _UNIT_START_RE.match(line)
        if not match:
            continue
        name, unit_type = _classify_unit(match)
        if not name:
            # Placeholder name for unnamed PROGRAM / BLOCK DATA units.
            name = f"_UNNAMED_{unit_type.replace(' ', '_')}_"
        units.append((index, name.upper(), unit_type))
    print(f"找到 {len(units)} 个程序单元")

    extracted = []
    stem_counts = {}
    for idx, (start_line, name, unit_type) in enumerate(units):
        # The unit can extend at most up to the start of the next unit.
        end_line = units[idx + 1][0] if idx + 1 < len(units) else len(lines)

        # Cut at the last END statement in that range; keep the whole range
        # when no END is found.
        actual_end = end_line
        for pos in range(end_line - 1, start_line - 1, -1):
            if _UNIT_END_RE.match(lines[pos]):
                actual_end = pos + 1
                break
        unit_content = '\n'.join(lines[start_line:actual_end])

        # Units sharing a name (e.g. several unnamed BLOCK DATA) get a numeric
        # suffix instead of silently overwriting each other.
        stem = name.lower()
        occurrence = stem_counts.get(stem, 0) + 1
        stem_counts[stem] = occurrence
        filename = f"{stem}.f" if occurrence == 1 else f"{stem}_{occurrence}.f"

        filepath = os.path.join(output_dir, filename)
        with open(filepath, 'w', encoding='latin-1') as f:
            f.write(unit_content)
            if not unit_content.endswith('\n'):
                f.write('\n')

        line_count = actual_end - start_line
        extracted.append({
            'name': name,
            'type': unit_type,
            'file': filename,
            'start': start_line + 1,
            'end': actual_end,
            'lines': line_count
        })
        print(f" 提取: {name} ({unit_type}) -> {filename} ({line_count} 行)")

    # Write the summary; it contains non-ASCII text, so fix the encoding
    # rather than depend on the locale.
    summary_path = os.path.join(output_dir, '_SUMMARY.txt')
    with open(summary_path, 'w', encoding='utf-8') as f:
        f.write(f"{os.path.basename(source_file).upper()} 提取摘要\n")
        f.write(f"{'='*60}\n\n")
        f.write(f"源文件: {source_file}\n")
        f.write(f"总单元数: {len(extracted)}\n")
        f.write(f"总行数: {len(lines)}\n\n")
        f.write(f"{'名称':<20} {'类型':<12} {'文件':<20} {'行数':>8}\n")
        f.write(f"{'-'*60}\n")
        for unit in extracted:
            f.write(f"{unit['name']:<20} {unit['type']:<12} {unit['file']:<20} {unit['lines']:>8}\n")

        # Per-type totals.
        types = {}
        for unit in extracted:
            types[unit['type']] = types.get(unit['type'], 0) + 1
        f.write(f"\n按类型统计:\n")
        for t, c in types.items():
            f.write(f" {t}: {c}\n")
    print(f"\n摘要已保存到: {summary_path}")
    return extracted
def analyze_commons(output_dir):
    """Report which extracted units reference COMMON blocks and INCLUDE files.

    Every ``*.f`` file in ``output_dir`` whose name does not start with ``_``
    is scanned.  The result is written to ``_COMMON_ANALYSIS.txt`` in the same
    directory.

    Args:
        output_dir: Directory holding the extracted ``.f`` files.

    Returns:
        A tuple ``(pure_units, commons, includes)``:
        ``pure_units`` is a sorted list of unit names with no COMMON
        reference; ``commons`` maps unit name to a sorted list of upper-case
        COMMON block names (``'BLANK'`` stands for blank COMMON); ``includes``
        maps unit name to a sorted list of included file names.
    """
    # Named COMMON block: COMMON /NAME/ ...
    named_common_pattern = re.compile(r'COMMON\s*/\s*(\w+)\s*/', re.IGNORECASE)
    # Blank COMMON: either "COMMON var" (no slashes) or "COMMON // var".
    blank_common_pattern = re.compile(
        r'^\s*COMMON(?:\s*/\s*/|\s+[A-Z])', re.IGNORECASE | re.MULTILINE)
    include_pattern = re.compile(r'INCLUDE\s*[\'"]([^\'"]+)[\'"]', re.IGNORECASE)

    commons = {}
    includes = {}
    pure_units = []
    # Sorted traversal keeps the results independent of directory order.
    for filepath in sorted(Path(output_dir).glob('*.f')):
        if filepath.name.startswith('_'):
            continue
        # latin-1 decodes any byte sequence, so unusual comment encodings in
        # the Fortran source cannot abort the scan.
        with open(filepath, 'r', encoding='latin-1') as f:
            content = f.read()
        unit_name = filepath.stem.upper()

        # Fortran names are case-insensitive: fold to upper case so that
        # /blk/ and /BLK/ count as the same block.
        found_commons = {block.upper() for block in named_common_pattern.findall(content)}
        if blank_common_pattern.search(content):
            found_commons.add('BLANK')  # marker for blank COMMON
        found_includes = set(include_pattern.findall(content))

        if found_commons:
            commons[unit_name] = sorted(found_commons)
        else:
            pure_units.append(unit_name)
        if found_includes:
            includes[unit_name] = sorted(found_includes)

    # All distinct COMMON blocks referenced anywhere.
    all_commons = set()
    for block_list in commons.values():
        all_commons.update(block_list)

    # The report contains non-ASCII text; write it as UTF-8 regardless of the
    # locale.
    common_path = os.path.join(output_dir, '_COMMON_ANALYSIS.txt')
    with open(common_path, 'w', encoding='utf-8') as f:
        f.write("COMMON 块依赖分析\n")
        f.write(f"{'='*60}\n\n")
        f.write("有 COMMON 依赖的单元:\n")
        f.write(f"{'-'*60}\n")
        for unit, common_list in sorted(commons.items()):
            f.write(f"{unit}: {', '.join(common_list)}\n")
        f.write(f"\n{len(commons)} 个单元有 COMMON 依赖\n")
        # Number of distinct blocks, not the number of units again.
        f.write(f"{len(all_commons)} 个 COMMON 块被引用\n")
        f.write(f"\n唯一的 COMMON 块: {sorted(all_commons)}\n")
        f.write(f"\n\nINCLUDE 文件依赖:\n")
        f.write(f"{'-'*60}\n")
        for unit, inc_list in sorted(includes.items()):
            f.write(f"{unit}: {', '.join(inc_list)}\n")
    print(f"COMMON 分析已保存到: {common_path}")

    return pure_units, commons, includes
def generate_makefile(output_dir, extracted, source_file):
    """Write a gfortran Makefile for the extracted files into ``output_dir``.

    The Makefile picks up every ``*.f`` file through a wildcard, so the unit
    list is not needed to generate it.

    Args:
        output_dir: Directory that receives the ``Makefile``.
        extracted: Unit list returned by the extraction step.  Currently
            unused; kept so existing callers keep working.
        source_file: Path of the original Fortran source; only its base name
            is used, to derive the program name.
    """
    # Derive the program name from the source file name.
    source_name = os.path.basename(source_file).lower()
    if 'tlusty' in source_name:
        prog_name = 'tlusty'
    elif 'synspec' in source_name:
        prog_name = 'synspec'
    else:
        prog_name = os.path.splitext(source_name)[0]

    makefile_text = [
        f"# Makefile for {prog_name.upper()} extracted modules\n",
        "# 使用大内存模型支持大型 COMMON 数组\n\n",
        "FC = gfortran\n",
        "FFLAGS = -O3 -fno-automatic -mcmodel=large\n\n",
        "# 编译输出目录\n",
        "BUILD_DIR = build\n\n",
        "# 目标可执行文件\n",
        f"MAIN = $(BUILD_DIR)/{prog_name}_extracted\n\n",
        "# 所有 .f 源文件\n",
        "SRCS = $(wildcard *.f)\n\n",
        "# 目标文件放在build目录\n",
        "OBJS = $(patsubst %.f,$(BUILD_DIR)/%.o,$(notdir $(SRCS)))\n\n",
        "# 默认目标\n",
        "all: $(BUILD_DIR) $(MAIN)\n",
        "\t@echo \"==========================================\"\n",
        "\t@echo \"编译成功: $(MAIN)\"\n",
        "\t@echo \"==========================================\"\n\n",
        "# 创建build目录\n",
        "$(BUILD_DIR):\n",
        "\tmkdir -p $(BUILD_DIR)\n\n",
        "# 链接所有目标文件\n",
        "$(MAIN): $(OBJS)\n",
        "\t$(FC) $(FFLAGS) -o $@ $(OBJS)\n\n",
        "# 编译规则\n",
        "$(BUILD_DIR)/%.o: %.f | $(BUILD_DIR)\n",
        "\t$(FC) $(FFLAGS) -c $< -o $@\n\n",
        "# 清理\n",
        "clean:\n",
        "\trm -rf $(BUILD_DIR)\n\n",
        "# 只编译不链接(检查语法)\n",
        "compile-only: $(OBJS)\n",
        "\t@echo \"所有文件编译完成(未链接)\"\n\n",
        "# 统计信息\n",
        "stats:\n",
        "\t@echo \"=== 编译统计 ===\"\n",
        "\t@echo \"源文件数: $(words $(SRCS))\"\n",
        "\t@echo \"目标文件数: $(words $(OBJS))\"\n",
        "\t@wc -l *.f | tail -1\n\n",
        ".PHONY: all clean compile-only stats\n",
    ]

    makefile_path = os.path.join(output_dir, 'Makefile')
    # The Makefile contains non-ASCII comments and messages; write it as UTF-8
    # instead of the locale encoding, which may be unable to encode them.
    with open(makefile_path, 'w', encoding='utf-8') as f:
        f.writelines(makefile_text)
    print(f"Makefile 已生成: {makefile_path}")
def main():
    """Run the full pipeline: extract units, analyse COMMONs, write outputs.

    Command line: ``extract_fortran.py [SOURCE_FILE [OUTPUT_DIR]]``.  With no
    arguments the built-in default source and output paths are used; with only
    SOURCE_FILE the output directory defaults to ``extracted``.
    """
    if len(sys.argv) < 2:
        source_file = "/home/fmq/program/tlusty/tl208-s54/rust/synspec/synspec54.f"
        output_dir = "/home/fmq/program/tlusty/tl208-s54/rust/synspec/extracted"
    else:
        source_file = sys.argv[1]
        output_dir = sys.argv[2] if len(sys.argv) > 2 else "extracted"
    print(f"源文件: {source_file}")
    print(f"输出目录: {output_dir}\n")

    # Split the source into one file per program unit.
    extracted = extract_units(source_file, output_dir)

    # Find which units depend on COMMON blocks.
    print("\n分析 COMMON 依赖...")
    pure_units, commons, includes = analyze_commons(output_dir)
    pure_sorted = sorted(pure_units)
    print(f"\n无 COMMON 依赖的纯函数/子程序: {len(pure_sorted)}")
    for u in pure_sorted:
        print(f" {u}")

    generate_makefile(output_dir, extracted, source_file)

    # Save the list of units without COMMON dependencies.  The header is
    # non-ASCII, so write UTF-8 rather than the locale encoding.
    pure_path = os.path.join(output_dir, '_PURE_UNITS.txt')
    with open(pure_path, 'w', encoding='utf-8') as f:
        f.write("无 COMMON 依赖的纯函数/子程序\n")
        f.write(f"{'='*40}\n\n")
        for u in pure_sorted:
            f.write(f"{u}\n")
    print(f"\n纯函数列表已保存到: {pure_path}")
# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()