#!/usr/bin/env python3
"""Split a Fortran source file (e.g. synspec54.f) into one file per program unit.

Besides the per-unit ``.f`` files, the script writes into the output directory:

* ``_SUMMARY.txt``          -- table of the extracted units,
* ``_COMMON_ANALYSIS.txt``  -- COMMON / INCLUDE usage per unit,
* ``_PURE_UNITS.txt``       -- units that reference no COMMON block,
* ``Makefile``              -- builds all extracted files with gfortran.
"""

import re
import os
import sys
from collections import Counter
from pathlib import Path

# Fortran sources are mostly ASCII but may carry stray non-UTF-8 bytes in
# comments; "surrogateescape" lets such bytes round-trip unchanged instead of
# raising UnicodeDecodeError.
_SRC_IO = {'encoding': 'utf-8', 'errors': 'surrogateescape'}
# The generated reports contain non-ASCII text, so never rely on the locale.
_REPORT_IO = {'encoding': 'utf-8'}

# Optional type in front of FUNCTION: REAL, REAL*8, CHARACTER*(*),
# DOUBLE PRECISION, INTEGER(KIND=4), ...
_TYPE_SPEC = (
    r'(?:INTEGER|REAL|COMPLEX|LOGICAL|CHARACTER'
    r'|DOUBLE\s*PRECISION|DOUBLE\s*COMPLEX)'
    r'(?:\s*\*\s*\d+|\s*\*?\s*\([^)]*\))?'
)
_PROC_PREFIX = r'(?:(?:RECURSIVE|PURE|ELEMENTAL)\s+)*'
_FUNC_PREFIX = r'(?:(?:RECURSIVE|PURE|ELEMENTAL|' + _TYPE_SPEC + r')\s+)*'

# Start of a program unit.  PROGRAM and BLOCK DATA may be unnamed, hence the
# ``\s*(\w*)`` (possibly empty) name groups.
_UNIT_PATTERN = re.compile(
    ''.join([
        r'^\s*(?:',
        _PROC_PREFIX, r'SUBROUTINE\s+(?P<subroutine>\w+)|',
        _FUNC_PREFIX, r'FUNCTION\s+(?P<function>\w+)|',
        r'PROGRAM\s*(?P<program>\w*)|',
        r'BLOCK\s+DATA\s*(?P<blockdata>\w*)',
        r')',
    ]),
    re.IGNORECASE,
)

# Regex group name -> unit type as reported in the summary.
_UNIT_TYPES = {
    'subroutine': 'SUBROUTINE',
    'function': 'FUNCTION',
    'program': 'PROGRAM',
    'blockdata': 'BLOCK DATA',
}

# END of a program unit: bare "END", "END SUBROUTINE [name]", etc., optionally
# followed by a "!" comment.  "END IF" / "END DO" / "ENDFILE" do not match.
_END_PATTERN = re.compile(
    r'^\s*END'
    r'(?:\s*(?:SUBROUTINE|FUNCTION|PROGRAM|BLOCK\s*DATA)(?:\s+\w+)?)?'
    r'\s*(?:!.*)?$',
    re.IGNORECASE,
)

# Named COMMON block: COMMON /NAME/ ...
_NAMED_COMMON = re.compile(r'COMMON\s*/\s*(\w+)\s*/', re.IGNORECASE)
# Blank COMMON: "COMMON var, ..." or the explicit "COMMON // var, ..." form.
_BLANK_COMMON = re.compile(
    r'^\s*COMMON(?:\s*/\s*/|\s+[A-Z])', re.IGNORECASE | re.MULTILINE
)
_INCLUDE = re.compile(r'INCLUDE\s*[\'"]([^\'"]+)[\'"]', re.IGNORECASE)


def _find_units(lines):
    """Return ``(line_index, NAME, unit_type)`` for every unit header in *lines*."""
    units = []
    for index, line in enumerate(lines):
        match = _UNIT_PATTERN.match(line)
        if match is None:
            continue
        for group, unit_type in _UNIT_TYPES.items():
            name = match.group(group)
            if name is None:
                continue  # this alternative did not take part in the match
            if not name:
                # Unnamed PROGRAM / BLOCK DATA: synthesize a placeholder name.
                name = f"_UNNAMED_{unit_type.replace(' ', '_')}_"
            units.append((index, name.upper(), unit_type))
            break
    return units


def _write_summary(output_dir, source_file, extracted, total_lines):
    """Write ``_SUMMARY.txt`` for the extracted units and return its path."""
    summary_path = os.path.join(output_dir, '_SUMMARY.txt')
    # Title follows the actual input file instead of a hard-coded name.
    title = os.path.basename(source_file).upper()
    type_counts = Counter(unit['type'] for unit in extracted)

    with open(summary_path, 'w', **_REPORT_IO) as f:
        f.write(f"{title} 提取摘要\n")
        f.write(f"{'='*60}\n\n")
        f.write(f"源文件: {source_file}\n")
        f.write(f"总单元数: {len(extracted)}\n")
        f.write(f"总行数: {total_lines}\n\n")

        f.write(f"{'名称':<20} {'类型':<12} {'文件':<20} {'行数':>8}\n")
        f.write(f"{'-'*60}\n")
        for unit in extracted:
            f.write(
                f"{unit['name']:<20} {unit['type']:<12} "
                f"{unit['file']:<20} {unit['lines']:>8}\n"
            )

        # Per-type totals, in order of first appearance.
        f.write("\n按类型统计:\n")
        for unit_type, count in type_counts.items():
            f.write(f" {unit_type}: {count}\n")
    return summary_path


def extract_units(source_file, output_dir):
    """Extract every Fortran program unit of *source_file* into its own file.

    A unit starts at a SUBROUTINE / FUNCTION / PROGRAM / BLOCK DATA header
    (FUNCTION may carry a type prefix such as ``DOUBLE PRECISION``) and ends at
    the last END statement found before the next unit header; anything after
    that END (e.g. comments preceding the next unit) is dropped.  Lines before
    the first header are not extracted.

    Args:
        source_file: path of the Fortran source to split.
        output_dir: directory receiving ``<name>.f`` files and ``_SUMMARY.txt``;
            created if missing.  Units sharing a name overwrite each other.

    Returns:
        A list of dicts, one per unit in source order, with keys ``name``,
        ``type``, ``file``, ``start`` (1-based first line), ``end`` (1-based
        last line) and ``lines`` (line count).
    """
    with open(source_file, 'r', **_SRC_IO) as f:
        content = f.read()

    lines = content.split('\n')
    # A file ending in a newline yields a phantom empty last element; drop it
    # so that line counts are not off by one.
    if lines and lines[-1] == '':
        lines.pop()

    os.makedirs(output_dir, exist_ok=True)

    units = _find_units(lines)
    print(f"找到 {len(units)} 个程序单元")

    # Each unit extends at most up to the header of the next one (or EOF).
    boundaries = [start for start, _, _ in units[1:]] + [len(lines)]

    extracted = []
    for (start_line, name, unit_type), end_line in zip(units, boundaries):
        # Search backwards for the unit's real END; fall back to the boundary.
        actual_end = end_line
        for index in range(end_line - 1, start_line - 1, -1):
            if _END_PATTERN.match(lines[index]):
                actual_end = index + 1
                break

        unit_content = '\n'.join(lines[start_line:actual_end])
        if not unit_content.endswith('\n'):
            unit_content += '\n'

        filename = f"{name.lower()}.f"
        with open(os.path.join(output_dir, filename), 'w', **_SRC_IO) as f:
            f.write(unit_content)

        line_count = actual_end - start_line
        extracted.append({
            'name': name,
            'type': unit_type,
            'file': filename,
            'start': start_line + 1,
            'end': actual_end,
            'lines': line_count,
        })
        print(f" 提取: {name} ({unit_type}) -> {filename} ({line_count} 行)")

    summary_path = _write_summary(output_dir, source_file, extracted, len(lines))
    print(f"\n摘要已保存到: {summary_path}")

    return extracted


def analyze_commons(output_dir):
    """Report which extracted units reference COMMON blocks and INCLUDE files.

    Scans every ``*.f`` file in *output_dir* and writes
    ``_COMMON_ANALYSIS.txt`` there.  COMMON names are upper-cased (Fortran is
    case-insensitive); blank COMMON is reported under the name ``BLANK``.
    Only a block name that directly follows the COMMON keyword is detected.

    Args:
        output_dir: directory containing the extracted ``.f`` files.

    Returns:
        ``(pure_units, commons, includes)`` where *pure_units* is the list of
        unit names without any COMMON reference, and *commons* / *includes*
        map unit name -> sorted list of COMMON block names / included files.
    """
    commons = {}
    includes = {}
    pure_units = []

    for filepath in Path(output_dir).glob('*.f'):
        # NOTE(review): this also skips unnamed units, whose generated file
        # names start with "_unnamed_" -- confirm that is intended.
        if filepath.name.startswith('_'):
            continue

        with open(filepath, 'r', **_SRC_IO) as f:
            content = f.read()

        unit_name = filepath.stem.upper()

        found_commons = {name.upper() for name in _NAMED_COMMON.findall(content)}
        if _BLANK_COMMON.search(content):
            found_commons.add('BLANK')
        found_includes = set(_INCLUDE.findall(content))

        if found_commons:
            commons[unit_name] = sorted(found_commons)
        else:
            pure_units.append(unit_name)
        if found_includes:
            includes[unit_name] = sorted(found_includes)

    # Distinct COMMON blocks across all units.
    all_commons = set()
    for common_list in commons.values():
        all_commons.update(common_list)

    common_path = os.path.join(output_dir, '_COMMON_ANALYSIS.txt')
    with open(common_path, 'w', **_REPORT_IO) as f:
        f.write("COMMON 块依赖分析\n")
        f.write(f"{'='*60}\n\n")

        f.write("有 COMMON 依赖的单元:\n")
        f.write(f"{'-'*60}\n")
        for unit, common_list in sorted(commons.items()):
            f.write(f"{unit}: {', '.join(common_list)}\n")

        f.write(f"\n共 {len(commons)} 个单元有 COMMON 依赖\n")
        # Number of distinct blocks (previously this repeated the unit count).
        f.write(f"共 {len(all_commons)} 个 COMMON 块被引用\n")
        f.write(f"\n唯一的 COMMON 块: {sorted(all_commons)}\n")

        f.write("\n\nINCLUDE 文件依赖:\n")
        f.write(f"{'-'*60}\n")
        for unit, inc_list in sorted(includes.items()):
            f.write(f"{unit}: {', '.join(inc_list)}\n")

    print(f"COMMON 分析已保存到: {common_path}")

    return pure_units, commons, includes


def generate_makefile(output_dir, extracted, source_file):
    """Write a gfortran ``Makefile`` into *output_dir*.

    The program name is ``tlusty`` or ``synspec`` when the source file name
    contains that word, otherwise the lower-cased file stem.

    Args:
        output_dir: directory receiving the ``Makefile``.
        extracted: result of :func:`extract_units`; currently unused because
            the Makefile discovers sources with ``$(wildcard *.f)``.
        source_file: path of the original Fortran source.
    """
    source_name = os.path.basename(source_file).lower()
    if 'tlusty' in source_name:
        prog_name = 'tlusty'
    elif 'synspec' in source_name:
        prog_name = 'synspec'
    else:
        prog_name = os.path.splitext(os.path.basename(source_file))[0].lower()

    makefile_lines = [
        f"# Makefile for {prog_name.upper()} extracted modules\n",
        "# 使用大内存模型支持大型 COMMON 数组\n\n",
        "FC = gfortran\n",
        "FFLAGS = -O3 -fno-automatic -mcmodel=large\n\n",
        "# 编译输出目录\n",
        "BUILD_DIR = build\n\n",
        "# 目标可执行文件\n",
        f"MAIN = $(BUILD_DIR)/{prog_name}_extracted\n\n",
        "# 所有 .f 源文件\n",
        "SRCS = $(wildcard *.f)\n\n",
        "# 目标文件(放在build目录)\n",
        "OBJS = $(patsubst %.f,$(BUILD_DIR)/%.o,$(notdir $(SRCS)))\n\n",
        "# 默认目标\n",
        "all: $(BUILD_DIR) $(MAIN)\n",
        "\t@echo \"==========================================\"\n",
        "\t@echo \"编译成功: $(MAIN)\"\n",
        "\t@echo \"==========================================\"\n\n",
        "# 创建build目录\n",
        "$(BUILD_DIR):\n",
        "\tmkdir -p $(BUILD_DIR)\n\n",
        "# 链接所有目标文件\n",
        "$(MAIN): $(OBJS)\n",
        "\t$(FC) $(FFLAGS) -o $@ $(OBJS)\n\n",
        "# 编译规则\n",
        "$(BUILD_DIR)/%.o: %.f | $(BUILD_DIR)\n",
        "\t$(FC) $(FFLAGS) -c $< -o $@\n\n",
        "# 清理\n",
        "clean:\n",
        "\trm -rf $(BUILD_DIR)\n\n",
        "# 只编译不链接(检查语法)\n",
        "compile-only: $(OBJS)\n",
        "\t@echo \"所有文件编译完成(未链接)\"\n\n",
        "# 统计信息\n",
        "stats:\n",
        "\t@echo \"=== 编译统计 ===\"\n",
        "\t@echo \"源文件数: $(words $(SRCS))\"\n",
        "\t@echo \"目标文件数: $(words $(OBJS))\"\n",
        "\t@wc -l *.f | tail -1\n\n",
        ".PHONY: all clean compile-only stats\n",
    ]

    makefile_path = os.path.join(output_dir, 'Makefile')
    with open(makefile_path, 'w', **_REPORT_IO) as f:
        f.writelines(makefile_lines)

    print(f"Makefile 已生成: {makefile_path}")


def main():
    """Command-line entry point: ``script [source_file [output_dir]]``.

    Without arguments the built-in default source and output paths are used;
    with only a source file the output directory defaults to ``extracted``.
    """
    if len(sys.argv) < 2:
        source_file = "/home/fmq/program/tlusty/tl208-s54/rust/synspec/synspec54.f"
        output_dir = "/home/fmq/program/tlusty/tl208-s54/rust/synspec/extracted"
    else:
        source_file = sys.argv[1]
        output_dir = sys.argv[2] if len(sys.argv) > 2 else "extracted"

    print(f"源文件: {source_file}")
    print(f"输出目录: {output_dir}\n")

    # Split the source into units.
    extracted = extract_units(source_file, output_dir)

    # Analyze COMMON usage of the extracted units.
    print("\n分析 COMMON 依赖...")
    pure_units, commons, includes = analyze_commons(output_dir)

    print(f"\n无 COMMON 依赖的纯函数/子程序: {len(pure_units)} 个")
    for unit in sorted(pure_units):
        print(f" {unit}")

    generate_makefile(output_dir, extracted, source_file)

    # Save the list of COMMON-free units.
    pure_path = os.path.join(output_dir, '_PURE_UNITS.txt')
    with open(pure_path, 'w', **_REPORT_IO) as f:
        f.write("无 COMMON 依赖的纯函数/子程序\n")
        f.write(f"{'='*40}\n\n")
        for unit in sorted(pure_units):
            f.write(f"{unit}\n")

    print(f"\n纯函数列表已保存到: {pure_path}")


if __name__ == '__main__':
    main()