# SpectraRust/scratch/check_duplicates.py
# 2026-06-03 14:11:10 +08:00
#
# 125 lines
# 4.3 KiB
# Python

import os
import re
from collections import defaultdict
# Root of the Rust source tree that is scanned.
src_dir = "/home/fmq/program/SpectraRust/src"

# Function definitions: `fn name(...)`, `pub fn name(...)`,
# `pub(crate) fn name<...>`. Group 1 captures the function name and the match
# ends just past the `(` or `<` that follows it.
fn_pattern = re.compile(
    r'(?:pub\s+)?(?:pub\((?:crate|self|super)\)\s+)?fn\s+([a-zA-Z0-9_]+)\s*[\(<]'
)

# Struct definitions, with the same optional visibility prefixes. Group 1
# captures the struct name; a trailing `{` or `<` is consumed when present.
struct_pattern = re.compile(
    r'(?:pub\s+)?(?:pub\((?:crate|self|super)\)\s+)?struct\s+([a-zA-Z0-9_]+)\s*[\{<]?'
)

# Lookup tables filled in while the source tree is walked.
file_basenames = defaultdict(list)    # file name -> relative paths with that name
file_functions = defaultdict(list)    # relative path -> function names found in it
fn_locations = defaultdict(list)      # function name -> one record per occurrence
struct_locations = defaultdict(list)  # struct name -> relative paths defining it
# Matches, leftmost-first, the Rust tokens whose text must not be read as
# code: comments (dropped by normalize_code) and literals (kept verbatim).
_COMMENT_OR_LITERAL_RE = re.compile(
    "|".join((
        r"//[^\n]*",               # line comment
        r"/\*.*?\*/",              # block comment (nesting is not tracked)
        r'r(#*)".*?"\1',           # raw string literal: r"..", r#".."#, ...
        r'"(?:\\.|[^"\\])*"',      # ordinary string literal with escapes
        r"'(?:\\.[^']*|[^'\\])'",  # char literal; a lifetime like 'a never matches
    )),
    re.DOTALL,
)


def normalize_code(code):
    """Return *code* reduced to a canonical form for equality comparison.

    Comments are removed and all whitespace between tokens is dropped, so
    two snippets that differ only in layout or commentary normalize to the
    same string.

    String and char literals are copied through unchanged. A ``//`` inside
    a literal (for example a URL) is therefore not mistaken for a comment,
    and whitespace inside a literal still counts as a difference.

    Args:
        code: Rust source text (typically one function body).

    Returns:
        The normalized text; ``""`` for empty or comment-only input.
    """
    pieces = []
    cursor = 0
    for token in _COMMENT_OR_LITERAL_RE.finditer(code):
        # Plain code between two protected tokens: strip all whitespace.
        pieces.append("".join(code[cursor:token.start()].split()))
        text = token.group()
        # Only comments start with "//" or "/*"; every other match is a
        # literal and is kept byte-for-byte.
        if not text.startswith(("//", "/*")):
            pieces.append(text)
        cursor = token.end()
    pieces.append("".join(code[cursor:].split()))
    return "".join(pieces)
# Matches, leftmost-first, either a Rust comment/literal (skipped as a unit so
# braces inside it are not counted) or one structural character.
_BODY_TOKEN_RE = re.compile(
    "|".join((
        r"//[^\n]*",               # line comment
        r"/\*.*?\*/",              # block comment (nesting is not tracked)
        r'r(#*)".*?"\1',           # raw string literal: r"..", r#".."#, ...
        r'"(?:\\.|[^"\\])*"',      # ordinary string literal with escapes
        r"'(?:\\.[^']*|[^'\\])'",  # char literal; a lifetime like 'a never matches
        r"[{};\[\]]",              # structural characters the scanner acts on
    )),
    re.DOTALL,
)


def extract_function_body(content, start_pos):
    """Return the brace-delimited body of the function starting at *start_pos*.

    Scanning starts at *start_pos* (a position inside the function
    signature) and looks for the first ``{``. From there it returns the text
    up to and including the matching ``}``. Braces that appear inside
    comments, string literals or char literals are ignored.

    Args:
        content: Full text of the Rust source file.
        start_pos: Index in *content* from which to start scanning.

    Returns:
        The body including its outer braces. Returns ``""`` when no body
        exists: either no ``{`` follows, or the signature is terminated by
        ``;`` first, as in a trait or ``extern`` declaration. If the braces
        never balance, everything from the opening brace to the end of
        *content* is returned.
    """
    first_brace = -1
    brace_depth = 0
    bracket_depth = 0
    for token in _BODY_TOKEN_RE.finditer(content, start_pos):
        text = token.group()
        if len(text) > 1:
            # Comment or literal: every such match is at least two
            # characters long, structural matches are exactly one.
            continue
        if first_brace == -1:
            # Still inside the signature.
            if text == "[":
                bracket_depth += 1
            elif text == "]":
                bracket_depth = max(0, bracket_depth - 1)
            elif text == ";" and bracket_depth == 0:
                # A ';' outside an array type such as [u8; 4] ends a bodiless
                # declaration; the next '{' belongs to some other item.
                return ""
            elif text == "{":
                first_brace = token.start()
                brace_depth = 1
        elif text == "{":
            brace_depth += 1
        elif text == "}":
            brace_depth -= 1
            if brace_depth == 0:
                return content[first_brace:token.end()]
    if first_brace == -1:
        return ""
    # Unbalanced braces: fall back to everything after the opening brace.
    return content[first_brace:]
# Walk the source tree and index every function and struct definition.
for root, dirs, files in os.walk(src_dir):
    # os.walk yields entries in arbitrary order. Sorting `dirs` in place fixes
    # the descent order and sorting `files` fixes the per-directory order, so
    # occurrences are recorded (and later reported) identically on every run.
    dirs.sort()
    for file in sorted(files):
        # Only Rust sources are scanned; mod.rs and lib.rs are skipped.
        if not file.endswith(".rs") or file in ("mod.rs", "lib.rs"):
            continue

        path = os.path.join(root, file)
        rel_path = os.path.relpath(path, src_dir)
        file_basenames[file].append(rel_path)

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        # Record every function together with its normalized body.
        for match in fn_pattern.finditer(content):
            fn_name = match.group(1)
            # Entry points and test functions are not counted as duplicates.
            if fn_name == "main" or fn_name.startswith("test_"):
                continue

            # The pattern ends just past the '(' or '<' after the name, so
            # body extraction starts scanning from inside the signature.
            start_pos = match.end()
            body = extract_function_body(content, start_pos)
            normalized_body = normalize_code(body)

            fn_locations[fn_name].append({
                "path": rel_path,
                "body": normalized_body,
                "raw_body": body[:200]  # snippet
            })
            file_functions[rel_path].append(fn_name)

        # Record every struct definition by name.
        for match in struct_pattern.finditer(content):
            struct_name = match.group(1)
            struct_locations[struct_name].append(rel_path)
# --- Section 1: source files that share a basename -------------------------
print("=== 1. 重复的文件名 (Duplicate File Basenames) ===")
dup_files = {
    name: locations
    for name, locations in file_basenames.items()
    if len(locations) > 1
}
if not dup_files:
    print("没有重复的源文件名。")
else:
    for filename, paths in sorted(dup_files.items()):
        print(f"文件名: {filename}")
        for p in paths:
            print(f" - src/{p}")

# --- Section 2: function names that occur more than once -------------------
print("\n=== 2. 重复的函数实现 (Duplicate Function Implementations) ===")
dup_fns = {
    name: records
    for name, records in fn_locations.items()
    if len(records) > 1
}
if not dup_fns:
    print("没有发现重复的函数名。")
else:
    for fn_name, occurrences in sorted(dup_fns.items()):
        print(f"函数名: {fn_name}()")
        # The implementations count as identical only when every normalized
        # body equals the first one recorded.
        first_body = occurrences[0]["body"]
        identical = all(occ["body"] == first_body for occ in occurrences[1:])
        status = "【完全相同】" if identical else "【有差异的实现】"
        print(f" 状态: {status}")
        for occ in occurrences:
            print(f" - src/{occ['path']}")

# --- Section 3: struct names that are defined more than once ---------------
print("\n=== 3. 重复的 Struct 定义 (Duplicate Struct Definitions) ===")
dup_structs = {
    name: locations
    for name, locations in struct_locations.items()
    if len(locations) > 1
}
if not dup_structs:
    print("没有发现重复的结构体名。")
else:
    for struct_name, paths in sorted(dup_structs.items()):
        print(f"结构体: struct {struct_name}")
        for p in paths:
            print(f" - src/{p}")