# SpectraRust/scratch/check_duplicates.py

import os
import re
from collections import defaultdict

# Root of the Rust source tree that the script scans.
src_dir = "/home/fmq/program/SpectraRust/src"

# Matches Rust function definitions: `fn name(` or `fn name<`, optionally
# preceded by `pub` / `pub(crate|self|super)`.  Group 1 is the function name.
# The `\b` keeps the keyword from matching inside a longer word (e.g. `defn`).
fn_pattern = re.compile(r'(?:pub\s+)?(?:pub\((?:crate|self|super)\)\s+)?\bfn\s+([a-zA-Z0-9_]+)\s*[\(<]')

# Matches Rust struct definitions.  Group 1 is the struct name.
# The name must be followed by `{` (record struct), `<` (generics),
# `(` (tuple struct) or `;` (unit struct); together with `\b` this stops prose
# such as "construct Foo" or "this struct is used" from being counted.
struct_pattern = re.compile(r'(?:pub\s+)?(?:pub\((?:crate|self|super)\)\s+)?\bstruct\s+([a-zA-Z0-9_]+)\s*[{<(;]')

# relative file path -> names of the functions found in that file
file_functions = defaultdict(list)
# function name -> list of {"path", "body", "raw_body"} occurrence records
fn_locations = defaultdict(list)
# struct name -> relative paths of the files that define it
struct_locations = defaultdict(list)
# file basename -> relative paths of all files sharing that basename
file_basenames = defaultdict(list)

# One pass over Rust source: the `literal` group captures string/char literals
# (which must be kept verbatim), the remaining alternatives are comments.
_COMMENT_OR_LITERAL_RE = re.compile(
    r'(?P<literal>'
    r'\bb?r(?P<hashes>#*)".*?"(?P=hashes)'   # raw string: r"..", r#".."#, br".."
    r'|"(?:\\.|[^"\\])*"'                    # ordinary string with escapes
    r"|'(?:\\'|\\[^'\n]+|[^'\\\n])'"         # char literal (not a lifetime)
    r')'
    r'|//[^\n]*'                             # line comment
    r'|/\*.*?\*/',                           # block comment (non-nested)
    re.DOTALL,
)


def normalize_code(code):
    """Return *code* with comments and all whitespace removed.

    The result is meant for equality comparison of two Rust snippets, not for
    display.  Comments are recognised in a single left-to-right pass so that
    `//` or `/*` appearing inside a string or char literal is left alone, and
    a `//` inside a block comment cannot hide the closing `*/`.

    Limitations: nested block comments are not understood, and whitespace is
    removed everywhere, including inside string literals.
    """
    def _keep_literal(match):
        # Literals are preserved; comment matches have no `literal` group.
        return match.group("literal") or ""

    without_comments = _COMMENT_OR_LITERAL_RE.sub(_keep_literal, code)
    # Collapse every run of whitespace so formatting differences don't matter.
    return "".join(without_comments.split())

# Tokens whose contents must not influence brace matching: string literals,
# char literals and comments.
_BRACE_NEUTRAL_TOKEN_RE = re.compile(
    r'\bb?r(?P<hashes>#*)".*?"(?P=hashes)'   # raw string: r"..", r#".."#, br".."
    r'|"(?:\\.|[^"\\])*"'                    # ordinary string with escapes
    r"|'(?:\\'|\\[^'\n]+|[^'\\\n])'"         # char literal (not a lifetime)
    r'|//[^\n]*'                             # line comment
    r'|/\*.*?\*/',                           # block comment (non-nested)
    re.DOTALL,
)


def extract_function_body(content, start_pos):
    """Return the brace-delimited body that follows *start_pos* in *content*.

    Args:
        content: Full text of a Rust source file.
        start_pos: Index inside a function signature (the caller passes the
            position just after the function name and its opening `(`/`<`).

    Returns:
        The text from the body's opening `{` through its matching `}`,
        inclusive.  Returns "" when no `{` follows, or when the signature is
        terminated by `;` first (a body-less declaration such as a trait
        method).  If the closing brace is never found, everything from the
        opening `{` to the end of *content* is returned.

    Braces inside string literals, char literals and comments are ignored.
    """
    end = len(content)
    pos = start_pos
    body_start = -1      # index of the opening '{', -1 while still in the signature
    brace_depth = 0
    bracket_depth = 0    # '[' nesting in the signature, e.g. `[u8; 4]`

    while pos < end:
        char = content[pos]

        # Cheap pre-filter: only these characters can start a skippable token.
        if char in "\"'/rb":
            token = _BRACE_NEUTRAL_TOKEN_RE.match(content, pos)
            if token:
                pos = token.end()
                continue

        if body_start == -1:
            if char == '[':
                bracket_depth += 1
            elif char == ']':
                bracket_depth -= 1
            elif char == ';' and bracket_depth <= 0:
                # Signature ended without a body; the next '{' in the file
                # belongs to some other item.
                return ""
            elif char == '{':
                body_start = pos
                brace_depth = 1
        elif char == '{':
            brace_depth += 1
        elif char == '}':
            brace_depth -= 1
            if brace_depth == 0:
                return content[body_start:pos + 1]

        pos += 1

    if body_start == -1:
        return ""
    # Unbalanced input: return what is there, as the caller only compares text.
    return content[body_start:]

# Walk the Rust source tree and index every function and struct definition.
# Directory and file names are sorted so that the order of paths in the report
# is reproducible; os.walk itself yields entries in filesystem order.
for root, dirs, files in os.walk(src_dir):
    dirs.sort()  # in-place edit: os.walk descends in the order left in `dirs`
    for file in sorted(files):
        # Only Rust sources are scanned; mod.rs and lib.rs are excluded
        # (presumably because those basenames legitimately repeat).
        if not file.endswith(".rs") or file in ("mod.rs", "lib.rs"):
            continue

        path = os.path.join(root, file)
        rel_path = os.path.relpath(path, src_dir)
        file_basenames[file].append(rel_path)

        # errors="replace": a stray non-UTF-8 byte in one file should not
        # abort the whole scan with UnicodeDecodeError.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()

        # Record every function (except `main` and `test_*`) with its
        # normalized body so implementations can be compared later.
        for match in fn_pattern.finditer(content):
            fn_name = match.group(1)
            if fn_name == "main" or fn_name.startswith("test_"):
                continue
            body = extract_function_body(content, match.end())
            fn_locations[fn_name].append({
                "path": rel_path,
                "body": normalize_code(body),
                "raw_body": body[:200],  # short snippet of the original text
            })
            file_functions[rel_path].append(fn_name)

        # Record where each struct name is defined.
        for match in struct_pattern.finditer(content):
            struct_locations[match.group(1)].append(rel_path)

# --- Report section 1: basenames used by more than one source file ---------
print("=== 1. 重复的文件名 (Duplicate File Basenames) ===")
dup_files = {
    name: locations
    for name, locations in file_basenames.items()
    if len(locations) > 1
}
if not dup_files:
    print("没有重复的源文件名。")
for filename in sorted(dup_files):
    print(f"文件名: {filename}")
    for p in dup_files[filename]:
        print(f"  - src/{p}")

# --- Report section 2: function names that occur more than once -------------
print("\n=== 2. 重复的函数实现 (Duplicate Function Implementations) ===")
dup_fns = {
    name: found
    for name, found in fn_locations.items()
    if len(found) > 1
}
if not dup_fns:
    print("没有发现重复的函数名。")
for fn_name in sorted(dup_fns):
    occurrences = dup_fns[fn_name]
    print(f"函数名: {fn_name}()")
    # The implementations count as identical only when every normalized body
    # equals the first one.
    reference_body = occurrences[0]["body"]
    if all(entry["body"] == reference_body for entry in occurrences[1:]):
        status = "【完全相同】"
    else:
        status = "【有差异的实现】"
    print(f"  状态: {status}")
    for occ in occurrences:
        print(f"  - src/{occ['path']}")

# --- Report section 3: struct names defined in more than one place ----------
print("\n=== 3. 重复的 Struct 定义 (Duplicate Struct Definitions) ===")
dup_structs = {
    name: locations
    for name, locations in struct_locations.items()
    if len(locations) > 1
}
if not dup_structs:
    print("没有发现重复的结构体名。")
for struct_name in sorted(dup_structs):
    print(f"结构体: struct {struct_name}")
    for p in dup_structs[struct_name]:
        print(f"  - src/{p}")