#!/usr/bin/env python3
"""Remove Chinese comments (// and # style) from source files.

Only single-line comments are handled.  Detection is line based: a comment
marker counts only when it appears outside a single- or double-quoted string
literal on the same line.  Block comments (``/* ... */``) and string literals
that span several lines are not tracked.
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Single-line comment marker for each supported file extension.
_COMMENT_PREFIXES = {
    # C/C++ style
    '.c': '//',
    '.cpp': '//',
    '.h': '//',
    '.hpp': '//',
    # Python style
    '.py': '#',
}

# Marker used for any extension that is not listed above.
_DEFAULT_COMMENT_PREFIX = '//'

# File extensions and their comment patterns ("marker up to end of line").
# Derived from the prefix table so the two can never disagree.
COMMENT_PATTERNS = {
    ext: f'{prefix}.*' for ext, prefix in _COMMENT_PREFIXES.items()
}

# CJK Unified Ideographs (U+4E00..U+9FFF).
_CHINESE_RE = re.compile(r'[\u4e00-\u9fff]')


def get_comment_pattern(filepath: str) -> str:
    """Return the comment regex for *filepath*, chosen by file extension.

    Unknown extensions fall back to the ``//`` pattern.
    """
    ext = Path(filepath).suffix.lower()
    return COMMENT_PATTERNS.get(ext, f'{_DEFAULT_COMMENT_PREFIX}.*')


def get_comment_prefix(filepath: str) -> str:
    """Return the comment marker for *filepath*, chosen by file extension.

    Unknown extensions fall back to ``//``.
    """
    ext = Path(filepath).suffix.lower()
    return _COMMENT_PREFIXES.get(ext, _DEFAULT_COMMENT_PREFIX)


def contains_chinese(text: str) -> bool:
    """Return True if *text* contains a character in U+4E00..U+9FFF."""
    return _CHINESE_RE.search(text) is not None


def _find_comment_start(line: str, prefix: str) -> int:
    """Return the index of the first *prefix* outside a quoted string, or -1.

    Quote state is tracked for ``'`` and ``"`` with backslash escapes, on
    this line only.  An unterminated quote hides everything after it, so a
    doubtful line is left untouched rather than truncated.
    """
    quote = None
    i = 0
    length = len(line)
    while i < length:
        ch = line[i]
        if quote is not None:
            if ch == '\\':
                # Skip the escaped character so \" or \' does not end the string.
                i += 2
                continue
            if ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif line.startswith(prefix, i):
            return i
        i += 1
    return -1


def _chinese_comment_start(line: str, prefix: str) -> int:
    """Return where a comment containing Chinese starts in *line*, or -1."""
    start = _find_comment_start(line, prefix)
    if start >= 0 and contains_chinese(line[start:]):
        return start
    return -1


def _line_ending(line: str) -> str:
    """Return the line terminator of *line* ('' when it has none)."""
    for ending in ('\r\n', '\n', '\r'):
        if line.endswith(ending):
            return ending
    return ''


def find_chinese_comments(filepath: str) -> list[tuple[int, str]]:
    """Find all lines that carry a comment containing Chinese characters.

    Prints an error to stderr and exits with status 1 when *filepath* does
    not exist.

    Returns a list of ``(line_number, line_content)`` tuples; line numbers
    are 1-based and the content has trailing whitespace removed.
    """
    path = Path(filepath)
    if not path.exists():
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    prefix = get_comment_prefix(filepath)
    with open(path, 'r', encoding='utf-8') as f:
        return [
            (line_num, line.rstrip())
            for line_num, line in enumerate(f, 1)
            if _chinese_comment_start(line, prefix) >= 0
        ]


def remove_chinese_comments(filepath: str, *, keep_line_endings: bool = False) -> str:
    """Return the content of *filepath* with Chinese comments removed.

    A line consisting only of such a comment is dropped entirely.  When the
    comment follows code, the comment and the whitespace before it are cut
    and the line keeps its own terminator (none, if it had none).

    The file itself is not modified.

    Args:
        filepath: Path of the UTF-8 source file to read.
        keep_line_endings: When False (default) the file is read with
            universal-newline translation, so every terminator in the result
            is ``'\\n'``.  When True the original terminators (``'\\r\\n'``,
            ``'\\r'`` or ``'\\n'``) are preserved; write such a result back
            with ``newline=''`` to avoid a second translation.

    Returns:
        The modified content as a single string.
    """
    prefix = get_comment_prefix(filepath)
    newline = '' if keep_line_endings else None
    with open(Path(filepath), 'r', encoding='utf-8', newline=newline) as f:
        lines = f.readlines()

    kept = []
    for line in lines:
        start = _chinese_comment_start(line, prefix)
        if start < 0:
            kept.append(line)
            continue
        code = line[:start].rstrip()
        if not code:
            # Entire line is a comment - drop it.
            continue
        # Comment trails real code - keep the code and the line terminator.
        kept.append(code + _line_ending(line))
    return ''.join(kept)


def main():
    """Command-line entry point: list and, after confirmation, strip comments."""
    if len(sys.argv) < 2:
        print("Usage: python remove_chinese_comments.py FILE [FILE ...]",
              file=sys.stderr)
        print("       python remove_chinese_comments.py --check FILE [FILE ...]",
              file=sys.stderr)
        print("\nOptions:", file=sys.stderr)
        print("  --check    Only show Chinese comments without removing",
              file=sys.stderr)
        sys.exit(1)

    check_only = False
    files = sys.argv[1:]
    if files[0] == '--check':
        check_only = True
        files = files[1:]

    if not files:
        print("Error: No files specified", file=sys.stderr)
        sys.exit(1)

    total_found = 0
    for filepath in files:
        comments = find_chinese_comments(filepath)
        if not comments:
            print(f"\n{filepath}: No Chinese comments found")
            continue

        total_found += len(comments)
        print(f"\n{'='*60}")
        print(f"File: {filepath}")
        print(f"Found {len(comments)} Chinese comment(s):")
        print('-'*60)
        for line_num, line in comments:
            print(f"  L{line_num}: {line}")

        if check_only:
            continue

        # Ask for confirmation
        print('-'*60)
        try:
            response = input(
                f"Remove these {len(comments)} comment(s)? [y/N]: "
            ).strip().lower()
        except EOFError:
            # No interactive stdin: behave as if the user declined.
            response = ''

        if response == 'y':
            new_content = remove_chinese_comments(filepath, keep_line_endings=True)
            # newline='' writes the preserved terminators back untouched.
            with open(filepath, 'w', encoding='utf-8', newline='') as f:
                f.write(new_content)
            print(f"✓ Removed {len(comments)} Chinese comment(s) from {filepath}")
        else:
            print(f"✗ Skipped {filepath}")

    print(f"\n{'='*60}")
    print(f"Total: {total_found} Chinese comment(s) found")


if __name__ == '__main__':
    main()