work_script/CP02/remove_chinese_comments.py

#!/usr/bin/env python3
"""Remove Chinese comments (// and # style) from source files."""

from __future__ import annotations

import re
import sys
from pathlib import Path

# Regex for a single-line comment, keyed by lowercase file suffix.
_SLASH_COMMENT = r'//.*'  # C/C++ style
_HASH_COMMENT = r'#.*'    # Python style

COMMENT_PATTERNS = {
    **dict.fromkeys(('.c', '.cpp', '.h', '.hpp'), _SLASH_COMMENT),
    '.py': _HASH_COMMENT,
}


def get_comment_pattern(filepath: str) -> str:
    """Return the single-line comment regex to use for *filepath*.

    The lookup key is the lowercased file suffix. Suffixes that are not
    present in ``COMMENT_PATTERNS`` fall back to the ``//`` pattern.
    """
    suffix = Path(filepath).suffix.lower()
    if suffix in COMMENT_PATTERNS:
        return COMMENT_PATTERNS[suffix]
    return r'//.*'


def get_comment_prefix(filepath: str) -> str:
    """Return the comment marker for *filepath*.

    Files whose (case-insensitive) suffix is ``.py`` use ``#``; every
    other file uses ``//``.
    """
    is_python = Path(filepath).suffix.lower() == '.py'
    return '#' if is_python else '//'


# Han ideograph ranges. Compiled once because the check runs for every
# candidate comment line.
_HAN_IDEOGRAPH_RE = re.compile(
    '['
    r'\u3400-\u4dbf'          # CJK Unified Ideographs Extension A
    r'\u4e00-\u9fff'          # CJK Unified Ideographs (the original range)
    r'\uf900-\ufaff'          # CJK Compatibility Ideographs
    r'\U00020000-\U000323af'  # supplementary-plane ideograph extensions
    ']'
)


def contains_chinese(text: str) -> bool:
    """Return True if *text* contains at least one Han ideograph.

    Besides the basic U+4E00-U+9FFF block this also recognises
    Extension A, the compatibility ideographs and the supplementary-plane
    extensions, so rarer characters are not silently treated as
    non-Chinese. Punctuation and kana are not matched.
    """
    return _HAN_IDEOGRAPH_RE.search(text) is not None


def _find_comment_start(line: str, prefix: str) -> int:
    """Return the index where a comment starts in *line*, or -1.

    The scan skips over single- and double-quoted literals (honouring
    backslash escapes) so that a comment marker inside a string such as
    ``"http://..."`` or ``"# text"`` is not mistaken for a comment.

    The scan is line-local: strings spanning several lines and ``/* */``
    block comments are not tracked. A quote that is never closed on the
    line hides everything after it, so in doubt the line is left alone
    rather than truncated.
    """
    open_quote = None
    i = 0
    length = len(line)
    while i < length:
        ch = line[i]
        if open_quote is not None:
            if ch == '\\':
                # Skip the escaped character so \" or \' cannot close the literal.
                i += 2
                continue
            if ch == open_quote:
                open_quote = None
        elif ch == '"' or ch == "'":
            open_quote = ch
        elif line.startswith(prefix, i):
            return i
        i += 1
    return -1


def _chinese_comment_start(line: str, prefix: str) -> int:
    """Return the start index of a comment containing Chinese text, or -1."""
    start = _find_comment_start(line, prefix)
    if start != -1 and contains_chinese(line[start:]):
        return start
    return -1


def find_chinese_comments(filepath: str) -> list[tuple[int, str]]:
    """Find all lines of *filepath* that carry a Chinese comment.

    Uses the same quote-aware detection as ``remove_chinese_comments``,
    so the listing matches exactly the lines that removal would touch.

    Returns a list of ``(line_number, line_content)`` tuples with 1-based
    line numbers and trailing whitespace stripped from the content.

    Prints an error to stderr and exits with status 1 when the file does
    not exist.
    """
    path = Path(filepath)

    if not path.exists():
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    prefix = get_comment_prefix(filepath)

    with open(path, 'r', encoding='utf-8') as f:
        return [
            (line_num, line.rstrip())
            for line_num, line in enumerate(f, 1)
            if _chinese_comment_start(line, prefix) != -1
        ]


def remove_chinese_comments(filepath: str) -> str:
    """Return the content of *filepath* with Chinese comments removed.

    A line consisting only of a Chinese comment is dropped entirely. When
    the comment trails code, only the comment and the whitespace before it
    are removed. Comment markers inside quoted literals are ignored, so
    string contents are never truncated. The file itself is not modified.
    """
    prefix = get_comment_prefix(filepath)

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    kept = []
    for line in lines:
        start = _chinese_comment_start(line, prefix)
        if start == -1:
            kept.append(line)
            continue

        code = line[:start].rstrip()
        if not code:
            # Nothing but whitespace precedes the comment: drop the line.
            continue

        # Keep the original line ending; a final line without a newline
        # must not gain one.
        ending = '\n' if line.endswith('\n') else ''
        kept.append(code + ending)

    return ''.join(kept)


def main():
    """Command-line entry point.

    Usage: ``remove_chinese_comments.py [--check] <file> [file2 ...]``.

    For every file the Chinese comments found are listed. Unless
    ``--check`` is the first argument, the user is asked per file whether
    to remove them, and only an answer of ``y`` rewrites the file. Exits
    with status 1 when no arguments or no files are given.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python remove_chinese_comments.py <file> [file2 ...]")
        print("       python remove_chinese_comments.py --check <file> [file2 ...]")
        print("\nOptions:")
        print("  --check    Only show Chinese comments without removing")
        sys.exit(1)

    check_only = args[0] == '--check'
    files = args[1:] if check_only else args

    if not files:
        print("Error: No files specified")
        sys.exit(1)

    total_found = 0

    for filepath in files:
        comments = find_chinese_comments(filepath)

        if not comments:
            print(f"\n{filepath}: No Chinese comments found")
            continue

        total_found += len(comments)
        print(f"\n{'='*60}")
        print(f"File: {filepath}")
        print(f"Found {len(comments)} Chinese comment(s):")
        print('-'*60)

        for line_num, line in comments:
            print(f"  L{line_num}: {line}")

        if check_only:
            continue

        # Ask for confirmation
        print('-'*60)
        try:
            response = input(f"Remove these {len(comments)} comment(s)? [y/N]: ").strip().lower()
        except EOFError:
            # stdin is closed or exhausted (e.g. piped run): never modify a
            # file without an explicit "y", so treat EOF as "no".
            print()
            response = ''

        if response == 'y':
            new_content = remove_chinese_comments(filepath)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(new_content)
            print(f"✓ Removed {len(comments)} Chinese comment(s) from {filepath}")
        else:
            print(f"✗ Skipped {filepath}")

    print(f"\n{'='*60}")
    print(f"Total: {total_found} Chinese comment(s) found")


if __name__ == '__main__':
    main()