From 6fd21a64b9d2f330624bd2c79807e5ddf67adf7e Mon Sep 17 00:00:00 2001 From: Ching L Date: Sat, 6 Sep 2025 00:09:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=BF=9B=E8=A1=8C=E4=B8=AD?= =?UTF-8?q?=E6=AF=94=E8=B5=9B=E6=9B=B4=E6=96=B0=E5=88=9B=E5=BB=BA=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E4=BA=8B=E4=BB=B6=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 改进事件匹配逻辑,增强基于队伍名称的查找机制 - 即使比分变化(如从1-0到1-1)也能正确识别同一场比赛 - 添加cleanup_duplicates.py工具用于查找和删除重复事件 - 添加delete_duplicates.py用于手动清理特定重复 - 更新CHANGELOG.md记录v3.4版本改动 🤖 Generated with Claude Code Co-Authored-By: Claude --- .drone.yml | 2 +- CHANGELOG.md | 14 +++ cleanup_duplicates.py | 241 ++++++++++++++++++++++++++++++++++++++++++ delete_duplicates.py | 120 +++++++++++++++++++++ sync_dota2_matches.py | 19 ++++ 5 files changed, 395 insertions(+), 1 deletion(-) create mode 100644 cleanup_duplicates.py create mode 100644 delete_duplicates.py diff --git a/.drone.yml b/.drone.yml index a8f9c49..f4d71e1 100644 --- a/.drone.yml +++ b/.drone.yml @@ -8,7 +8,7 @@ steps: settings: host: - 206.237.26.184 - username: root + username: ching key: from_secret: ssh_key passphrase: diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ba1e8d..54b3428 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## v3.4 - 修复重复事件问题 +- **修复进行中比赛更新创建重复的问题**: + - 改进事件匹配逻辑,增强基于队伍名称的查找 + - 即使比分变化也能正确识别同一场比赛 + - 避免在比分更新时(如从 1-0 到 1-1)创建新事件 +- **新增重复事件清理工具**: + - 添加 `cleanup_duplicates.py` 用于查找和删除重复事件 + - 添加 `delete_duplicates.py` 用于手动清理特定重复 +- **增强的事件查找机制**: + - 不仅通过 ID 查找,还通过 team+tournament 组合查找 + - 支持忽略比分差异的模糊匹配 + - 改进对时间变更和 TBD 更新的处理 + ## v3.3 - 支持进行中比赛的实时比分 - **使用 BeautifulSoup 解析 HTML 结构**: - 重写了 `_parse_match` 方法,使用 DOM 结构而非正则表达式 @@ -72,6 +85,7 @@ | v3.1 | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | | v3.2 | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ | | v3.3 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | +| v3.4 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ## 使用建议 diff --git a/cleanup_duplicates.py b/cleanup_duplicates.py new file mode 100644 index 0000000..6df73ed --- /dev/null +++ b/cleanup_duplicates.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python3 +""" +Cleanup duplicate calendar events +Finds and optionally removes duplicate Dota 2 match events +""" + +import requests +from google.oauth2 import service_account +from googleapiclient.discovery import build +from datetime import datetime, timedelta +import pytz +import re +import sys +import argparse + +def authenticate(credentials_file='credentials.json'): + """Authenticate with Google Calendar using service account credentials""" + try: + credentials = service_account.Credentials.from_service_account_file( + credentials_file, + scopes=['https://www.googleapis.com/auth/calendar'] + ) + service = build('calendar', 'v3', credentials=credentials) + print(f"✓ Successfully authenticated with Google Calendar") + return service + except Exception as e: + print(f"✗ Authentication failed: {e}") + sys.exit(1) + +def find_duplicates(service, calendar_id='primary', days_back=7, days_ahead=30): + """Find duplicate events in the calendar""" + try: + now = datetime.utcnow() + time_min = (now - timedelta(days=days_back)).isoformat() + 'Z' + time_max = (now + timedelta(days=days_ahead)).isoformat() + 'Z' + + print(f"Scanning calendar from {days_back} days ago to {days_ahead} days ahead...") + + events_result = service.events().list( + calendarId=calendar_id, + timeMin=time_min, + timeMax=time_max, + maxResults=500, + singleEvents=True, + orderBy='startTime' + ).execute() + + events = events_result.get('items', []) + + # Group events by match key (teams + tournament + time window) + matches = {} + + for event in events: + summary = event.get('summary', '') + + # Skip non-Dota events + if 'vs' not in summary: + continue + + # Extract teams and tournament + # Remove score and checkmark + clean_summary = re.sub(r'^✓?\s*\d+[-:]\d+\s*', '', summary) + clean_summary = re.sub(r'\[COMPLETED\]\s*', '', clean_summary) + + # Extract teams + match = re.search(r'^(.*?)\s+vs\s+(.*?)(?:\s*\[(.*?)\])?$', clean_summary) + if not match: + continue + + team1 = match.group(1).strip() + team2 = match.group(2).strip() + tournament = match.group(3).strip() if match.group(3) else '' + + # Get event time + event_start = event['start'].get('dateTime', event['start'].get('date')) + event_dt = datetime.fromisoformat(event_start.replace('Z', '+00:00')) + + # Create match key (teams + tournament + time rounded to hour) + time_key = event_dt.strftime('%Y-%m-%d-%H') + match_key = f"{sorted([team1, team2])}_{tournament}_{time_key}" + + if match_key not in matches: + matches[match_key] = [] + + matches[match_key].append({ + 'id': event['id'], + 'summary': summary, + 'start': event_start, + 'team1': team1, + 'team2': team2, + 'tournament': tournament, + 'created': event.get('created', ''), + 'updated': event.get('updated', ''), + 'description': event.get('description', '') + }) + + # Find duplicates + duplicates = {} + for match_key, events in matches.items(): + if len(events) > 1: + duplicates[match_key] = events + + return duplicates + + except Exception as e: + print(f"✗ Error scanning calendar: {e}") + return {} + +def display_duplicates(duplicates): + """Display found duplicates""" + if not duplicates: + print("\n✓ No duplicate events found!") + return + + print(f"\n⚠️ Found {len(duplicates)} sets of duplicate events:") + print("=" * 80) + + for match_key, events in duplicates.items(): + print(f"\nDuplicate set: {len(events)} events") + print("-" * 40) + + for i, event in enumerate(events, 1): + print(f"\nEvent #{i}:") + print(f" Summary: {event['summary']}") + print(f" Teams: {event['team1']} vs {event['team2']}") + print(f" Tournament: {event['tournament']}") + print(f" Time: {event['start'][:19]}") + print(f" Event ID: {event['id']}") + print(f" Created: {event['created'][:19] if event['created'] else 'N/A'}") + print(f" Updated: {event['updated'][:19] if event['updated'] else 'N/A'}") + + # Check if has result + if '✓' in event['summary'] or '🏆 RESULT' in event['description']: + print(f" Status: COMPLETED") + elif '📊 CURRENT SCORE' in event['description']: + print(f" Status: IN PROGRESS") + else: + print(f" Status: UPCOMING") + +def remove_duplicates(service, duplicates, calendar_id='primary', dry_run=True): + """Remove duplicate events, keeping the most recently updated one""" + if not duplicates: + return + + print("\n" + "=" * 80) + print("Duplicate Removal Process") + print("=" * 80) + + removed_count = 0 + + for match_key, events in duplicates.items(): + print(f"\nProcessing duplicate set with {len(events)} events...") + + # Sort by updated time (keep most recent) + events.sort(key=lambda x: x.get('updated', x.get('created', '')), reverse=True) + + # Keep the first (most recent) event + keep_event = events[0] + remove_events = events[1:] + + print(f" Keeping: {keep_event['summary']} (updated: {keep_event['updated'][:19] if keep_event['updated'] else 'N/A'})") + + for event in remove_events: + if dry_run: + print(f" ◯ Would remove: {event['summary']} (ID: {event['id'][:20]}...)") + removed_count += 1 + else: + try: + service.events().delete( + calendarId=calendar_id, + eventId=event['id'] + ).execute() + print(f" ✓ Removed: {event['summary']} (ID: {event['id'][:20]}...)") + removed_count += 1 + except Exception as e: + print(f" ✗ Failed to remove: {event['summary']} - {e}") + + print("\n" + "=" * 80) + print(f"Summary: {'Would remove' if dry_run else 'Removed'} {removed_count} duplicate events") + if dry_run: + print("⚠️ DRY RUN - No actual changes made. Use --remove to actually remove duplicates") + +def main(): + parser = argparse.ArgumentParser( + description='Find and remove duplicate Dota 2 calendar events' + ) + parser.add_argument( + '--calendar-id', + default='primary', + help='Google Calendar ID (default: primary)' + ) + parser.add_argument( + '--remove', + action='store_true', + help='Actually remove duplicates (default is dry-run)' + ) + parser.add_argument( + '--days-back', + type=int, + default=7, + help='Days to look back (default: 7)' + ) + parser.add_argument( + '--days-ahead', + type=int, + default=30, + help='Days to look ahead (default: 30)' + ) + parser.add_argument( + '--credentials', + default='credentials.json', + help='Path to Google service account credentials JSON file' + ) + + args = parser.parse_args() + + # Authenticate + service = authenticate(args.credentials) + + # Find duplicates + duplicates = find_duplicates( + service, + calendar_id=args.calendar_id, + days_back=args.days_back, + days_ahead=args.days_ahead + ) + + # Display duplicates + display_duplicates(duplicates) + + # Remove duplicates if requested + if duplicates: + remove_duplicates( + service, + duplicates, + calendar_id=args.calendar_id, + dry_run=not args.remove + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/delete_duplicates.py b/delete_duplicates.py new file mode 100644 index 0000000..e9a1864 --- /dev/null +++ b/delete_duplicates.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +""" +Delete duplicate calendar events based on screenshot +Manually delete the duplicate events that were created during live score updates +""" + +from google.oauth2 import service_account +from googleapiclient.discovery import build +from datetime import datetime, timedelta +import pytz +import sys + +def authenticate(credentials_file='credentials.json'): + """Authenticate with Google Calendar using service account credentials""" + try: + credentials = service_account.Credentials.from_service_account_file( + credentials_file, + scopes=['https://www.googleapis.com/auth/calendar'] + ) + service = build('calendar', 'v3', credentials=credentials) + print(f"✓ Successfully authenticated with Google Calendar") + return service + except Exception as e: + print(f"✗ Authentication failed: {e}") + sys.exit(1) + +def delete_specific_events(service, calendar_id='primary'): + """Delete specific duplicate events based on partial scores""" + + # Based on the screenshot, these are the duplicate events to look for: + duplicates_to_find = [ + ("1-0", "NGX", "Liquid"), # 19:00 1-0 NGX vs Liquid + ("1-1", "NGX", "Liquid"), # 19:45 1-1 NGX vs Liquid + ] + + # Get events for Sept 5 + target_date = datetime(2025, 9, 5, tzinfo=pytz.UTC) + time_min = target_date.isoformat() + time_max = (target_date + timedelta(days=1)).isoformat() + + events_result = service.events().list( + calendarId=calendar_id, + timeMin=time_min, + timeMax=time_max, + maxResults=500, + singleEvents=True, + orderBy='startTime' + ).execute() + + events = events_result.get('items', []) + + print(f"\nScanning {len(events)} events on September 5th...") + print("=" * 60) + + events_to_delete = [] + + for event in events: + summary = event.get('summary', '') + + # Check if this matches any of our duplicate patterns + for score, team1, team2 in duplicates_to_find: + if score in summary and team1 in summary and team2 in summary and '✓' not in summary: + events_to_delete.append({ + 'id': event['id'], + 'summary': summary, + 'start': event['start'].get('dateTime', event['start'].get('date')) + }) + print(f"Found duplicate: {summary}") + print(f" Time: {event['start'].get('dateTime', event['start'].get('date'))}") + print(f" ID: {event['id']}") + break + + if not events_to_delete: + print("\n❌ No duplicate events found in API.") + print("This could mean:") + print("1. The duplicates are in a different calendar") + print("2. They have already been deleted") + print("3. The calendar interface is showing cached data") + print("\nTry refreshing your browser (Ctrl+F5 or Cmd+Shift+R)") + return + + print(f"\n⚠️ Found {len(events_to_delete)} duplicate events to delete") + print("-" * 60) + + # Confirm before deleting + print("\nThese events will be deleted:") + for event in events_to_delete: + print(f" - {event['summary']} at {event['start'][:19]}") + + response = input("\nDo you want to delete these events? (yes/no): ") + + if response.lower() == 'yes': + deleted_count = 0 + for event in events_to_delete: + try: + service.events().delete( + calendarId=calendar_id, + eventId=event['id'] + ).execute() + print(f"✓ Deleted: {event['summary']}") + deleted_count += 1 + except Exception as e: + print(f"✗ Failed to delete {event['summary']}: {e}") + + print(f"\n✓ Successfully deleted {deleted_count} duplicate events") + else: + print("\n❌ Deletion cancelled") + +def main(): + # Authenticate + service = authenticate() + + # Delete specific duplicates + delete_specific_events(service) + + print("\nPlease refresh your Google Calendar to see the changes.") + print("If you still see duplicates, they might be in a different calendar.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/sync_dota2_matches.py b/sync_dota2_matches.py index 2174efd..35b4463 100644 --- a/sync_dota2_matches.py +++ b/sync_dota2_matches.py @@ -739,6 +739,16 @@ class Dota2CalendarSync: match_key = f"{team1}_{team2}_{tournament}" if match_key in existing_events['_by_match']: existing_event = existing_events['_by_match'][match_key] + + # Also try to find by teams only (ignoring score) for live updates + # This handles cases where score changes during match + if not existing_event: + for event_key, event in existing_events['_by_match'].items(): + # Check if teams match (order independent) + if (f"{team1}_{team2}" in event_key or f"{team2}_{team1}" in event_key) and tournament in event_key: + existing_event = event + print(f" → Found existing match by teams: {team1} vs {team2}") + break # Special handling for TBD matches that might have been updated # Look for TBD events at the same time and tournament @@ -892,6 +902,15 @@ class Dota2CalendarSync: match_key = f"{team1}_{team2}_{tournament}" if match_key in existing_events['_by_match']: existing_event = existing_events['_by_match'][match_key] + + # Also try to find by teams only (for live score updates) + if not existing_event: + for event_key, event in existing_events['_by_match'].items(): + # Check if teams match (order independent) and tournament matches + if (f"{team1}_{team2}" in event_key or f"{team2}_{team1}" in event_key) and tournament in event_key: + existing_event = event + print(f" → Found existing match by teams: {team1} vs {team2}") + break if existing_event: # Check if already marked as completed