dota2-match-calendar/cleanup_duplicates.py
Ching L 6fd21a64b9
All checks were successful
continuous-integration/drone/push Build is passing
修复进行中比赛更新创建重复事件的问题
- 改进事件匹配逻辑,增强基于队伍名称的查找机制
- 即使比分变化(如从1-0到1-1)也能正确识别同一场比赛
- 添加cleanup_duplicates.py工具用于查找和删除重复事件
- 添加delete_duplicates.py用于手动清理特定重复
- 更新CHANGELOG.md记录v3.4版本改动

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-06 00:09:44 +08:00

241 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""
Cleanup duplicate calendar events
Finds and optionally removes duplicate Dota 2 match events
"""
import requests
from google.oauth2 import service_account
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import pytz
import re
import sys
import argparse
def authenticate(credentials_file='credentials.json'):
    """Build an authenticated Google Calendar API client.

    Loads service-account credentials from ``credentials_file`` with the
    calendar scope and builds a Calendar v3 service object from them.

    Args:
        credentials_file: Path to the service account JSON key file.

    Returns:
        The Calendar service object returned by ``build``.

    Any exception raised while loading the credentials or building the
    client is printed and the process exits with status 1.
    """
    calendar_scopes = ['https://www.googleapis.com/auth/calendar']
    try:
        creds = service_account.Credentials.from_service_account_file(
            credentials_file,
            scopes=calendar_scopes,
        )
        calendar_service = build('calendar', 'v3', credentials=creds)
        print(f"✓ Successfully authenticated with Google Calendar")
    except Exception as err:
        print(f"✗ Authentication failed: {err}")
        sys.exit(1)
    return calendar_service
# Leading score prefix such as "✓ 2-1 " or "1:0 " placed in front of a summary.
_SCORE_PREFIX_RE = re.compile(r'^✓?\s*\d+[-:]\d+\s*')
# "[COMPLETED]" tag that may be embedded in a summary.
_COMPLETED_TAG_RE = re.compile(r'\[COMPLETED\]\s*')
# "<team1> vs <team2> [<tournament>]" with the tournament part optional.
_MATCH_SUMMARY_RE = re.compile(r'^(.*?)\s+vs\s+(.*?)(?:\s*\[(.*?)\])?$')


def _parse_match_summary(summary):
    """Return ``(team1, team2, tournament)`` for a match summary, else ``None``."""
    # Cheap pre-filter: anything without "vs" cannot be a match event.
    if 'vs' not in summary:
        return None
    cleaned = _SCORE_PREFIX_RE.sub('', summary)
    cleaned = _COMPLETED_TAG_RE.sub('', cleaned)
    found = _MATCH_SUMMARY_RE.search(cleaned)
    if not found:
        return None
    tournament = found.group(3).strip() if found.group(3) else ''
    return found.group(1).strip(), found.group(2).strip(), tournament


def _hour_bucket(event_start):
    """Return a ``YYYY-MM-DD-HH`` bucket for an ISO start string.

    Timestamps carrying a UTC offset are converted to UTC first so that the
    same instant written with different offsets lands in the same bucket.
    Date-only values (all-day events) parse as naive midnight and are used
    as-is.  Raises ``ValueError`` if the string is not ISO formatted.
    """
    from datetime import timezone

    event_dt = datetime.fromisoformat(event_start.replace('Z', '+00:00'))
    if event_dt.tzinfo is not None:
        event_dt = event_dt.astimezone(timezone.utc)
    return event_dt.strftime('%Y-%m-%d-%H')


def _fetch_events(service, calendar_id, time_min, time_max):
    """Return every event in the window, following ``nextPageToken`` pages."""
    collected = []
    page_token = None
    while True:
        response = service.events().list(
            calendarId=calendar_id,
            timeMin=time_min,
            timeMax=time_max,
            maxResults=500,
            singleEvents=True,
            orderBy='startTime',
            pageToken=page_token,
        ).execute()
        collected.extend(response.get('items', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            return collected


def find_duplicates(service, calendar_id='primary', days_back=7, days_ahead=30):
    """Find duplicate match events in the calendar.

    Events whose summary looks like ``"<team1> vs <team2> [<tournament>]"``
    (optionally prefixed by a score and a check mark) are grouped by the
    unordered team pair, the tournament and the UTC hour of their start time.
    Every group holding more than one event is reported as a duplicate set.

    Args:
        service: Calendar API client exposing ``events().list(...).execute()``.
        calendar_id: ID of the calendar to scan.
        days_back: How many days before now the scan window starts.
        days_ahead: How many days after now the scan window ends.

    Returns:
        Dict mapping a match key to the list of event-info dicts sharing it
        (keys: id, summary, start, team1, team2, tournament, created,
        updated, description).  Empty when nothing is duplicated or when the
        scan fails; failures are printed rather than raised.
    """
    from datetime import timezone

    try:
        # Timezone-aware "now"; rendered with a literal "Z" suffix as before.
        now = datetime.now(timezone.utc)
        stamp = '%Y-%m-%dT%H:%M:%S.%fZ'
        time_min = (now - timedelta(days=days_back)).strftime(stamp)
        time_max = (now + timedelta(days=days_ahead)).strftime(stamp)
        print(f"Scanning calendar from {days_back} days ago to {days_ahead} days ahead...")

        events = _fetch_events(service, calendar_id, time_min, time_max)

        # Group events by match key (teams + tournament + UTC hour)
        matches = {}
        for event in events:
            summary = event.get('summary', '')
            parsed = _parse_match_summary(summary)
            if parsed is None:
                continue
            team1, team2, tournament = parsed

            start_info = event.get('start') or {}
            event_start = start_info.get('dateTime') or start_info.get('date')
            if not event_start:
                # No usable start time: cannot be bucketed, so skip it
                # instead of aborting the whole scan.
                continue
            try:
                time_key = _hour_bucket(event_start)
            except ValueError:
                print(f"⚠️ Skipping event with unparsable start time: {summary}")
                continue

            # Sorting the pair makes "A vs B" and "B vs A" share one key.
            match_key = f"{sorted([team1, team2])}_{tournament}_{time_key}"
            matches.setdefault(match_key, []).append({
                'id': event['id'],
                'summary': summary,
                'start': event_start,
                'team1': team1,
                'team2': team2,
                'tournament': tournament,
                'created': event.get('created', ''),
                'updated': event.get('updated', ''),
                'description': event.get('description', ''),
            })

        # Only groups with more than one event are duplicates
        return {key: group for key, group in matches.items() if len(group) > 1}
    except Exception as e:
        print(f"✗ Error scanning calendar: {e}")
        return {}
def _event_status(event):
    """Classify an event-info dict as COMPLETED, IN PROGRESS or UPCOMING."""
    summary = event.get('summary') or ''
    description = event.get('description') or ''
    # A check mark or "[COMPLETED]" tag in the summary, or a result section in
    # the description, marks a finished match.  (Testing for an empty string
    # here would match every summary and report everything as COMPLETED.)
    if '✓' in summary or '[COMPLETED]' in summary or '🏆 RESULT' in description:
        return 'COMPLETED'
    if '📊 CURRENT SCORE' in description:
        return 'IN PROGRESS'
    return 'UPCOMING'


def display_duplicates(duplicates):
    """Print a human-readable report of the duplicate sets.

    Args:
        duplicates: Dict mapping a match key to a list of event-info dicts
            with the keys summary, team1, team2, tournament, start, id,
            created, updated and description.

    Prints a "no duplicates" line when ``duplicates`` is empty; otherwise
    prints every event of every set, including a derived status.
    """
    if not duplicates:
        print("\n✓ No duplicate events found!")
        return

    print(f"\n⚠️ Found {len(duplicates)} sets of duplicate events:")
    print("=" * 80)
    for events in duplicates.values():
        print(f"\nDuplicate set: {len(events)} events")
        print("-" * 40)
        for i, event in enumerate(events, 1):
            print(f"\nEvent #{i}:")
            print(f" Summary: {event['summary']}")
            print(f" Teams: {event['team1']} vs {event['team2']}")
            print(f" Tournament: {event['tournament']}")
            # Timestamps are cut to "YYYY-MM-DDTHH:MM:SS" for display.
            print(f" Time: {event['start'][:19]}")
            print(f" Event ID: {event['id']}")
            print(f" Created: {event['created'][:19] if event['created'] else 'N/A'}")
            print(f" Updated: {event['updated'][:19] if event['updated'] else 'N/A'}")
            print(f" Status: {_event_status(event)}")
def _last_touched(event):
    """Return the event's 'updated' timestamp, else 'created', else ''."""
    # The 'updated' key may be present but empty, so a plain
    # dict.get(..., default) would never reach the 'created' fallback.
    return event.get('updated') or event.get('created') or ''


def remove_duplicates(service, duplicates, calendar_id='primary', dry_run=True):
    """Remove duplicate events, keeping the most recently updated one.

    Within each duplicate set the event with the latest ``updated`` timestamp
    (falling back to ``created`` when ``updated`` is empty) is kept and all
    others are deleted, or merely listed when ``dry_run`` is true.

    Args:
        service: Calendar API client exposing ``events().delete(...).execute()``.
            Not used during a dry run.
        duplicates: Dict mapping a match key to a list of event-info dicts.
            The lists are not reordered or modified.
        calendar_id: ID of the calendar the events belong to.
        dry_run: When true, only report what would be removed.

    Returns:
        None.  Progress and a final summary are printed; a failed deletion is
        reported and does not stop the remaining deletions.
    """
    if not duplicates:
        return

    print("\n" + "=" * 80)
    print("Duplicate Removal Process")
    print("=" * 80)

    removed_count = 0
    for events in duplicates.values():
        print(f"\nProcessing duplicate set with {len(events)} events...")
        # Newest first; sorted() leaves the caller's list untouched.
        ranked = sorted(events, key=_last_touched, reverse=True)
        keep_event, remove_events = ranked[0], ranked[1:]
        kept_updated = keep_event.get('updated')
        print(f" Keeping: {keep_event['summary']} (updated: {kept_updated[:19] if kept_updated else 'N/A'})")

        for event in remove_events:
            if dry_run:
                print(f" ◯ Would remove: {event['summary']} (ID: {event['id'][:20]}...)")
                removed_count += 1
                continue
            try:
                service.events().delete(
                    calendarId=calendar_id,
                    eventId=event['id'],
                ).execute()
                print(f" ✓ Removed: {event['summary']} (ID: {event['id'][:20]}...)")
                removed_count += 1
            except Exception as e:
                print(f" ✗ Failed to remove: {event['summary']} - {e}")

    print("\n" + "=" * 80)
    print(f"Summary: {'Would remove' if dry_run else 'Removed'} {removed_count} duplicate events")
    if dry_run:
        print("⚠️ DRY RUN - No actual changes made. Use --remove to actually remove duplicates")
def main():
    """Command-line entry point.

    Parses the CLI options, authenticates, scans the calendar for duplicate
    events, prints them, and then runs the removal step (a dry run unless
    ``--remove`` was given) when any duplicates were found.
    """
    parser = argparse.ArgumentParser(
        description='Find and remove duplicate Dota 2 calendar events'
    )
    # (flag, add_argument keyword arguments), registered in this order.
    cli_options = (
        ('--calendar-id', {
            'default': 'primary',
            'help': 'Google Calendar ID (default: primary)',
        }),
        ('--remove', {
            'action': 'store_true',
            'help': 'Actually remove duplicates (default is dry-run)',
        }),
        ('--days-back', {
            'type': int,
            'default': 7,
            'help': 'Days to look back (default: 7)',
        }),
        ('--days-ahead', {
            'type': int,
            'default': 30,
            'help': 'Days to look ahead (default: 30)',
        }),
        ('--credentials', {
            'default': 'credentials.json',
            'help': 'Path to Google service account credentials JSON file',
        }),
    )
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)
    options = parser.parse_args()

    calendar_service = authenticate(options.credentials)

    found = find_duplicates(
        calendar_service,
        calendar_id=options.calendar_id,
        days_back=options.days_back,
        days_ahead=options.days_ahead,
    )
    display_duplicates(found)

    # Nothing to clean up when the scan came back empty.
    if not found:
        return
    remove_duplicates(
        calendar_service,
        found,
        calendar_id=options.calendar_id,
        dry_run=not options.remove,
    )


if __name__ == "__main__":
    main()