dota2-match-calendar/legacy/sync_dota2_matches_v2.py
Ching L a1573a3f82 Initial commit: Dota 2 Calendar Sync
Automated sync tool for Dota 2 Tier 1 matches from Liquipedia to Google Calendar

Features:
- Sync matches from Liquipedia to Google Calendar
- Auto-update match results when completed
- Detect and update time changes
- Clean event format: Team1 vs Team2 [Tournament]

Main files:
- sync_dota2_matches.py: Main sync script
- run_sync.sh: Convenience runner
- requirements.txt: Python dependencies
- README.md: Setup and usage guide

# Conflicts:
#	.gitignore
#	README.md
2025-09-05 11:53:02 +08:00

564 lines
21 KiB
Python

#!/usr/bin/env python3
"""
Dota 2 Tournament Calendar Sync v2
Fetches Tier 1 Dota 2 matches from Liquipedia and syncs them to Google Calendar
Now includes completed match results updating
"""
import requests
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build
from datetime import datetime, timedelta
import pytz
import re
import hashlib
import sys
import argparse
import time
class Dota2CalendarSync:
    """Sync Tier 1 Dota 2 matches scraped from Liquipedia into Google Calendar.

    Upcoming matches are inserted as new calendar events; completed matches
    whose event already exists get their result written into the event
    description and a ``[COMPLETED]`` prefix added to the summary.

    Events are correlated with matches through an ``ID: <hex>`` line stored in
    the event description (see ``_generate_match_id``).
    """

    LIQUIPEDIA_URL = 'https://liquipedia.net/dota2/Liquipedia:Matches'
    USER_AGENT = 'Dota2CalendarSync/2.0 (https://github.com/youruser/dota2-calendar)'

    # Substrings that mark a block of page text as belonging to a Tier 1 event.
    TIER1_KEYWORDS = (
        'TI2025', 'The International', 'Major', 'Premier',
        'Tier 1', 'DreamLeague', 'ESL One', 'PGL Major',
    )

    # A "digits-digits" or "digits:digits" token is treated as a series score.
    # NOTE(review): this also matches clock times such as "18:00" if the page
    # text contains them -- confirm against the live Liquipedia markup.
    SCORE_RE = re.compile(r'\d+[-:]\d+')

    # Team-name character class shared by the scraping patterns below.
    _SCORE_PATTERNS = (
        r'([A-Za-z0-9\s\.\-_]+?)\s+(\d+)[-:](\d+)\s+([A-Za-z0-9\s\.\-_]+)',
        r'([A-Za-z0-9\s\.\-_]+?)(\d+)[-:](\d+)([A-Za-z0-9\s\.\-_]+)',
    )
    _VS_WITH_FORMAT = (
        r'([A-Za-z0-9\s\.\-_]+?)vs\(?(Bo\d)\)?([A-Za-z0-9\s\.\-_]+?)'
        r'(?:TI2025|Round|Playoff|Group|\+|$)'
    )
    _VS_PLAIN = (
        r'([A-Za-z0-9\s\.\-_]+?)vs([A-Za-z0-9\s\.\-_]+?)'
        r'(?:TI2025|Round|Playoff|Group|\+|$)'
    )

    # Estimated event length in hours, keyed by series format substring.
    _FORMAT_HOURS = (('Bo5', 4), ('Bo3', 3), ('Bo1', 1))
    _DEFAULT_HOURS = 2

    def __init__(self, credentials_file='credentials.json', calendar_id='primary'):
        """Store configuration and authenticate immediately.

        Args:
            credentials_file: Path to a Google service-account JSON key.
            calendar_id: Target calendar ID (``'primary'`` by default).
        """
        self.credentials_file = credentials_file
        self.calendar_id = calendar_id
        self.service = self._authenticate()

    def _authenticate(self):
        """Authenticate with Google Calendar using service account credentials.

        Returns:
            The Calendar v3 service object.

        Exits the process with status 1 if authentication fails.
        """
        try:
            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_file,
                scopes=['https://www.googleapis.com/auth/calendar'],
            )
            service = build('calendar', 'v3', credentials=credentials)
            print("✓ Successfully authenticated with Google Calendar")
            return service
        except Exception as e:
            print(f"✗ Authentication failed: {e}")
            sys.exit(1)

    def fetch_all_matches(self) -> tuple:
        """Fetch both upcoming and completed matches from Liquipedia.

        Returns:
            ``(upcoming_matches, completed_matches)`` -- two lists of match
            dicts, each de-duplicated by match ID. Both lists are empty if the
            HTTP request fails.
        """
        headers = {'User-Agent': self.USER_AGENT}
        print("Fetching matches from Liquipedia...")
        # Only the network call can raise RequestException, so keep the try
        # body limited to it.
        try:
            response = requests.get(self.LIQUIPEDIA_URL, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"✗ Error fetching Liquipedia data: {e}")
            return [], []

        soup = BeautifulSoup(response.text, 'lxml')
        upcoming_matches = []
        completed_matches = []

        # Every <div> is inspected; nested containers produce repeated hits,
        # which the ID-based de-duplication below collapses.
        for div in soup.find_all('div', recursive=True):
            text_content = div.get_text()
            if not any(keyword in text_content for keyword in self.TIER1_KEYWORDS):
                continue

            timestamp_elem = div.find('span', {'data-timestamp': True})
            if timestamp_elem is None:
                continue

            if self.SCORE_RE.search(text_content):
                # A score in the text means the series has been played.
                match_data = self._parse_completed_match(div, timestamp_elem)
                if match_data:
                    completed_matches.append(match_data)
            else:
                match_data = self._parse_upcoming_match(div, timestamp_elem)
                if match_data:
                    upcoming_matches.append(match_data)

        upcoming_matches = self._remove_duplicates(upcoming_matches)
        completed_matches = self._remove_duplicates(completed_matches)
        print(f"✓ Found {len(upcoming_matches)} upcoming matches")
        print(f"✓ Found {len(completed_matches)} completed matches with results")
        return upcoming_matches, completed_matches

    @staticmethod
    def _extract_tournament(text):
        """Return the tournament label found in *text*, or ``None``.

        Only the ``TI2025`` marker is recognised; a ``Round N`` token, when
        present, is appended to the label.
        """
        if 'TI2025' not in text:
            return None
        tournament = 'The International 2025'
        round_match = re.search(r'Round\s+\d+', text)
        if round_match:
            tournament += f" - {round_match.group(0)}"
        return tournament

    def _parse_completed_match(self, div, timestamp_elem):
        """Parse a completed match with result.

        Args:
            div: Element exposing ``get_text()`` with the match text.
            timestamp_elem: Element exposing ``get('data-timestamp')``.

        Returns:
            A match dict with ``team1``, ``team2``, ``score``, ``winner``,
            ``id`` and ``completed=True`` (plus ``datetime``/``tournament``
            when available), or ``None`` if no teams could be extracted.
            ``winner`` is ``'Draw'`` when both sides have the same score.
        """
        match_data = {}
        try:
            timestamp = timestamp_elem.get('data-timestamp')
            if timestamp:
                match_data['datetime'] = datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
        except (ValueError, OverflowError, OSError) as e:
            # Best-effort scraping: report and skip a malformed entry instead
            # of silently swallowing every possible error.
            print(f"⚠ Skipping match with invalid timestamp: {e}")
            return None

        text = div.get_text()

        # Common shapes: "Team1 2-0 Team2", "Team1 2:1 Team2".
        for pattern in self._SCORE_PATTERNS:
            found = re.search(pattern, text)
            if not found:
                continue
            team1 = self._clean_team_name(found.group(1))
            team2 = self._clean_team_name(found.group(4))
            if not (team1 and team2):
                continue
            score1, score2 = int(found.group(2)), int(found.group(3))
            match_data['team1'] = team1
            match_data['team2'] = team2
            match_data['score'] = f"{found.group(2)}-{found.group(3)}"
            # A tied series (e.g. a Bo2 ending 1-1) has no winner; previously
            # team2 was reported as the winner in that case.
            if score1 > score2:
                match_data['winner'] = team1
            elif score2 > score1:
                match_data['winner'] = team2
            else:
                match_data['winner'] = 'Draw'
            break

        tournament = self._extract_tournament(text)
        if tournament:
            match_data['tournament'] = tournament

        if 'team1' in match_data and 'team2' in match_data:
            match_data['id'] = self._generate_match_id(match_data)
            match_data['completed'] = True
            return match_data
        return None

    def _parse_upcoming_match(self, div, timestamp_elem):
        """Parse an upcoming match.

        Args:
            div: Element exposing ``get_text()`` with the match text.
            timestamp_elem: Element exposing ``get('data-timestamp')``.

        Returns:
            A match dict with ``datetime``, ``team1``, ``team2``, ``id`` and
            ``completed=False`` (plus ``format``/``tournament`` when
            available), or ``None`` if the timestamp or teams are missing.
        """
        match_data = {}
        try:
            timestamp = timestamp_elem.get('data-timestamp')
            if not timestamp:
                return None
            match_data['datetime'] = datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)
        except (ValueError, OverflowError, OSError) as e:
            print(f"⚠ Skipping match with invalid timestamp: {e}")
            return None

        text = div.get_text()

        # Prefer the pattern that also captures the series format ("BoN").
        format_str = None
        found = re.search(self._VS_WITH_FORMAT, text)
        if found:
            team1, format_str, team2 = found.group(1), found.group(2), found.group(3)
        else:
            found = re.search(self._VS_PLAIN, text)
            if found:
                team1, team2 = found.group(1), found.group(2)

        if found:
            team1 = team1.strip()
            team2 = team2.strip()
            # Drop anything up to and including a "CES"/"CEST" timezone tag
            # that leaked into the captured team name.
            team1 = re.sub(r'^.*CEST?', '', team1).strip()
            if team1 and team2:
                match_data['team1'] = self._clean_team_name(team1)
                match_data['team2'] = self._clean_team_name(team2)
                if format_str and format_str.startswith('Bo'):
                    match_data['format'] = format_str

        tournament = self._extract_tournament(text)
        if tournament:
            match_data['tournament'] = tournament

        if 'team1' in match_data and 'team2' in match_data:
            match_data['id'] = self._generate_match_id(match_data)
            match_data['completed'] = False
            return match_data
        return None

    def _clean_team_name(self, name) -> str:
        """Clean and normalize team name.

        Collapses whitespace, removes a trailing parenthesised suffix, and
        blanks the name entirely when it starts with a date-like pattern.
        """
        name = re.sub(r'\s+', ' ', name).strip()
        name = re.sub(r'\s*\(.*?\)\s*$', '', name)
        # Names that begin with a date ("2025-09-05 ..." / "September 5, 2025
        # ...") are scraping artefacts, not teams.
        name = re.sub(r'^\d{4}-\d{2}-\d{2}.*', '', name).strip()
        name = re.sub(r'^\w+\s+\d+,\s+\d{4}.*', '', name).strip()
        return name

    def _generate_match_id(self, match_data) -> str:
        """Generate a unique ID for a match.

        The ID is the first 16 hex characters of the MD5 digest of the
        available fields (team1, team2, match date, tournament) joined with
        ``_``. MD5 is used here only as a fingerprint, not for security.
        """
        id_parts = []
        if 'team1' in match_data:
            id_parts.append(match_data['team1'])
        if 'team2' in match_data:
            id_parts.append(match_data['team2'])
        if 'datetime' in match_data:
            id_parts.append(str(match_data['datetime'].date()))
        if 'tournament' in match_data:
            id_parts.append(match_data['tournament'])
        unique_string = '_'.join(id_parts)
        return hashlib.md5(unique_string.encode()).hexdigest()[:16]

    def _remove_duplicates(self, matches) -> list:
        """Remove duplicate matches based on ID.

        Matches without an ID are dropped; for repeated IDs the last
        occurrence wins while the first occurrence's position is kept.
        """
        unique_matches = {}
        for match in matches:
            if match.get('id'):
                unique_matches[match['id']] = match
        return list(unique_matches.values())

    def get_existing_events(self, days_back=7, days_ahead=30) -> dict:
        """Get existing Dota 2 events from Google Calendar.

        Args:
            days_back: How many days before now to include.
            days_ahead: How many days after now to include.

        Returns:
            Mapping of match ID (parsed from the ``ID:`` line in each event
            description) to the calendar event resource. Empty on API error.
        """
        try:
            # Timezone-aware "now"; datetime.utcnow() is deprecated and naive.
            now = datetime.now(pytz.UTC)
            rfc3339 = '%Y-%m-%dT%H:%M:%SZ'
            time_min = (now - timedelta(days=days_back)).strftime(rfc3339)
            time_max = (now + timedelta(days=days_ahead)).strftime(rfc3339)
            print("Checking existing events in calendar...")

            dota_events = {}
            page_token = None
            # Follow nextPageToken so calendars with more events than one page
            # holds do not cause already-synced matches to be re-inserted.
            while True:
                events_result = self.service.events().list(
                    calendarId=self.calendar_id,
                    timeMin=time_min,
                    timeMax=time_max,
                    maxResults=500,
                    singleEvents=True,
                    orderBy='startTime',
                    pageToken=page_token,
                ).execute()

                for event in events_result.get('items', []):
                    if 'Dota 2' not in event.get('summary', ''):
                        continue
                    id_match = re.search(r'ID:\s*([a-f0-9]+)', event.get('description', ''))
                    if id_match:
                        dota_events[id_match.group(1)] = event

                page_token = events_result.get('nextPageToken')
                if not page_token:
                    break

            print(f"✓ Found {len(dota_events)} existing Dota 2 events")
            return dota_events
        except Exception as e:
            print(f"✗ Error fetching calendar events: {e}")
            return {}

    def create_calendar_event(self, match_data) -> dict:
        """Create a Google Calendar event for a match.

        Args:
            match_data: Match dict; must contain ``id``. ``team1``/``team2``
                default to ``'TBD'`` and ``datetime`` defaults to now (UTC).

        Returns:
            The event resource body (not yet inserted into any calendar).
        """
        team1 = match_data.get('team1', 'TBD')
        team2 = match_data.get('team2', 'TBD')
        tournament = match_data.get('tournament', '')

        if tournament:
            summary = f"Dota 2 - {tournament}: {team1} vs {team2}"
        else:
            summary = f"Dota 2: {team1} vs {team2}"

        description_parts = []
        if tournament:
            description_parts.append(f"Tournament: {tournament}")
        description_parts.append(f"Match: {team1} vs {team2}")
        if 'format' in match_data:
            description_parts.append(f"Format: {match_data['format']}")
        if match_data.get('completed'):
            description_parts.append(f"\n🏆 RESULT: {match_data.get('score', 'Unknown')}")
            description_parts.append(f"Winner: {match_data.get('winner', 'Unknown')}")
        # The ID line is what get_existing_events() uses to recognise the event.
        description_parts.append(f"ID: {match_data['id']}")
        description_parts.append("\nSource: Liquipedia")
        description = '\n'.join(description_parts)

        # Only fall back to "now" when the match has no datetime at all.
        start_time = match_data.get('datetime')
        if start_time is None:
            start_time = datetime.now(pytz.UTC)

        # Estimate the event length from the series format.
        duration = self._DEFAULT_HOURS
        match_format = match_data.get('format')
        if match_format is not None:
            for marker, hours in self._FORMAT_HOURS:
                if marker in match_format:
                    duration = hours
                    break
        end_time = start_time + timedelta(hours=duration)

        return {
            'summary': summary,
            'description': description,
            'start': {
                'dateTime': start_time.isoformat(),
                'timeZone': 'UTC',
            },
            'end': {
                'dateTime': end_time.isoformat(),
                'timeZone': 'UTC',
            },
            'reminders': {
                'useDefault': False,
                'overrides': [
                    {'method': 'popup', 'minutes': 30},
                ],
            },
            'colorId': '9',  # Blue color for Dota 2 events
        }

    @staticmethod
    def _merge_result_into_description(description, match_data) -> str:
        """Return *description* with the match result inserted or refreshed.

        An existing result block is replaced; otherwise the block is inserted
        directly before the ``ID:`` line, or appended when no such line exists.
        """
        score = match_data.get('score', 'Unknown')
        winner = match_data.get('winner', 'Unknown')
        result_block = f"🏆 RESULT: {score}\nWinner: {winner}"

        if '🏆 RESULT:' in description:
            # Stop at the end of the Winner line so the block is also matched
            # when it is the last line of the description. A callable is used
            # so team names are never interpreted as a replacement template.
            return re.sub(
                r'🏆 RESULT:.*?\n.*?Winner:[^\n]*',
                lambda _found: result_block,
                description,
                flags=re.DOTALL,
            )

        # Anchor on an "ID:" at the start of a line so other text that merely
        # contains "ID:" is left alone and the block is inserted only once.
        id_line = re.search(r'^ID:', description, flags=re.MULTILINE)
        if id_line:
            cut = id_line.start()
            return f"{description[:cut]}\n{result_block}\n{description[cut:]}"
        return f"{description}\n{result_block}\n"

    def update_event_with_result(self, event_id, match_data) -> bool:
        """Update an existing calendar event with match results.

        Args:
            event_id: Google Calendar event ID.
            match_data: Completed match dict (``score`` and ``winner`` used).

        Returns:
            ``True`` on success, ``False`` if the API call failed.
        """
        try:
            event = self.service.events().get(
                calendarId=self.calendar_id,
                eventId=event_id,
            ).execute()

            event['description'] = self._merge_result_into_description(
                event.get('description', ''), match_data
            )

            # Prefix the summary once so later runs can skip this event.
            summary = event.get('summary', '')
            if '[COMPLETED]' not in summary:
                summary = f"[COMPLETED] {summary}"
            event['summary'] = summary

            self.service.events().update(
                calendarId=self.calendar_id,
                eventId=event_id,
                body=event,
            ).execute()
            return True
        except Exception as e:
            print(f"Error updating event: {e}")
            return False

    def sync_matches_to_calendar(self, dry_run=False, update_results=True):
        """Main sync function with result updating.

        Args:
            dry_run: When true, only report what would be added or updated.
            update_results: When true, write results of completed matches
                into their existing calendar events.
        """
        print("\n" + "="*50)
        print("Starting Dota 2 Calendar Sync v2")
        print("="*50 + "\n")

        upcoming_matches, completed_matches = self.fetch_all_matches()
        if not upcoming_matches and not completed_matches:
            print("No matches found to sync")
            return

        # Look back a week so recently finished matches can receive results.
        existing_events = self.get_existing_events(days_back=7, days_ahead=30)

        added_count = 0
        skipped_count = 0
        updated_count = 0
        error_count = 0

        print("\nProcessing upcoming matches...")
        print("-" * 30)

        now = datetime.now(pytz.UTC)
        future_matches = [m for m in upcoming_matches if m.get('datetime', now) >= now]

        for match in future_matches:
            match_id = match.get('id')
            if not match_id:
                continue
            team1 = match.get('team1', 'TBD')
            team2 = match.get('team2', 'TBD')
            match_time = match.get('datetime', now)

            if match_id in existing_events:
                print(f"⊘ Skipping (exists): {team1} vs {team2}")
                skipped_count += 1
                continue

            if dry_run:
                print(f"◯ Would add: {team1} vs {team2} at {match_time.strftime('%Y-%m-%d %H:%M UTC')}")
                added_count += 1
                continue

            try:
                event = self.create_calendar_event(match)
                self.service.events().insert(
                    calendarId=self.calendar_id,
                    body=event,
                ).execute()
                print(f"✓ Added: {team1} vs {team2} at {match_time.strftime('%Y-%m-%d %H:%M UTC')}")
                added_count += 1
                time.sleep(0.2)  # Rate limiting
            except Exception as e:
                print(f"✗ Error adding {team1} vs {team2}: {e}")
                error_count += 1

        if update_results and completed_matches:
            print("\nProcessing completed match results...")
            print("-" * 30)
            for match in completed_matches:
                match_id = match.get('id')
                # Results are only written to events that already exist.
                if not match_id or match_id not in existing_events:
                    continue
                team1 = match.get('team1', 'TBD')
                team2 = match.get('team2', 'TBD')
                score = match.get('score', 'Unknown')
                existing_event = existing_events[match_id]

                if '[COMPLETED]' in existing_event.get('summary', ''):
                    print(f"⊘ Already updated: {team1} vs {team2} ({score})")
                    continue

                if dry_run:
                    print(f"◯ Would update: {team1} vs {team2} with result {score}")
                    updated_count += 1
                    continue

                if self.update_event_with_result(existing_event['id'], match):
                    print(f"✓ Updated: {team1} vs {team2} - Result: {score}")
                    updated_count += 1
                    time.sleep(0.2)  # Rate limiting
                else:
                    print(f"✗ Failed to update: {team1} vs {team2}")
                    error_count += 1

        print("\n" + "="*50)
        print("Sync Summary")
        print("="*50)
        print(f"✓ Added: {added_count} matches")
        print(f"✓ Updated with results: {updated_count} matches")
        print(f"⊘ Skipped: {skipped_count} matches (already exist)")
        if error_count > 0:
            print(f"✗ Errors: {error_count} matches")
        if dry_run:
            print("\n⚠ DRY RUN - No actual changes were made")
        print("\n✓ Sync complete!")
def main():
    """Parse command-line options and run one calendar sync.

    Exits with status 0 when interrupted by the user and status 1 on any
    other unhandled error.
    """
    parser = argparse.ArgumentParser(
        description='Sync Dota 2 Tier 1 matches from Liquipedia to Google Calendar with result updates'
    )
    parser.add_argument(
        '--calendar-id',
        default='primary',
        help='Google Calendar ID (default: primary). Use email address for specific calendar.'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Perform a dry run without actually creating/updating events'
    )
    parser.add_argument(
        '--no-results',
        action='store_true',
        help='Skip updating completed match results'
    )
    parser.add_argument(
        '--credentials',
        default='credentials.json',
        help='Path to Google service account credentials JSON file'
    )
    args = parser.parse_args()

    # Important notice
    print("\n" + "!"*60)
    print("Dota 2 Calendar Sync v2 - Now with Match Results!")
    print("Service Account: calendar-bot@tunpok.iam.gserviceaccount.com")
    print("!"*60 + "\n")

    # Top-level boundary: report any failure and turn it into an exit status.
    try:
        sync = Dota2CalendarSync(
            credentials_file=args.credentials,
            calendar_id=args.calendar_id
        )
        sync.sync_matches_to_calendar(
            dry_run=args.dry_run,
            update_results=not args.no_results
        )
    except KeyboardInterrupt:
        print("\n\nSync cancelled by user")
        sys.exit(0)
    except Exception as e:
        print(f"\n✗ Fatal error: {e}")
        sys.exit(1)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()