dota2-match-calendar/legacy/sync_dota2_matches.py
Ching L a1573a3f82 Initial commit: Dota 2 Calendar Sync
Automated sync tool for Dota 2 Tier 1 matches from Liquipedia to Google Calendar

Features:
- Sync matches from Liquipedia to Google Calendar
- Auto-update match results when completed
- Detect and update time changes
- Clean event format: Team1 vs Team2 [Tournament]

Main files:
- sync_dota2_matches.py: Main sync script
- run_sync.sh: Convenience runner
- requirements.txt: Python dependencies
- README.md: Setup and usage guide

# Conflicts:
#	.gitignore
#	README.md
2025-09-05 11:53:02 +08:00

519 lines
19 KiB
Python

#!/usr/bin/env python3
"""
Dota 2 Tournament Calendar Sync
Fetches Tier 1 Dota 2 matches from Liquipedia and syncs them to Google Calendar
"""
import argparse
import hashlib
import re
import sys
from datetime import datetime, timedelta, timezone

import pytz
import requests
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build
class Dota2CalendarSync:
    """Sync Tier 1 Dota 2 matches from Liquipedia into a Google Calendar.

    The workflow is: scrape the Liquipedia matches page, keep matches whose
    surrounding text mentions a Tier 1 keyword, and insert a calendar event
    for every future match whose ID is not already present in the calendar.
    Match IDs are stored in each event description as ``ID: <hex>``.
    """

    LIQUIPEDIA_URL = 'https://liquipedia.net/dota2/Liquipedia:Matches'
    USER_AGENT = 'Dota2CalendarSync/1.0 (https://github.com/youruser/dota2-calendar)'

    # A match is treated as Tier 1 when its container text has any of these.
    TIER1_KEYWORDS = (
        'TI2025', 'The International', 'Major', 'Premier',
        'Tier 1', 'DreamLeague', 'ESL One', 'PGL Major',
    )

    # Liquipedia text looks like "XGvs(Bo3)FalconsTI2025"; the second pattern
    # is the fallback for entries that carry no "BoN" format marker.
    _VS_WITH_FORMAT = re.compile(
        r'([A-Za-z0-9\s\.\-_]+?)vs\(?(Bo\d)\)?([A-Za-z0-9\s\.\-_]+?)'
        r'(?:TI2025|Round|Playoff|Group|\+|$)'
    )
    _VS_PLAIN = re.compile(
        r'([A-Za-z0-9\s\.\-_]+?)vs([A-Za-z0-9\s\.\-_]+?)'
        r'(?:TI2025|Round|Playoff|Group|\+|$)'
    )
    # Date/time remnants end in a "CET" or "CEST" zone label.  The previous
    # pattern ("CEST?") made the T optional instead of the S, so it missed
    # "CET" and also chopped team names that merely contained "CES".
    _TZ_PREFIX = re.compile(r'^.*CES?T')

    # Estimated event length in hours per series format, checked in order.
    _FORMAT_HOURS = (('Bo5', 4), ('Bo3', 3), ('Bo1', 1))
    DEFAULT_MATCH_HOURS = 2
    DEFAULT_LOOKAHEAD_DAYS = 30

    def __init__(self, credentials_file='credentials.json', calendar_id='primary'):
        """Store the configuration and authenticate immediately.

        Args:
            credentials_file: Path to the service account JSON key file.
            calendar_id: ID of the calendar that events are read from and
                written to.
        """
        self.credentials_file = credentials_file
        self.calendar_id = calendar_id
        self.service = self._authenticate()

    def _authenticate(self):
        """Authenticate with Google Calendar using service account credentials.

        Returns:
            The Calendar v3 service object.  On any failure the error is
            printed and the process exits with status 1.
        """
        try:
            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_file,
                scopes=['https://www.googleapis.com/auth/calendar']
            )
            service = build('calendar', 'v3', credentials=credentials)
            print("✓ Successfully authenticated with Google Calendar")
            return service
        except Exception as e:
            print(f"✗ Authentication failed: {e}")
            sys.exit(1)

    def fetch_liquipedia_matches(self):
        """Fetch Tier 1 matches from Liquipedia.

        Returns:
            A list of match dicts, de-duplicated by their ``id``.  An empty
            list is returned when the HTTP request fails.
        """
        headers = {'User-Agent': self.USER_AGENT}
        print("Fetching matches from Liquipedia...")
        try:
            response = requests.get(self.LIQUIPEDIA_URL, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"✗ Error fetching Liquipedia data: {e}")
            return []

        soup = BeautifulSoup(response.text, 'lxml')
        unique_matches = {}
        # Every match on the page carries a span with a data-timestamp
        # attribute; its enclosing div holds the rest of the match text.
        for timestamp_elem in soup.find_all('span', {'data-timestamp': True}):
            parent = timestamp_elem.find_parent('div')
            if parent is None:
                continue
            text_content = parent.get_text()
            if not any(keyword in text_content for keyword in self.TIER1_KEYWORDS):
                continue
            match_data = self._parse_match_from_timestamp_element(parent, timestamp_elem)
            if match_data and match_data.get('id'):
                # Later duplicates overwrite earlier ones with the same ID.
                unique_matches[match_data['id']] = match_data

        matches = list(unique_matches.values())
        print(f"✓ Found {len(matches)} Tier 1 matches")
        return matches

    @staticmethod
    def _report_parse_failure(source, error):
        """Report a skipped element on stderr instead of dropping it silently."""
        print(f"⚠ Skipping unparseable {source}: {error}", file=sys.stderr)

    @staticmethod
    def _timestamp_to_datetime(value):
        """Convert a Unix timestamp (int or numeric string) to an aware UTC datetime."""
        return datetime.fromtimestamp(int(value), tz=timezone.utc)

    @classmethod
    def _split_teams(cls, text):
        """Extract ``(team1, team2, format)`` from concatenated match text.

        ``format`` is a string such as ``"Bo3"`` or ``None`` when the text has
        no format marker.  All three values are ``None`` when no
        "<team>vs<team>" pair is found.
        """
        found = cls._VS_WITH_FORMAT.search(text)
        if found:
            team1, match_format, team2 = found.groups()
        else:
            found = cls._VS_PLAIN.search(text)
            if not found:
                return None, None, None
            team1, team2 = found.groups()
            match_format = None
        # Drop everything up to and including a leading time-zone label.
        team1 = cls._TZ_PREFIX.sub('', team1.strip()).strip()
        return team1, team2.strip(), match_format

    @staticmethod
    def _detect_tournament(text):
        """Return a tournament name inferred from keywords in *text*, or None."""
        if 'TI2025' in text:
            name = 'The International 2025'
            round_match = re.search(r'Round\s+\d+', text)
            if round_match:
                name += f" - {round_match.group(0)}"
            return name
        if 'DreamLeague' in text:
            return 'DreamLeague'
        if 'ESL' in text:
            return 'ESL'
        if 'Major' in text:
            # Try to capture the words preceding "Major" as the full name.
            major_match = re.search(r'[\w\s]+Major', text)
            if major_match:
                return major_match.group(0).strip()
        return None

    def _finalize_match(self, match_data):
        """Attach an ID and return *match_data* if both teams are known, else None."""
        if 'team1' in match_data and 'team2' in match_data:
            match_data['id'] = self._generate_match_id(match_data)
            return match_data
        return None

    def _parse_match_from_timestamp_element(self, parent, timestamp_elem):
        """Parse match data from an element containing a timestamp.

        Args:
            parent: Element whose ``get_text()`` yields the full match text.
            timestamp_elem: Element whose ``data-timestamp`` attribute holds
                the match start as a Unix timestamp.

        Returns:
            A match dict with ``datetime``, ``team1``, ``team2``, ``id`` and,
            when detected, ``format`` and ``tournament``; ``None`` when the
            timestamp or the teams cannot be determined.
        """
        try:
            timestamp = timestamp_elem.get('data-timestamp')
            if not timestamp:
                return None
            match_data = {'datetime': self._timestamp_to_datetime(timestamp)}

            text = parent.get_text()
            team1, team2, match_format = self._split_teams(text)
            if not (team1 and team2):
                return None
            match_data['team1'] = self._clean_team_name(team1)
            match_data['team2'] = self._clean_team_name(team2)
            if match_format:
                match_data['format'] = match_format

            tournament = self._detect_tournament(text)
            if tournament:
                match_data['tournament'] = tournament

            match_data['id'] = self._generate_match_id(match_data)
            return match_data
        except Exception as e:
            # Best effort: one malformed element must not abort the scrape.
            self._report_parse_failure('match element', e)
            return None

    def _extract_match_from_infobox(self, box):
        """Extract match data from an infobox element.

        Returns a match dict, or ``None`` when two teams are not found or
        parsing fails.
        """
        try:
            match_data = {}
            team_spans = box.find_all('span', {'class': re.compile(r'team-template|team-name')})
            if len(team_spans) >= 2:
                match_data['team1'] = self._clean_team_name(team_spans[0].get_text())
                match_data['team2'] = self._clean_team_name(team_spans[1].get_text())

            tournament_link = box.find('a', href=re.compile(r'/dota2/[^#]+'))
            if tournament_link:
                match_data['tournament'] = tournament_link.get_text().strip()

            timer = box.find('span', {'class': 'timer-object', 'data-timestamp': True})
            if timer:
                match_data['datetime'] = self._timestamp_to_datetime(timer.get('data-timestamp'))

            format_text = box.find(string=re.compile(r'Bo\d'))
            if format_text:
                match_data['format'] = format_text.strip()

            return self._finalize_match(match_data)
        except Exception as e:
            self._report_parse_failure('infobox', e)
            return None

    def _extract_match_from_row(self, row):
        """Extract match data from a table row.

        Returns a match dict, or ``None`` when the row has fewer than two
        cells, two teams are not found, or parsing fails.
        """
        try:
            cells = row.find_all('td')
            if len(cells) < 2:
                return None
            match_data = {}

            # The date/time, when present, is looked up in the first cell.
            timer = cells[0].find('span', {'class': 'timer-object', 'data-timestamp': True})
            if timer:
                match_data['datetime'] = self._timestamp_to_datetime(timer.get('data-timestamp'))

            team_pattern = re.compile(r'team')
            team_cells = []
            for cell in cells:
                team_elem = cell.find('span', {'class': team_pattern})
                if team_elem:
                    team_cells.append(team_elem)
            if len(team_cells) >= 2:
                match_data['team1'] = self._clean_team_name(team_cells[0].get_text())
                match_data['team2'] = self._clean_team_name(team_cells[1].get_text())

            # First /dota2/ link that does not point at a team page.
            link_pattern = re.compile(r'/dota2/[^#]+')
            for cell in cells:
                link = cell.find('a', href=link_pattern)
                if link and 'team' not in link.get('href', ''):
                    match_data['tournament'] = link.get_text().strip()
                    break

            return self._finalize_match(match_data)
        except Exception as e:
            self._report_parse_failure('table row', e)
            return None

    def _extract_match_with_timer(self, parent, timer):
        """Extract match data when a timer element is already known.

        Returns a match dict, or ``None`` when two teams are not found or
        parsing fails (including an unreadable timestamp).
        """
        try:
            match_data = {
                'datetime': self._timestamp_to_datetime(timer.get('data-timestamp')),
            }

            team_elems = parent.find_all('span', {'class': re.compile(r'team')})
            if len(team_elems) >= 2:
                match_data['team1'] = self._clean_team_name(team_elems[0].get_text())
                match_data['team2'] = self._clean_team_name(team_elems[1].get_text())

            tournament_link = parent.find('a', href=re.compile(r'/dota2/[^#]+'))
            if tournament_link:
                match_data['tournament'] = tournament_link.get_text().strip()

            return self._finalize_match(match_data)
        except Exception as e:
            self._report_parse_failure('timer element', e)
            return None

    def _clean_team_name(self, name):
        """Collapse whitespace and drop a trailing parenthetical from a team name."""
        name = re.sub(r'\s+', ' ', name).strip()
        name = re.sub(r'\s*\(.*?\)\s*$', '', name)  # Remove parenthetical info
        return name

    def _generate_match_id(self, match_data):
        """Generate a 16-hex-character ID for a match.

        The ID is derived from whichever of team1, team2, the match date and
        the tournament are present, joined with underscores.  The derivation
        must stay stable: IDs written to existing calendar events are compared
        against freshly generated ones to detect duplicates.
        """
        id_parts = []
        if 'team1' in match_data:
            id_parts.append(match_data['team1'])
        if 'team2' in match_data:
            id_parts.append(match_data['team2'])
        if 'datetime' in match_data:
            id_parts.append(str(match_data['datetime'].date()))
        if 'tournament' in match_data:
            id_parts.append(match_data['tournament'])
        unique_string = '_'.join(id_parts)
        # MD5 is used only as a short fingerprint, not for security.
        return hashlib.md5(unique_string.encode()).hexdigest()[:16]

    def get_existing_events(self, days_ahead=30):
        """Get existing Dota 2 events from Google Calendar.

        Args:
            days_ahead: Size of the look-ahead window, in days from now.

        Returns:
            A dict mapping match ID to the calendar event that carries it.
            Only events whose summary contains "Dota 2" and whose description
            contains an ``ID: <hex>`` line are included.  An empty dict is
            returned when the API call fails.
        """
        try:
            now = datetime.now(timezone.utc)
            time_min = now.isoformat()
            time_max = (now + timedelta(days=days_ahead)).isoformat()
            print("Checking existing events in calendar...")

            # Follow nextPageToken so events past the first page are not
            # missed (a missed event would be inserted again as a duplicate).
            events = []
            page_token = None
            while True:
                events_result = self.service.events().list(
                    calendarId=self.calendar_id,
                    timeMin=time_min,
                    timeMax=time_max,
                    maxResults=200,
                    singleEvents=True,
                    orderBy='startTime',
                    pageToken=page_token
                ).execute()
                events.extend(events_result.get('items', []))
                page_token = events_result.get('nextPageToken')
                if not page_token:
                    break

            dota_events = {}
            for event in events:
                if 'Dota 2' not in event.get('summary', ''):
                    continue
                id_match = re.search(r'ID:\s*([a-f0-9]+)', event.get('description', ''))
                if id_match:
                    dota_events[id_match.group(1)] = event
            print(f"✓ Found {len(dota_events)} existing Dota 2 events")
            return dota_events
        except Exception as e:
            # NOTE: callers cannot tell this apart from "no events exist".
            print(f"✗ Error fetching calendar events: {e}")
            return {}

    def create_calendar_event(self, match_data):
        """Build the Google Calendar event body for a match.

        Args:
            match_data: Match dict; must contain ``id``.  ``team1``/``team2``
                default to "TBD", ``datetime`` defaults to the current time,
                and ``format`` (e.g. "Bo3") selects the estimated duration.

        Returns:
            A dict suitable as the ``body`` of an ``events().insert`` call.

        Raises:
            KeyError: If ``match_data`` has no ``id``.
        """
        team1 = match_data.get('team1', 'TBD')
        team2 = match_data.get('team2', 'TBD')
        tournament = match_data.get('tournament', '')

        if tournament:
            summary = f"Dota 2 - {tournament}: {team1} vs {team2}"
        else:
            summary = f"Dota 2: {team1} vs {team2}"

        description_parts = []
        if tournament:
            description_parts.append(f"Tournament: {tournament}")
        description_parts.append(f"Match: {team1} vs {team2}")
        if 'format' in match_data:
            description_parts.append(f"Format: {match_data['format']}")
        # get_existing_events() recovers the ID from this line.
        description_parts.append(f"ID: {match_data['id']}")
        description_parts.append("\nSource: Liquipedia")
        description = '\n'.join(description_parts)

        start_time = match_data.get('datetime')
        if start_time is None:
            start_time = datetime.now(timezone.utc)

        # Estimate the match duration from the series format.
        duration = self.DEFAULT_MATCH_HOURS
        match_format = match_data.get('format', '')
        for marker, hours in self._FORMAT_HOURS:
            if marker in match_format:
                duration = hours
                break
        end_time = start_time + timedelta(hours=duration)

        return {
            'summary': summary,
            'description': description,
            'start': {
                'dateTime': start_time.isoformat(),
                'timeZone': 'UTC',
            },
            'end': {
                'dateTime': end_time.isoformat(),
                'timeZone': 'UTC',
            },
            'reminders': {
                'useDefault': False,
                'overrides': [
                    {'method': 'popup', 'minutes': 30},
                ],
            },
            'colorId': '9',  # Blue color for Dota 2 events
        }

    def sync_matches_to_calendar(self, dry_run=False):
        """Fetch matches and add the missing future ones to the calendar.

        Args:
            dry_run: When true, report what would be added without inserting
                any event.
        """
        print("\n" + "="*50)
        print("Starting Dota 2 Calendar Sync")
        print("="*50 + "\n")

        matches = self.fetch_liquipedia_matches()
        if not matches:
            print("No matches found to sync")
            return

        # Filter for future matches only
        now = datetime.now(timezone.utc)
        future_matches = [m for m in matches if m.get('datetime', now) >= now]
        print(f"Filtered to {len(future_matches)} future matches")
        if not future_matches:
            print("No future matches to sync")
            return

        # Look far enough ahead to cover the latest match; otherwise a match
        # beyond the default window would be re-added on every run.
        latest_start = max(
            (m['datetime'] for m in future_matches if 'datetime' in m),
            default=now,
        )
        days_ahead = max(self.DEFAULT_LOOKAHEAD_DAYS, (latest_start - now).days + 2)
        existing_events = self.get_existing_events(days_ahead=days_ahead)

        added_count = 0
        skipped_count = 0
        error_count = 0
        print("\nProcessing matches...")
        print("-" * 30)
        for match in future_matches:
            match_id = match.get('id')
            if not match_id:
                continue
            team1 = match.get('team1', 'TBD')
            team2 = match.get('team2', 'TBD')
            match_time = match.get('datetime', now)
            when = match_time.strftime('%Y-%m-%d %H:%M UTC')

            if match_id in existing_events:
                print(f"⊘ Skipping (exists): {team1} vs {team2}")
                skipped_count += 1
                continue
            if dry_run:
                print(f"◯ Would add: {team1} vs {team2} at {when}")
                added_count += 1
                continue
            try:
                event = self.create_calendar_event(match)
                self.service.events().insert(
                    calendarId=self.calendar_id,
                    body=event
                ).execute()
                print(f"✓ Added: {team1} vs {team2} at {when}")
                added_count += 1
            except Exception as e:
                # Keep going: one failed insert should not stop the sync.
                print(f"✗ Error adding {team1} vs {team2}: {e}")
                error_count += 1

        # Summary
        print("\n" + "="*50)
        print("Sync Summary")
        print("="*50)
        print(f"✓ Added: {added_count} matches")
        print(f"⊘ Skipped: {skipped_count} matches (already exist)")
        if error_count > 0:
            print(f"✗ Errors: {error_count} matches")
        if dry_run:
            print("\n⚠ DRY RUN - No actual changes were made")
        print("\n✓ Sync complete!")
def main():
    """Command-line entry point: parse options, print the setup notice, run the sync.

    Exits with status 0 when interrupted by the user and with status 1 on
    any other unhandled exception.
    """
    cli = argparse.ArgumentParser(
        description='Sync Dota 2 Tier 1 matches from Liquipedia to Google Calendar'
    )
    cli.add_argument(
        '--calendar-id',
        default='primary',
        help='Google Calendar ID (default: primary). Use email address for specific calendar.',
    )
    cli.add_argument(
        '--dry-run',
        action='store_true',
        help='Perform a dry run without actually creating events',
    )
    cli.add_argument(
        '--credentials',
        default='credentials.json',
        help='Path to Google service account credentials JSON file',
    )
    options = cli.parse_args()

    # Setup reminder shown on every run, framed by two rules of "!".
    rule = "!"*60
    notice_lines = (
        "\n" + rule,
        "IMPORTANT: Before using this script:",
        "1. Share your Google Calendar with the service account",
        " Service Account Email: calendar-bot@tunpok.iam.gserviceaccount.com",
        "2. Grant 'Make changes to events' permission",
        "3. Use your calendar email as --calendar-id parameter",
        rule + "\n",
    )
    for notice_line in notice_lines:
        print(notice_line)

    try:
        syncer = Dota2CalendarSync(
            credentials_file=options.credentials,
            calendar_id=options.calendar_id,
        )
        syncer.sync_matches_to_calendar(dry_run=options.dry_run)
    except KeyboardInterrupt:
        print("\n\nSync cancelled by user")
        sys.exit(0)
    except Exception as error:
        print(f"\n✗ Fatal error: {error}")
        sys.exit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()