#!/usr/bin/env python3
"""
Dota 2 Tournament Calendar Sync
Fetches Tier 1 Dota 2 matches from Liquipedia and syncs them to Google Calendar
"""

import argparse
import hashlib
import re
import sys
from datetime import datetime, timedelta

import pytz
import requests
from bs4 import BeautifulSoup
from google.oauth2 import service_account
from googleapiclient.discovery import build


class Dota2CalendarSync:
    """Scrape upcoming Tier 1 Dota 2 matches and add them to a Google Calendar.

    Matches are read from the Liquipedia matches page, reduced to a small dict
    (teams, start time, optional format and tournament, and a stable ``id``),
    and inserted as calendar events. The ``id`` is written into each event
    description so later runs can recognise events that already exist.
    """

    LIQUIPEDIA_MATCHES_URL = 'https://liquipedia.net/dota2/Liquipedia:Matches'
    USER_AGENT = 'Dota2CalendarSync/1.0 (https://github.com/youruser/dota2-calendar)'

    # A match counts as Tier 1 when any of these substrings appears in the
    # text of the element that surrounds its timestamp.
    TIER1_KEYWORDS = (
        'TI2025', 'The International', 'Major', 'Premier',
        'Tier 1', 'DreamLeague', 'ESL One', 'PGL Major',
    )

    # Flattened Liquipedia text looks like "XGvs(Bo3)FalconsTI2025".
    _VS_WITH_FORMAT_RE = re.compile(
        r'([A-Za-z0-9\s\.\-_]+?)vs\(?(Bo\d)\)?([A-Za-z0-9\s\.\-_]+?)(?:TI2025|Round|Playoff|Group|\+|$)'
    )
    # Fallback for entries that carry no "BoN" marker.
    _VS_PLAIN_RE = re.compile(
        r'([A-Za-z0-9\s\.\-_]+?)vs([A-Za-z0-9\s\.\-_]+?)(?:TI2025|Round|Playoff|Group|\+|$)'
    )

    # Estimated event length in hours per series format, checked in order.
    _FORMAT_DURATION_HOURS = (('Bo5', 4), ('Bo3', 3), ('Bo1', 1))
    _DEFAULT_DURATION_HOURS = 2

    # Minimum look-ahead (days) used when searching the calendar for
    # events created by earlier runs.
    _MIN_LOOKAHEAD_DAYS = 30

    def __init__(self, credentials_file='credentials.json', calendar_id='primary'):
        """Store the configuration and authenticate immediately.

        Args:
            credentials_file: Path to the service account JSON key file.
            calendar_id: ID of the calendar that events are read from and
                written to.
        """
        self.credentials_file = credentials_file
        self.calendar_id = calendar_id
        self.service = self._authenticate()

    def _authenticate(self):
        """Authenticate with Google Calendar using service account credentials.

        Returns:
            The Calendar v3 service object.

        Exits the process with status 1 when the credentials cannot be loaded
        or the service cannot be built.
        """
        try:
            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_file,
                scopes=['https://www.googleapis.com/auth/calendar'],
            )
            service = build('calendar', 'v3', credentials=credentials)
            print("✓ Successfully authenticated with Google Calendar")
            return service
        except Exception as e:
            print(f"✗ Authentication failed: {e}")
            sys.exit(1)

    def fetch_liquipedia_matches(self):
        """Fetch Tier 1 matches from Liquipedia.

        Returns:
            A list of match dicts, de-duplicated by their ``id``. An empty
            list is returned when the HTTP request fails.
        """
        headers = {'User-Agent': self.USER_AGENT}

        print("Fetching matches from Liquipedia...")

        try:
            response = requests.get(
                self.LIQUIPEDIA_MATCHES_URL, headers=headers, timeout=30
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"✗ Error fetching Liquipedia data: {e}")
            return []

        soup = BeautifulSoup(response.text, 'lxml')

        # Every match on the page carries a <span data-timestamp="...">; the
        # nearest enclosing <div> holds the rest of the match information.
        unique_matches = {}
        for timestamp_elem in soup.find_all('span', {'data-timestamp': True}):
            parent = timestamp_elem.find_parent('div')
            if parent is None:
                continue

            text_content = parent.get_text()
            if not any(keyword in text_content for keyword in self.TIER1_KEYWORDS):
                continue

            match_data = self._parse_match_from_timestamp_element(parent, timestamp_elem)
            # Keyed by ID so the same match found twice is kept once; a later
            # occurrence replaces an earlier one.
            if match_data and match_data.get('id'):
                unique_matches[match_data['id']] = match_data

        matches = list(unique_matches.values())
        print(f"✓ Found {len(matches)} Tier 1 matches")
        return matches

    @staticmethod
    def _timestamp_to_datetime(timestamp):
        """Convert a ``data-timestamp`` attribute value to an aware UTC datetime.

        Raises whatever ``int()`` or ``datetime.fromtimestamp()`` raise for
        an invalid value; callers handle that as an unparseable element.
        """
        return datetime.fromtimestamp(int(timestamp), tz=pytz.UTC)

    @staticmethod
    def _report_parse_failure(error):
        """Report a skipped element on stderr instead of dropping it silently."""
        print(f"⚠ Skipping unparseable match element: {error}", file=sys.stderr)

    @staticmethod
    def _detect_tournament(text):
        """Return a tournament label derived from the element text, or None."""
        if 'TI2025' in text:
            tournament = 'The International 2025'
            # Also append round info if present
            round_match = re.search(r'Round\s+\d+', text)
            if round_match:
                tournament += f" - {round_match.group(0)}"
            return tournament
        if 'DreamLeague' in text:
            return 'DreamLeague'
        if 'ESL' in text:
            return 'ESL'
        if 'Major' in text:
            # Try to extract the full major name
            major_match = re.search(r'[\w\s]+Major', text)
            if major_match:
                return major_match.group(0).strip()
        return None

    def _parse_match_from_timestamp_element(self, parent, timestamp_elem):
        """Parse match data from an element containing a timestamp.

        Args:
            parent: Element whose text holds the teams, format and tournament.
            timestamp_elem: Element carrying the ``data-timestamp`` attribute.

        Returns:
            A match dict with ``datetime``, ``team1``, ``team2``, ``id`` and,
            when detected, ``format`` and ``tournament``. None is returned
            when the timestamp is missing, no two team names can be
            extracted, or parsing fails.
        """
        try:
            timestamp = timestamp_elem.get('data-timestamp')
            if not timestamp:
                return None

            match_data = {'datetime': self._timestamp_to_datetime(timestamp)}
            text = parent.get_text()

            found = self._VS_WITH_FORMAT_RE.search(text)
            if found:
                team1, format_str, team2 = found.group(1), found.group(2), found.group(3)
            else:
                found = self._VS_PLAIN_RE.search(text)
                if not found:
                    return None
                team1, format_str, team2 = found.group(1), None, found.group(2)

            # Clean up team names - remove any date/time remnants that the
            # flattened text leaves in front of the first team.
            team1 = re.sub(r'^.*CEST?', '', team1.strip()).strip()
            team2 = team2.strip()
            if not team1 or not team2:
                return None

            match_data['team1'] = self._clean_team_name(team1)
            match_data['team2'] = self._clean_team_name(team2)
            if format_str:
                match_data['format'] = format_str

            tournament = self._detect_tournament(text)
            if tournament:
                match_data['tournament'] = tournament

            match_data['id'] = self._generate_match_id(match_data)
            return match_data
        except Exception as e:
            # Best effort: one malformed element must not abort the scrape.
            self._report_parse_failure(e)
            return None

    def _extract_match_from_infobox(self, box):
        """Extract match data from an infobox element.

        Not called by the sync path in this file. Returns a match dict, or
        None when fewer than two team elements are found or parsing fails.
        """
        try:
            match_data = {}

            # Extract teams
            team_spans = box.find_all('span', {'class': re.compile(r'team-template|team-name')})
            if len(team_spans) >= 2:
                match_data['team1'] = self._clean_team_name(team_spans[0].get_text())
                match_data['team2'] = self._clean_team_name(team_spans[1].get_text())

            # Extract tournament
            tournament_link = box.find('a', href=re.compile(r'/dota2/[^#]+'))
            if tournament_link:
                match_data['tournament'] = tournament_link.get_text().strip()

            # Extract datetime
            timer = box.find('span', {'class': 'timer-object', 'data-timestamp': True})
            if timer:
                match_data['datetime'] = self._timestamp_to_datetime(
                    timer.get('data-timestamp')
                )

            # Extract format
            format_text = box.find(string=re.compile(r'Bo\d'))
            if format_text:
                match_data['format'] = format_text.strip()

            if 'team1' in match_data and 'team2' in match_data:
                match_data['id'] = self._generate_match_id(match_data)
                return match_data
        except Exception as e:
            self._report_parse_failure(e)

        return None

    def _extract_match_from_row(self, row):
        """Extract match data from a table row.

        Not called by the sync path in this file. Returns a match dict, or
        None when the row has fewer than two cells, fewer than two team
        elements, or parsing fails.
        """
        try:
            cells = row.find_all('td')
            if len(cells) < 2:
                return None

            match_data = {}

            # Try to extract date/time from the first cell
            timer = cells[0].find('span', {'class': 'timer-object', 'data-timestamp': True})
            if timer:
                match_data['datetime'] = self._timestamp_to_datetime(
                    timer.get('data-timestamp')
                )

            # Extract teams: first team-like span of each cell, in cell order
            team_class = re.compile(r'team')
            team_cells = []
            for cell in cells:
                team_elem = cell.find('span', {'class': team_class})
                if team_elem:
                    team_cells.append(team_elem)

            if len(team_cells) >= 2:
                match_data['team1'] = self._clean_team_name(team_cells[0].get_text())
                match_data['team2'] = self._clean_team_name(team_cells[1].get_text())

            # Look for tournament info: first wiki link that is not a team link
            tournament_href = re.compile(r'/dota2/[^#]+')
            for cell in cells:
                link = cell.find('a', href=tournament_href)
                if link and 'team' not in link.get('href', ''):
                    match_data['tournament'] = link.get_text().strip()
                    break

            if 'team1' in match_data and 'team2' in match_data:
                match_data['id'] = self._generate_match_id(match_data)
                return match_data
        except Exception as e:
            self._report_parse_failure(e)

        return None

    def _extract_match_with_timer(self, parent, timer):
        """Extract match data when we have a timer element.

        Not called by the sync path in this file. Returns a match dict, or
        None when fewer than two team elements are found or parsing fails.
        """
        try:
            match_data = {
                'datetime': self._timestamp_to_datetime(timer.get('data-timestamp')),
            }

            # Extract teams
            team_elems = parent.find_all('span', {'class': re.compile(r'team')})
            if len(team_elems) >= 2:
                match_data['team1'] = self._clean_team_name(team_elems[0].get_text())
                match_data['team2'] = self._clean_team_name(team_elems[1].get_text())

            # Extract tournament
            tournament_link = parent.find('a', href=re.compile(r'/dota2/[^#]+'))
            if tournament_link:
                match_data['tournament'] = tournament_link.get_text().strip()

            if 'team1' in match_data and 'team2' in match_data:
                match_data['id'] = self._generate_match_id(match_data)
                return match_data
        except Exception as e:
            self._report_parse_failure(e)

        return None

    def _clean_team_name(self, name):
        """Collapse whitespace and drop one trailing parenthetical from a team name."""
        name = re.sub(r'\s+', ' ', name).strip()
        name = re.sub(r'\s*\(.*?\)\s*$', '', name)  # Remove parenthetical info
        return name

    def _generate_match_id(self, match_data):
        """Generate a stable 16-hex-character ID for a match.

        The ID is derived from whichever of team1, team2, the match *date*
        (not time) and tournament are present, in that order. It is stored in
        event descriptions and used for duplicate detection, so the recipe
        must stay unchanged for events created by earlier runs to be found.
        """
        id_parts = [match_data[key] for key in ('team1', 'team2') if key in match_data]
        if 'datetime' in match_data:
            id_parts.append(str(match_data['datetime'].date()))
        if 'tournament' in match_data:
            id_parts.append(match_data['tournament'])

        unique_string = '_'.join(id_parts)
        # MD5 serves only as a short fingerprint here, not for security.
        return hashlib.md5(unique_string.encode()).hexdigest()[:16]

    def _list_existing_events(self, days_ahead):
        """Return ``{match_id: event}`` for Dota 2 events in the next *days_ahead* days.

        Follows result pagination so that calendars with many events are read
        completely. Unlike ``get_existing_events`` this lets API errors
        propagate, so a caller can tell "no events" from "lookup failed".
        """
        now = datetime.now(pytz.UTC)
        time_min = now.isoformat()
        time_max = (now + timedelta(days=days_ahead)).isoformat()

        print("Checking existing events in calendar...")

        events_api = self.service.events()
        request = events_api.list(
            calendarId=self.calendar_id,
            timeMin=time_min,
            timeMax=time_max,
            maxResults=250,
            singleEvents=True,
            orderBy='startTime',
        )

        dota_events = {}
        while request is not None:
            events_result = request.execute()
            for event in events_result.get('items', []):
                if 'Dota 2' not in event.get('summary', ''):
                    continue
                # The match ID is embedded in the description by
                # create_calendar_event as "ID: <hex>".
                id_match = re.search(r'ID:\s*([a-f0-9]+)', event.get('description', ''))
                if id_match:
                    dota_events[id_match.group(1)] = event
            # list_next returns None once there are no further pages.
            request = events_api.list_next(request, events_result)

        print(f"✓ Found {len(dota_events)} existing Dota 2 events")
        return dota_events

    def get_existing_events(self, days_ahead=30):
        """Get existing Dota 2 events from Google Calendar.

        Args:
            days_ahead: Size of the look-ahead window in days, from now.

        Returns:
            A dict mapping match ID to the calendar event that carries it.
            An empty dict is returned when the calendar cannot be read.
        """
        try:
            return self._list_existing_events(days_ahead)
        except Exception as e:
            print(f"✗ Error fetching calendar events: {e}")
            return {}

    def create_calendar_event(self, match_data):
        """Build the Google Calendar event body for a match.

        This only constructs the request body; it does not call the API.

        Args:
            match_data: Match dict; must contain ``id``. ``team1``/``team2``
                default to "TBD" and ``datetime`` to the current time.

        Returns:
            A dict suitable as the ``body`` of ``events().insert``.
        """
        team1 = match_data.get('team1', 'TBD')
        team2 = match_data.get('team2', 'TBD')
        tournament = match_data.get('tournament', '')

        if tournament:
            summary = f"Dota 2 - {tournament}: {team1} vs {team2}"
        else:
            summary = f"Dota 2: {team1} vs {team2}"

        description_parts = []
        if tournament:
            description_parts.append(f"Tournament: {tournament}")
        description_parts.append(f"Match: {team1} vs {team2}")
        if 'format' in match_data:
            description_parts.append(f"Format: {match_data['format']}")
        # The "ID:" line is what later runs parse to detect existing events.
        description_parts.append(f"ID: {match_data['id']}")
        description_parts.append("\nSource: Liquipedia")
        description = '\n'.join(description_parts)

        start_time = match_data.get('datetime', datetime.now(pytz.UTC))

        # Estimate match duration from the series format
        match_format = match_data.get('format', '')
        duration = next(
            (hours for marker, hours in self._FORMAT_DURATION_HOURS if marker in match_format),
            self._DEFAULT_DURATION_HOURS,
        )
        end_time = start_time + timedelta(hours=duration)

        return {
            'summary': summary,
            'description': description,
            'start': {
                'dateTime': start_time.isoformat(),
                'timeZone': 'UTC',
            },
            'end': {
                'dateTime': end_time.isoformat(),
                'timeZone': 'UTC',
            },
            'reminders': {
                'useDefault': False,
                'overrides': [
                    {'method': 'popup', 'minutes': 30},
                ],
            },
            'colorId': '9',  # Blue color for Dota 2 events
        }

    def sync_matches_to_calendar(self, dry_run=False):
        """Fetch matches and add the ones missing from the calendar.

        Args:
            dry_run: When true, report what would be added without inserting
                any event.

        The sync is aborted, without inserting anything, when the existing
        events cannot be read: proceeding would re-add every match as a
        duplicate.
        """
        print("\n" + "=" * 50)
        print("Starting Dota 2 Calendar Sync")
        print("=" * 50 + "\n")

        matches = self.fetch_liquipedia_matches()
        if not matches:
            print("No matches found to sync")
            return

        # Filter for future matches only
        now = datetime.now(pytz.UTC)
        future_matches = [m for m in matches if m.get('datetime', now) >= now]
        print(f"Filtered to {len(future_matches)} future matches")

        if not future_matches:
            print("No future matches to sync")
            return

        # Look far enough ahead to cover the latest scraped match, otherwise
        # matches beyond the window would be re-added on every run.
        latest_start = max(m.get('datetime', now) for m in future_matches)
        days_ahead = max(self._MIN_LOOKAHEAD_DAYS, (latest_start - now).days + 2)

        try:
            existing_events = self._list_existing_events(days_ahead)
        except Exception as e:
            print(f"✗ Error fetching calendar events: {e}")
            print("✗ Aborting sync: cannot check for existing events without risking duplicates")
            return

        added_count = 0
        skipped_count = 0
        error_count = 0

        print("\nProcessing matches...")
        print("-" * 30)

        for match in future_matches:
            match_id = match.get('id')
            if not match_id:
                continue

            team1 = match.get('team1', 'TBD')
            team2 = match.get('team2', 'TBD')
            match_time = match.get('datetime', now)

            if match_id in existing_events:
                print(f"⊘ Skipping (exists): {team1} vs {team2}")
                skipped_count += 1
                continue

            if dry_run:
                print(f"◯ Would add: {team1} vs {team2} at {match_time.strftime('%Y-%m-%d %H:%M UTC')}")
                added_count += 1
                continue

            try:
                event = self.create_calendar_event(match)
                self.service.events().insert(
                    calendarId=self.calendar_id,
                    body=event,
                ).execute()
                print(f"✓ Added: {team1} vs {team2} at {match_time.strftime('%Y-%m-%d %H:%M UTC')}")
                added_count += 1
            except Exception as e:
                # One failed insert must not stop the remaining matches.
                print(f"✗ Error adding {team1} vs {team2}: {e}")
                error_count += 1

        # Summary
        print("\n" + "=" * 50)
        print("Sync Summary")
        print("=" * 50)
        print(f"✓ Added: {added_count} matches")
        print(f"⊘ Skipped: {skipped_count} matches (already exist)")
        if error_count > 0:
            print(f"✗ Errors: {error_count} matches")

        if dry_run:
            print("\n⚠ DRY RUN - No actual changes were made")

        print("\n✓ Sync complete!")


def main():
    """Parse command-line options, print the setup notice and run one sync."""
    parser = argparse.ArgumentParser(
        description='Sync Dota 2 Tier 1 matches from Liquipedia to Google Calendar'
    )
    parser.add_argument(
        '--calendar-id',
        default='primary',
        help='Google Calendar ID (default: primary). Use email address for specific calendar.'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Perform a dry run without actually creating events'
    )
    parser.add_argument(
        '--credentials',
        default='credentials.json',
        help='Path to Google service account credentials JSON file'
    )

    args = parser.parse_args()

    # Important notice
    print("\n" + "!" * 60)
    print("IMPORTANT: Before using this script:")
    print("1. Share your Google Calendar with the service account")
    print("   Service Account Email: calendar-bot@tunpok.iam.gserviceaccount.com")
    print("2. Grant 'Make changes to events' permission")
    print("3. Use your calendar email as --calendar-id parameter")
    print("!" * 60 + "\n")

    # Initialize and run sync
    try:
        sync = Dota2CalendarSync(
            credentials_file=args.credentials,
            calendar_id=args.calendar_id
        )
        sync.sync_matches_to_calendar(dry_run=args.dry_run)
    except KeyboardInterrupt:
        print("\n\nSync cancelled by user")
        sys.exit(0)
    except Exception as e:
        print(f"\n✗ Fatal error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()