From dd5ba77e1ed538dfcbf7bbd3b25b4b46ae2be017 Mon Sep 17 00:00:00 2001 From: Ching L Date: Fri, 5 Sep 2025 18:04:36 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E8=BF=9B=E8=A1=8C=E4=B8=AD?= =?UTF-8?q?=E6=AF=94=E8=B5=9B=E7=9A=84=E5=AE=9E=E6=97=B6=E6=AF=94=E5=88=86?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 使用 BeautifulSoup 正确解析 HTML 结构,提取队名、比分等信息 - 区分系列赛进行中(如 Bo3 1-0)和已完成状态 - 新增 update_event_with_score 方法处理进行中比赛的比分更新 - 扩展处理时间范围到最近 12 小时,确保捕获进行中的比赛 - 修复了 Aurora vs YkBros 等比赛比分无法识别的问题 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CHANGELOG.md | 32 ++++- sync_dota2_matches.py | 311 +++++++++++++++++++++++++++++++++--------- 2 files changed, 269 insertions(+), 74 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf277a7..9ba1e8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # Changelog +## v3.3 - 支持进行中比赛的实时比分 +- **使用 BeautifulSoup 解析 HTML 结构**: + - 重写了 `_parse_match` 方法,使用 DOM 结构而非正则表达式 + - 准确提取队名、比分、赛制和锦标赛信息 + - 解决了 "Aurora1:0(Bo3)YkBros" 格式的解析问题 +- **区分系列赛进行中和已完成状态**: + - 新增 `has_score` 标记,表示比赛是否有比分 + - Bo3 需要 2 胜才标记为完成,Bo5 需要 3 胜 + - 进行中的比赛显示当前比分(如 1-0),但不显示✓标记 +- **新增进行中比分更新功能**: + - 添加 `update_event_with_score` 方法处理进行中比赛 + - 在日历标题显示实时比分(如 "1-0 Team1 vs Team2") + - 在描述中添加 "📊 CURRENT SCORE" 标记 +- **扩展处理时间范围**: + - 处理最近 12 小时内的比赛,捕获正在进行的比赛 + - 确保进行中的比赛比分能及时同步 + ## v3.2.1 - 修复比分识别问题 - **修复错误的比分解析**: - 修复了将日期时间误识别为比分的问题(如 "19-00") @@ -47,13 +64,14 @@ ## 功能对比 -| 版本 | 同步比赛 | 更新结果 | 时间变更 | 新格式 | TBD优化 | -|------|---------|---------|---------|--------|---------| -| v1.0 | ✓ | ✗ | ✗ | ✗ | ✗ | -| v2.0 | ✓ | ✓ | ✗ | ✗ | ✗ | -| v3.0 | ✓ | ✓ | ✓ | ✗ | ✗ | -| v3.1 | ✓ | ✓ | ✓ | ✓ | ✗ | -| v3.2 | ✓ | ✓ | ✓ | ✓ | ✓ | +| 版本 | 同步比赛 | 更新结果 | 时间变更 | 新格式 | TBD优化 | 实时比分 | +|------|---------|---------|---------|--------|---------|----------| +| v1.0 | ✓ | ✗ | ✗ | ✗ | ✗ | ✗ | +| v2.0 | ✓ | ✓ | ✗ | ✗ | ✗ | ✗ | +| v3.0 | ✓ | ✓ | ✓ | ✗ | ✗ | ✗ | +| v3.1 | ✓ | ✓ | ✓ | ✓ | ✗ | ✗ | +| v3.2 | ✓ | ✓ | ✓ | ✓ | ✓ | ✗ | +| v3.3 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ## 使用建议 diff --git a/sync_dota2_matches.py b/sync_dota2_matches.py index 296b81f..2174efd 100644 --- a/sync_dota2_matches.py +++ b/sync_dota2_matches.py @@ -94,7 +94,7 @@ class Dota2CalendarSync: return [], [] def _parse_match(self, parent, timestamp_elem): - """Parse match data from an element""" + """Parse match data from an element using HTML structure""" try: match_data = {} @@ -105,83 +105,160 @@ class Dota2CalendarSync: else: return None - text = parent.get_text() + # Extract team names from HTML structure + team_blocks = parent.find_all('div', class_='block-team') + if len(team_blocks) >= 2: + # Get team names - prefer span.name over a tag (a tag might be empty icon link) + team1_elem = team_blocks[0].find('span', class_='name') + if not team1_elem or not team1_elem.get_text().strip(): + # Try finding any a tag with text + for a_tag in team_blocks[0].find_all('a'): + if a_tag.get_text().strip(): + team1_elem = a_tag + break + + team2_elem = team_blocks[1].find('span', class_='name') + if not team2_elem or not team2_elem.get_text().strip(): + # Try finding any a tag with text + for a_tag in team_blocks[1].find_all('a'): + if a_tag.get_text().strip(): + team2_elem = a_tag + break + + if team1_elem and team2_elem: + match_data['team1'] = self._clean_team_name(team1_elem.get_text().strip()) + match_data['team2'] = self._clean_team_name(team2_elem.get_text().strip()) - # Check if it has a score (completed match) - # Look for score patterns specifically in match result context - # Scores are typically between team names, not part of timestamps - score_match = None + # If team blocks not found, try fallback + if 'team1' not in match_data: + # Fallback to text parsing + text = parent.get_text() + # Look for "vs" pattern + vs_match = re.search(r'([A-Za-z0-9\s\.\-_]+?)\s*vs\s*([A-Za-z0-9\s\.\-_]+)', text) + if vs_match: + team1_raw = vs_match.group(1).strip() + team2_raw = vs_match.group(2).strip() + # Clean up team names + team1_raw = re.sub(r'^.*CEST?', '', team1_raw).strip() + match_data['team1'] = self._clean_team_name(team1_raw) + match_data['team2'] = self._clean_team_name(team2_raw) + + # Extract score from HTML structure has_score = False + score_match = None - # First check if this looks like a completed match by looking for score indicators - # Split the text to analyze structure better - lines = text.split('\n') - for line in lines: - # Look for patterns like "Team1 2-1 Team2" or "Team1 2:1 Team2" - # Score should be surrounded by team names or 'vs' context - if 'vs' in line.lower(): - # Check for score pattern near 'vs' - score_pattern = re.search(r'(?:^|\s)(\d{1,2})[-:](\d{1,2})(?:\s|$)', line) - if score_pattern: - score1 = int(score_pattern.group(1)) - score2 = int(score_pattern.group(2)) - # Validate it's a reasonable game score (typically 0-5 for Bo5, 0-3 for Bo3) + # Look for score in structured elements + score_holder = parent.find('div', class_='match-info-header-scoreholder') + if score_holder: + score_elems = score_holder.find_all('span', class_='match-info-header-scoreholder-score') + if len(score_elems) >= 2: + try: + score1 = int(score_elems[0].get_text().strip()) + score2 = int(score_elems[1].get_text().strip()) if 0 <= score1 <= 5 and 0 <= score2 <= 5 and (score1 + score2) > 0: - # Additional check: not a time pattern - if not re.search(r'\d{1,2}:\d{2}(?:\s*[AP]M)?(?:\s*[A-Z]{3,4})?', line): - score_match = score_pattern - has_score = True - break + has_score = True + match_data['score'] = f"{score1}-{score2}" + score_match = True # Use as flag + except ValueError: + pass - # Extract teams and format - vs_pattern = r'([A-Za-z0-9\s\.\-_]+?)vs\(?(Bo\d)\)?([A-Za-z0-9\s\.\-_]+?)(?:TI2025|Round|Playoff|Group|\+|$)' - match = re.search(vs_pattern, text) + # If score not found in structure, try text pattern + if not has_score: + text = parent.get_text() + score_pattern = re.search(r'(\d{1,2})[:|-](\d{1,2})', text) + if score_pattern: + score1 = int(score_pattern.group(1)) + score2 = int(score_pattern.group(2)) + # Validate it's a reasonable game score and not time + if 0 <= score1 <= 5 and 0 <= score2 <= 5 and (score1 + score2) > 0: + # Make sure it's not a time pattern (HH:MM) + if not re.search(r'\d{1,2}:\d{2}\s*(?:CEST?|UTC|[AP]M)', text[max(0, score_pattern.start()-10):score_pattern.end()+10]): + has_score = True + match_data['score'] = f"{score1}-{score2}" + score_match = score_pattern - if not match: - vs_pattern = r'([A-Za-z0-9\s\.\-_]+?)vs([A-Za-z0-9\s\.\-_]+?)(?:TI2025|Round|Playoff|Group|\+|$)' - match = re.search(vs_pattern, text) + # Extract format (Bo1, Bo3, Bo5) + format_elem = parent.find('span', class_='match-info-header-scoreholder-lower') + if format_elem: + format_text = format_elem.get_text().strip() + format_match = re.search(r'(Bo\d)', format_text) + if format_match: + match_data['format'] = format_match.group(1) + else: + # Fallback to text search + text = parent.get_text() + format_match = re.search(r'\(?(Bo\d)\)?', text) + if format_match: + match_data['format'] = format_match.group(1) - if match: - team1 = match.group(1).strip() - if len(match.groups()) > 2: - format_str = match.group(2) - team2 = match.group(3).strip() + # Extract tournament from HTML structure + tournament_elem = parent.find('div', class_='match-info-tournament') + if tournament_elem: + tournament_text = tournament_elem.get_text().strip() + # Clean up tournament name + tournament_text = re.sub(r'\+ Add details.*', '', tournament_text).strip() + if 'TI2025' in tournament_text: + match_data['tournament'] = 'The International 2025' + round_match = re.search(r'Round\s+\d+', tournament_text) + if round_match: + match_data['tournament'] += f" - {round_match.group(0)}" else: - format_str = None - team2 = match.group(2).strip() - - # Clean up team names - team1 = re.sub(r'^.*CEST?', '', team1).strip() - - if team1 and team2: - match_data['team1'] = self._clean_team_name(team1) - match_data['team2'] = self._clean_team_name(team2) - - if format_str and format_str.startswith('Bo'): - match_data['format'] = format_str + match_data['tournament'] = tournament_text + else: + # Fallback to text search + text = parent.get_text() + if 'TI2025' in text: + match_data['tournament'] = 'The International 2025' + round_match = re.search(r'Round\s+\d+', text) + if round_match: + match_data['tournament'] += f" - {round_match.group(0)}" + elif 'Major' in text: + major_match = re.search(r'[\w\s]+Major', text) + if major_match: + match_data['tournament'] = major_match.group(0).strip() - # Extract tournament - if 'TI2025' in text: - match_data['tournament'] = 'The International 2025' - round_match = re.search(r'Round\s+\d+', text) - if round_match: - match_data['tournament'] += f" - {round_match.group(0)}" - elif 'Major' in text: - major_match = re.search(r'[\w\s]+Major', text) - if major_match: - match_data['tournament'] = major_match.group(0).strip() - - # Mark if completed - if has_score and score_match: - match_data['completed'] = True - match_data['score'] = f"{score_match.group(1)}-{score_match.group(2)}" - # Determine winner - if int(score_match.group(1)) > int(score_match.group(2)): - match_data['winner'] = match_data.get('team1', 'Unknown') + # Mark if has score and if completed + if has_score: + # Score already set above, extract score values + score_parts = re.match(r'(\d+)-(\d+)', match_data['score']) + if score_parts: + score1 = int(score_parts.group(1)) + score2 = int(score_parts.group(2)) else: - match_data['winner'] = match_data.get('team2', 'Unknown') + score1 = score2 = 0 + + # Check if series is actually completed based on format + series_completed = False + if 'format' in match_data: + if 'Bo3' in match_data['format']: + # Bo3 is complete when someone reaches 2 wins + series_completed = (score1 >= 2 or score2 >= 2) + elif 'Bo5' in match_data['format']: + # Bo5 is complete when someone reaches 3 wins + series_completed = (score1 >= 3 or score2 >= 3) + elif 'Bo1' in match_data['format']: + # Bo1 is complete when there's any score + series_completed = True + else: + # Unknown format, assume completed if there's a score + series_completed = True + else: + # No format info, try to guess from score + # If someone has 2+ wins, likely a completed Bo3/Bo5 + series_completed = (score1 >= 2 or score2 >= 2) + + match_data['completed'] = series_completed + match_data['has_score'] = True # Mark that there's a score even if not completed + + # Determine winner only if completed + if series_completed: + if score1 > score2: + match_data['winner'] = match_data.get('team1', 'Unknown') + else: + match_data['winner'] = match_data.get('team2', 'Unknown') else: match_data['completed'] = False + match_data['has_score'] = False # Generate ID if we have valid data if 'team1' in match_data and 'team2' in match_data: @@ -481,6 +558,58 @@ class Dota2CalendarSync: print(f"Error updating event with teams: {e}") return False + def update_event_with_score(self, event_id, match_data): + """Update an existing calendar event with in-progress score""" + try: + # Get the existing event + event = self.service.events().get( + calendarId=self.calendar_id, + eventId=event_id + ).execute() + + # Update the description with current score + description = event.get('description', '') + + # Check if score is already in the description + if '📊 CURRENT SCORE:' in description: + # Update existing score + description = re.sub( + r'📊 CURRENT SCORE:.*?\n', + f"📊 CURRENT SCORE: {match_data.get('score', 'Unknown')}\n", + description + ) + else: + # Add new score + score_text = f"\n📊 CURRENT SCORE: {match_data.get('score', 'Unknown')}\n" + if 'ID:' in description: + description = description.replace('ID:', score_text + 'ID:') + else: + description += score_text + + # Update the summary to show current score (without checkmark) + summary = event.get('summary', '') + # Remove any existing score + summary = re.sub(r'^\d+[-:]\d+\s+', '', summary) + # Add new score at the beginning + score = match_data.get('score', '?-?') + summary = f"{score} {summary}" + + # Update the event + event['description'] = description + event['summary'] = summary + + updated_event = self.service.events().update( + calendarId=self.calendar_id, + eventId=event_id, + body=event + ).execute() + + return True + + except Exception as e: + print(f"Error updating event score: {e}") + return False + def update_event_with_result(self, event_id, match_data): """Update an existing calendar event with match results""" try: @@ -583,7 +712,10 @@ class Dota2CalendarSync: print("-" * 30) now = datetime.now(pytz.UTC) - future_matches = [m for m in upcoming_matches if m.get('datetime', now) >= now] + # Include matches from the last 12 hours (to catch ongoing matches with scores) + twelve_hours_ago = now - timedelta(hours=12) + future_matches = [m for m in upcoming_matches + if m.get('datetime', now) >= twelve_hours_ago] for match in future_matches: match_id = match.get('id') @@ -640,6 +772,51 @@ class Dota2CalendarSync: else: print(f"✗ Failed to update TBD match: {team1} vs {team2}") error_count += 1 + # Check if this match has a score (completed or in-progress) and needs update + elif match.get('has_score') and update_results: + # Check current event status + summary = existing_event.get('summary', '') + description = existing_event.get('description', '') + current_score = None + + # Try to extract current score from summary + score_in_summary = re.search(r'✓?\s*(\d+[-:]\d+)', summary) + if score_in_summary: + current_score = score_in_summary.group(1).replace(':', '-') + + # Check if score needs update + new_score = match.get('score', 'Unknown') + + if current_score == new_score: + print(f"⊘ Score unchanged: {team1} vs {team2} ({new_score})") + skipped_count += 1 + else: + if match.get('completed'): + # Series is completed + if dry_run: + print(f"◯ Would update completed result: {team1} vs {team2} - {new_score}") + updated_count += 1 + else: + if self.update_event_with_result(existing_event['id'], match): + print(f"✓ Updated completed result: {team1} vs {team2} - {new_score}") + updated_count += 1 + time.sleep(0.2) + else: + print(f"✗ Failed to update: {team1} vs {team2}") + error_count += 1 + else: + # Series is in-progress with partial score + if dry_run: + print(f"◯ Would update in-progress score: {team1} vs {team2} - {new_score}") + updated_count += 1 + else: + if self.update_event_with_score(existing_event['id'], match): + print(f"📊 Updated in-progress score: {team1} vs {team2} - {new_score}") + updated_count += 1 + time.sleep(0.2) + else: + print(f"✗ Failed to update score: {team1} vs {team2}") + error_count += 1 # Check if time has changed elif update_times: event_start = existing_event['start'].get('dateTime', existing_event['start'].get('date'))