diff --git a/CHANGELOG.md b/CHANGELOG.md index 68a19dc..83a8ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## v4.2 - 2025-09-13 - 智能合并重复比赛 +- **🔀 智能合并重复比赛**: + - 新增 `_merge_duplicate_matches()` 方法 + - 自动检测同时间(30分钟窗口)具有共同队伍的比赛 + - 当发现两场比赛在同一时间且共享一个队伍时,认为是同一场比赛的不同表示 + - 例如:将 "Team Liquid vs OG" 和 "Team Liquid vs OG Esports" 合并为一场比赛 +- **📝 队伍名称标准化**: + - 保留更长的队伍名称作为正式名称 + - 解决了队伍名称变体问题(如 "OG" vs "OG Esports") + - 避免因队伍名称不一致导致的重复事件 +- **⚡ 优化处理流程**: + - 在 `fetch_all_matches()` 后立即进行合并处理 + - 减少后续同步过程中的重复检查 + - 提高整体同步效率 + ## v4.1 - 2025-09-13 - 修复TBD事件重复问题 - **🐛 修复TBD事件匹配逻辑**: - 修复了 `find_existing_event()` 方法中的TBD匹配逻辑 @@ -197,10 +212,11 @@ | v3.9 | ✓ | ✓ | ✓ | ✓ | ✓+ | ✓ | | v4.0 | ✓ | ✓ | ✓ | ✓ | ✓+ | ✓ | | v4.1 | ✓ | ✓ | ✓ | ✓ | ✓++ | ✓ | +| v4.2 | ✓ | ✓ | ✓ | ✓ | ✓++ | ✓ | ## 使用建议 -推荐使用最新的 v4.1 版本,它修复了TBD事件重复问题并提供更好的错误处理: +推荐使用最新的 v4.2 版本,它包含智能合并重复比赛功能并提供更好的错误处理: ```bash python sync_dota2_matches.py --calendar-id "YOUR_CALENDAR_ID" ``` diff --git a/README.md b/README.md index d489f7c..2f92ab2 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,14 @@ -# Dota 2 Calendar Sync v4.1 +# Dota 2 Calendar Sync v4.2 自动从 Liquipedia 获取 Dota 2 Tier 1 比赛信息并同步到 Google Calendar,支持自动更新比赛结果、时间变更、智能管理TBD占位事件、自动清理过期和重复比赛。 ## 更新日志 +### v4.2 (2025-09-13) +- 🔀 智能合并重复比赛:自动检测同时间具有共同队伍的比赛并合并(处理队伍名称变体) +- 📝 队伍名称标准化:当发现 "OG" 和 "OG Esports" 这样的变体时,保留更长的完整名称 +- ⚡ 优化数据获取:在获取数据时即进行去重,减少后续处理的复杂度 + ### v4.1 (2025-09-13) - 🐛 修复TBD事件匹配逻辑:现在可以正确识别并更新部分TBD事件(如 "TBD vs Team" 或 "Team vs TBD") - 🔧 增强TBD事件清理:删除所有与确认比赛同时间的TBD事件,不再限于 "TBD vs TBD" 格式 diff --git a/sync_dota2_matches.py b/sync_dota2_matches.py index c206dcd..236ee5a 100644 --- a/sync_dota2_matches.py +++ b/sync_dota2_matches.py @@ -229,6 +229,10 @@ class Dota2CalendarSync: logger.info(f"✓ Found {len(upcoming)} upcoming matches") logger.info(f"✓ Found {len(completed)} completed matches with results") + # Merge duplicate matches at the same time + upcoming = self._merge_duplicate_matches(upcoming) + completed = 
def _merge_duplicate_matches(self, matches: List[Match]) -> List[Match]:
    """Merge matches in the same half-hour slot that share a team.

    Two entries whose start times fall in the same half-hour bucket and
    that have one identical, non-placeholder team name are treated as a
    single match listed under different team-name variants (for example
    "OG" vs "OG Esports"). The pair is collapsed into one Match that keeps
    the first entry's id, datetime and team ordering; every other field is
    taken from the first entry when truthy, otherwise from the second.

    For the team that differs between the two entries, a confirmed name is
    always preferred over the 'TBD' placeholder; between two confirmed
    names the longer one is kept.

    Notes:
        * Buckets are fixed (:00-:29 and :30-:59), not a sliding window,
          so entries at 12:29 and 12:31 are never compared.
        * Each entry is merged with at most one other entry; a third
          duplicate in the same bucket is returned unchanged.
        * The result is grouped by bucket in first-seen order.

    Args:
        matches: List of Match objects to check for duplicates.

    Returns:
        List of Match objects with duplicate pairs merged. An empty input
        is returned as-is.
    """
    if not matches:
        return matches

    # Kept function-local so the method does not rely on the file's
    # top-level import block.
    from collections import defaultdict

    placeholder = 'TBD'

    # Group matches by start time rounded down to the half hour.
    matches_by_time = defaultdict(list)
    for match in matches:
        time_key = match.datetime.replace(
            minute=(match.datetime.minute // 30) * 30,
            second=0,
            microsecond=0,
        )
        matches_by_time[time_key].append(match)

    merged_matches = []

    for time_key, time_matches in matches_by_time.items():
        if len(time_matches) <= 1:
            # Nothing to compare against in this bucket.
            merged_matches.extend(time_matches)
            continue

        processed = set()

        for i, match1 in enumerate(time_matches):
            if i in processed:
                continue
            processed.add(i)
            result = match1

            for j in range(i + 1, len(time_matches)):
                if j in processed:
                    continue
                match2 = time_matches[j]

                # (name in match1, name in match2, match1's other team,
                # match2's other team) for every slot combination.
                pairings = (
                    (match1.team1, match2.team1, match1.team2, match2.team2),
                    (match1.team1, match2.team2, match1.team2, match2.team1),
                    (match1.team2, match2.team1, match1.team1, match2.team2),
                    (match1.team2, match2.team2, match1.team1, match2.team1),
                )

                # Scan all pairings instead of stopping at the first equal
                # one: a shared 'TBD' slot must not hide a real shared team
                # in another pairing.
                common_team = None
                different_teams = ()
                for mine, theirs, my_other, their_other in pairings:
                    if mine == theirs and mine and mine != placeholder:
                        common_team = mine
                        different_teams = (my_other, their_other)
                        break

                if common_team is None:
                    continue

                # Prefer a confirmed name over the placeholder, then the
                # longer (more complete) spelling. Plain max-by-length would
                # pick 'TBD' over short real names such as "OG".
                chosen_different = max(
                    different_teams,
                    key=lambda name: (
                        bool(name) and name != placeholder,
                        len(name or ''),
                    ),
                )

                common_first = match1.team1 == common_team

                # NOTE(review): when match2 lists the teams in the opposite
                # order, a score taken from match2 may be oriented
                # differently from the merged team1/team2 - confirm against
                # the score format used by Match.
                merged_match = Match(
                    id=match1.id,  # Keep first match's ID
                    team1=common_team if common_first else chosen_different,
                    team2=chosen_different if common_first else common_team,
                    datetime=match1.datetime,
                    tournament=match1.tournament or match2.tournament,
                    format=match1.format or match2.format,
                    score=match1.score or match2.score,
                    completed=match1.completed or match2.completed,
                    has_score=match1.has_score or match2.has_score,
                    winner=match1.winner or match2.winner
                )

                processed.add(j)
                result = merged_match

                logger.info(f"Merged duplicate matches at {time_key}: "
                            f"{match1.team1} vs {match1.team2} + {match2.team1} vs {match2.team2} "
                            f"-> {merged_match.team1} vs {merged_match.team2}")
                # Pairwise merge only: stop after the first partner.
                break

            merged_matches.append(result)

    return merged_matches