diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py index b1152843..3c918a8e 100644 --- a/codeclash/arenas/__init__.py +++ b/codeclash/arenas/__init__.py @@ -4,6 +4,7 @@ from codeclash.arenas.bridge.bridge import BridgeArena from codeclash.arenas.corewar.corewar import CoreWarArena from codeclash.arenas.dummy.dummy import DummyArena +from codeclash.arenas.figgie.figgie import FiggieArena from codeclash.arenas.gomoku.gomoku import GomokuArena from codeclash.arenas.halite.halite import HaliteArena from codeclash.arenas.halite2.halite2 import Halite2Arena @@ -18,6 +19,7 @@ BridgeArena, CoreWarArena, DummyArena, + FiggieArena, GomokuArena, HaliteArena, Halite2Arena, diff --git a/codeclash/arenas/bridge/bridge.py b/codeclash/arenas/bridge/bridge.py index 53b6e550..58c64ad7 100644 --- a/codeclash/arenas/bridge/bridge.py +++ b/codeclash/arenas/bridge/bridge.py @@ -3,7 +3,6 @@ import json import shlex import subprocess -from collections import Counter from concurrent.futures import ThreadPoolExecutor, as_completed from tqdm.auto import tqdm @@ -53,10 +52,7 @@ def validate_code(self, agent: Player) -> tuple[bool, str | None]: content = agent.environment.execute(f"cat {self.submission}")["output"] # Check for required function definitions - required_functions = [ - "def get_bid(", - "def play_card(" - ] + required_functions = ["def get_bid(", "def play_card("] missing = [] for func in required_functions: @@ -86,7 +82,7 @@ def _run_single_simulation(self, agents: list[Player], idx: int, cmd: str): def execute_round(self, agents: list[Player]): """Execute a round of Bridge games.""" - sims = self.game_config.get('sims_per_round', 10) + sims = self.game_config.get("sims_per_round", 10) self.logger.info(f"Running {sims} Bridge simulations with 4 players") # Build agent paths for the command @@ -100,12 +96,7 @@ def execute_round(self, agents: list[Player]): # Run simulations in parallel with ThreadPoolExecutor(max_workers=8) as executor: futures = [ - 
executor.submit( - self._run_single_simulation, - agents, - idx, - f"{cmd} --seed {idx} --dealer {idx % 4}" - ) + executor.submit(self._run_single_simulation, agents, idx, f"{cmd} --seed {idx} --dealer {idx % 4}") for idx in range(sims) ] for future in tqdm(as_completed(futures), total=len(futures), desc="Bridge simulations"): @@ -114,11 +105,11 @@ def execute_round(self, agents: list[Player]): def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): """Parse results and determine winners.""" # Initialize team scores - team_scores = {'NS': 0.0, 'EW': 0.0} + team_scores = {"NS": 0.0, "EW": 0.0} games_played = 0 # Parse all simulation logs - for idx in range(self.game_config.get('sims_per_round', 10)): + for idx in range(self.game_config.get("sims_per_round", 10)): log_file = self.log_round(round_num) / f"sim_{idx}.json" if not log_file.exists(): @@ -130,15 +121,15 @@ def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): result = json.load(f) # Check for error - if 'error' in result: + if "error" in result: self.logger.warning(f"Simulation {idx} had error: {result['error']}") continue # Extract VP scores for each team - vp_scores = result.get('normalized_score', {}) + vp_scores = result.get("normalized_score", {}) if vp_scores: - team_scores['NS'] += vp_scores.get('NS', 0.0) - team_scores['EW'] += vp_scores.get('EW', 0.0) + team_scores["NS"] += vp_scores.get("NS", 0.0) + team_scores["EW"] += vp_scores.get("EW", 0.0) games_played += 1 except (json.JSONDecodeError, KeyError) as e: self.logger.warning(f"Error parsing {log_file}: {e}") @@ -153,20 +144,20 @@ def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): return # Average the scores - team_scores['NS'] /= games_played - team_scores['EW'] /= games_played + team_scores["NS"] /= games_played + team_scores["EW"] /= games_played # Determine winning team - if abs(team_scores['NS'] - team_scores['EW']) < 0.01: # Tie threshold + if 
# Image for the Figgie arena: Ubuntu 22.04 with Python 3.10 and the Figgie
# game repository checked out at /workspace.
FROM ubuntu:22.04

# Keep apt from prompting for input during the image build.
ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.10 and basic tools (each package listed once)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    curl ca-certificates python3.10 python3.10-venv \
    python3-pip python-is-python3 wget git build-essential jq locales \
    && rm -rf /var/lib/apt/lists/*

# Clone Figgie game repository
RUN git clone https://github.com/CodeClash-ai/Figgie.git /workspace \
    && cd /workspace \
    && git remote set-url origin https://github.com/CodeClash-ai/Figgie.git
WORKDIR /workspace

# No additional dependencies needed - engine uses only standard library
import re

from codeclash.agents.player import Player
from codeclash.arenas.arena import CodeArena, RoundStats
from codeclash.constants import RESULT_TIE
from codeclash.utils.environment import assert_zero_exit_code

# File (inside the round's log directory) that receives the engine's stdout.
FIGGIE_LOG = "result.log"

# Per-bot summary line in the engine output, e.g. "Bot_1_main: 3 rounds won".
_BOT_RESULT_RE = re.compile(r"Bot_(\d)_main:\s(\d+)\srounds\swon")
# Summary line carrying the number of drawn simulations, e.g. "Draws: 2".
_DRAWS_RE = re.compile(r"Draws:\s(\d+)")


class FiggieArena(CodeArena):
    """Arena that runs the Figgie card-trading engine for 4 or 5 players."""

    name: str = "Figgie"
    submission: str = "main.py"
    description: str = """Figgie is a card trading game invented at Jane Street in 2013.
It simulates open-outcry commodities trading where players buy and sell cards to accumulate the goal suit.

Game Rules:
- 4 or 5 players, each starting with $350
- 4 players: $50 ante, 10 cards each
- 5 players: $40 ante, 8 cards each
- Pot is always $200
- Deck: one 12-card suit, two 10-card suits, one 8-card suit
- Goal suit: same color as 12-card suit, contains 8 or 10 cards
- At end: $10 per goal suit card, remainder to player(s) with most goal suit cards

Trading Model (Simultaneous Tick):
- Each tick, ALL players are polled for their action
- Actions are executed in random order (simulates racing to the order book)
- Order books cleared after each trade (per official Figgie rules)

Your bot (main.py) must implement:

    def get_action(state: dict) -> dict

state contains:
- position: your player index (0-3 or 0-4)
- hand: dict of suit -> count of cards you hold
- money: your current money
- books: dict of suit -> {bid: {price, player} or None, ask: {price, player} or None, last_trade}
- trades: list of completed trades
- num_players: number of players (4 or 5)
- tick: current tick number

Return one of:
- {"type": "pass"}
- {"type": "bid", "suit": "spades", "price": 5}
- {"type": "ask", "suit": "spades", "price": 10}
- {"type": "buy", "suit": "spades"}
- {"type": "sell", "suit": "spades"}

Suits: "spades", "clubs", "hearts", "diamonds"
"""

    def __init__(self, config, **kwargs):
        """Check the player count, then initialise the base arena.

        Args:
            config: tournament configuration; its "players" list must hold
                4 or 5 entries.
            **kwargs: forwarded unchanged to the base class initialiser.

        Raises:
            ValueError: if the number of configured players is not 4 or 5.
        """
        # Fail fast: reject an unsupported table size before the base class
        # performs any of its own setup work.
        num_players = len(config.get("players", []))
        if num_players not in (4, 5):
            raise ValueError(f"Figgie requires 4 or 5 players, got {num_players}")
        super().__init__(config, **kwargs)

    def execute_round(self, agents: list[Player]) -> None:
        """Run the engine once for this round and capture its stdout.

        The engine receives one submission path per agent plus the number of
        simulations (``sims_per_round`` from the game config); its output is
        redirected into ``FIGGIE_LOG`` under ``self.log_env``.
        """
        args = [f"/{agent.name}/{self.submission}" for agent in agents]
        cmd = (
            f"python engine.py {' '.join(args)} -r {self.game_config['sims_per_round']} > {self.log_env / FIGGIE_LOG};"
        )
        self.logger.info(f"Running game: {cmd}")
        assert_zero_exit_code(self.environment.execute(cmd))

    def get_results(self, agents: list[Player], round_num: int, stats: RoundStats) -> None:
        """Parse the round log and fill ``stats`` with scores and the winner.

        Scores are the per-bot "rounds won" counts found after the last
        ``FINAL_RESULTS`` marker; a positive draw count is stored under
        ``RESULT_TIE``. The winner is the highest-scoring entry, or
        ``RESULT_TIE`` when several players share the top score, or
        ``"unknown"`` when nothing could be parsed.
        """
        with open(self.log_round(round_num) / FIGGIE_LOG) as f:
            round_log = f.read()
        final_lines = round_log.split("FINAL_RESULTS")[-1].splitlines()

        scores = {}
        for line in final_lines:
            match = _BOT_RESULT_RE.search(line)
            if not match:
                continue
            # Bot ids in the log are treated as 1-based positions in `agents`.
            agent_idx = int(match.group(1)) - 1
            if not 0 <= agent_idx < len(agents):
                # An id of 0 would otherwise wrap around to the last agent and an
                # id past the end would crash; skip the line instead.
                self.logger.warning(f"Ignoring result for unknown bot id in line: {line!r}")
                continue
            scores[agents[agent_idx].name] = int(match.group(2))

        # Handle draws
        draw_match = _DRAWS_RE.search(round_log)
        if draw_match:
            draws = int(draw_match.group(1))
            if draws > 0:
                scores[RESULT_TIE] = draws

        if not scores:
            stats.winner = "unknown"
        else:
            top_score = max(scores.values())
            leaders = [who for who, won in scores.items() if won == top_score and who != RESULT_TIE]
            # Several players sharing the top score is a tie; otherwise take the
            # first entry holding the maximum (which may be the draw bucket).
            stats.winner = RESULT_TIE if len(leaders) > 1 else max(scores, key=scores.get)

        stats.scores = scores
        for player, score in scores.items():
            if player != RESULT_TIE:
                stats.player_stats[player].score = score

    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
        """Check that the agent's codebase contains a usable submission.

        Returns:
            ``(True, None)`` when ``self.submission`` is listed by ``ls`` and
            its text contains ``def get_action(``; otherwise ``(False, reason)``.
        """
        listing = agent.environment.execute("ls")["output"]
        # Compare whole file names: a substring test would also accept names
        # such as "main.py.bak" or "not_main.py".
        if self.submission not in listing.split():
            return False, f"No {self.submission} file found in the root directory"

        bot_content = agent.environment.execute(f"cat {self.submission}")["output"]

        if "def get_action(" not in bot_content:
            return (
                False,
                f"{self.submission} must define a get_action(state) function. "
                "See the game description for the required signature.",
            )

        return True, None
+ + Game Rules: + - 4 or 5 players, each starting with $350 + - 4 players: $50 ante, 10 cards each + - 5 players: $40 ante, 8 cards each + - Pot is always $200 + - Deck: one 12-card suit, two 10-card suits, one 8-card suit + - Goal suit: same color as 12-card suit, contains 8 or 10 cards (secret until end) + - At end: $10 per goal suit card, remainder to player(s) with most goal suit cards + + Trading Model (Simultaneous Tick): + - Each tick, ALL players are polled for their action + - Actions are executed in random order (simulates racing to the order book) + - Order books cleared after each trade (per official Figgie rules) + + Your bot (main.py) must implement: + def get_action(state: dict) -> dict + + state contains: + - position: your player index + - hand: dict of suit -> count of cards you hold + - money: your current money + - books: dict of suit -> {bid: {price, player} or None, ask: {price, player} or None, last_trade} + - trades: list of completed trades + - num_players: number of players (4 or 5) + - tick: current tick number + + Return one of: + - {"type": "pass"} + - {"type": "bid", "suit": "spades", "price": 5} + - {"type": "ask", "suit": "spades", "price": 10} + - {"type": "buy", "suit": "spades"} + - {"type": "sell", "suit": "spades"} + + The game is played in {{rounds}} rounds. For every round, you edit program code that controls your bot. This is round {{round}}. + After you and your competitors finish editing your codebases, the game is run automatically. + + Your task: improve the bot in `main.py`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. 
"""Unit tests for FiggieArena."""

import copy

import pytest

from codeclash.arenas.figgie.figgie import FiggieArena

# Minimal bot that follows the documented state schema: standing orders live in
# state['books'][suit]['bid'] / ['ask'], each either None or {price, player}.
VALID_FIGGIE_BOT = """
def get_action(state):
    '''Make a trading decision based on game state.'''
    hand = state.get('hand', {})
    books = state.get('books', {})
    position = state.get('position', 0)

    # Simple strategy: sell into any standing bid that is not our own
    for suit in ['spades', 'clubs', 'hearts', 'diamonds']:
        bid = (books.get(suit) or {}).get('bid')
        if bid and bid.get('player') != position and hand.get(suit, 0) > 0:
            return {'type': 'sell', 'suit': suit}

    return {'type': 'pass'}
"""


def _figgie_config(base_config, num_players):
    """Return a Figgie config with ``num_players`` dummy players.

    The base config is deep-copied so the nested "game" dict of the shared
    fixture is never mutated.
    """
    config = copy.deepcopy(base_config)
    config["game"]["name"] = "Figgie"
    config["players"] = [{"name": f"p{i}", "agent": "dummy"} for i in range(1, num_players + 1)]
    return config


class TestFiggieValidation:
    """Tests for FiggieArena.validate_code()"""

    @pytest.fixture
    def arena(self, tmp_log_dir):
        """Create a bare FiggieArena instance without running its initialiser."""
        # __new__ bypasses __init__, so only the attributes set here exist.
        arena = FiggieArena.__new__(FiggieArena)
        arena.submission = "main.py"
        arena.log_local = tmp_log_dir
        return arena

    def test_valid_submission(self, arena, mock_player_factory):
        """Test that a valid Figgie bot passes validation."""
        player = mock_player_factory(
            name="test_player",
            files={"main.py": VALID_FIGGIE_BOT},
            command_outputs={
                "ls": {"output": "main.py\n", "returncode": 0},
                "cat main.py": {"output": VALID_FIGGIE_BOT, "returncode": 0},
            },
        )
        is_valid, error = arena.validate_code(player)
        assert is_valid is True
        assert error is None

    def test_missing_file(self, arena, mock_player_factory):
        """Test that missing main.py fails validation."""
        player = mock_player_factory(
            name="test_player",
            files={},
            command_outputs={
                "ls": {"output": "other.py\n", "returncode": 0},
            },
        )
        is_valid, error = arena.validate_code(player)
        assert is_valid is False
        assert "main.py" in error

    def test_missing_get_action_function(self, arena, mock_player_factory):
        """Test that missing get_action function fails validation."""
        bot_code = """
def make_move(game_state):
    '''Wrong function name.'''
    return {'type': 'pass'}
"""
        player = mock_player_factory(
            name="test_player",
            files={"main.py": bot_code},
            command_outputs={
                "ls": {"output": "main.py\n", "returncode": 0},
                "cat main.py": {"output": bot_code, "returncode": 0},
            },
        )
        is_valid, error = arena.validate_code(player)
        assert is_valid is False
        assert "get_action" in error


class TestFiggieRequirements:
    """Test Figgie-specific requirements."""

    def test_rejects_invalid_player_count(self, minimal_config, tmp_log_dir):
        """Test that Figgie rejects invalid player counts (not 4 or 5)."""
        config = _figgie_config(minimal_config, num_players=2)

        with pytest.raises(ValueError, match="Figgie requires 4 or 5 players"):
            FiggieArena(config, tournament_id="test_tournament", local_output_dir=tmp_log_dir)

    def test_rejects_6_players(self, minimal_config, tmp_log_dir):
        """Test that Figgie rejects 6 players."""
        config = _figgie_config(minimal_config, num_players=6)

        with pytest.raises(ValueError, match="Figgie requires 4 or 5 players"):
            FiggieArena(config, tournament_id="test_tournament", local_output_dir=tmp_log_dir)

    def test_accepts_4_or_5_players(self):
        """Test that Figgie accepts 4 or 5 players by checking class properties."""
        assert FiggieArena.name == "Figgie"
        assert FiggieArena.submission == "main.py"
        # Description should mention both 4 and 5 players
        assert "4 or 5 players" in FiggieArena.description