tournament.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. """
  2. Estimate the strength rating of student-agent with iterative deepening and
  3. a custom heuristic evaluation function against fixed-depth minimax and
  4. alpha-beta search agents by running a round-robin tournament for the student
  5. agent. Note that all agents are constructed from the student CustomPlayer
  6. implementation, so any errors present in that class will affect the outcome
  7. here.
  8. The student agent plays a fixed number of "fair" matches against each test
  9. agent. The matches are fair because the board is initialized randomly for both
  10. players, and the players play each match twice -- switching the player order
  11. between games. This helps to correct for imbalances in the game due to both
  12. starting position and initiative.
  13. For example, if the random moves chosen for initialization are (5, 2) and
  14. (1, 3), then the first match will place agentA at (5, 2) as player 1 and
  15. agentB at (1, 3) as player 2 then play to conclusion; the agents swap
  16. initiative in the second match with agentB at (5, 2) as player 1 and agentA at
  17. (1, 3) as player 2.
  18. """
  19. import itertools
  20. import random
  21. import warnings
  22. from collections import namedtuple
  23. from isolation import Board
  24. from sample_players import RandomPlayer
  25. from sample_players import null_score
  26. from sample_players import open_move_score
  27. from sample_players import improved_score
  28. from game_agent import CustomPlayer
  29. from game_agent import custom_score
  30. NUM_MATCHES = 5 # number of matches against each opponent
  31. TIME_LIMIT = 150 # number of milliseconds before timeout
  32. TIMEOUT_WARNING = "One or more agents lost a match this round due to " + \
  33. "timeout. The get_move() function must return before " + \
  34. "time_left() reaches 0 ms. You will need to leave some " + \
  35. "time for the function to return, and may need to " + \
  36. "increase this margin to avoid timeouts during " + \
  37. "tournament play."
  38. DESCRIPTION = """
  39. This script evaluates the performance of the custom heuristic function by
  40. comparing the strength of an agent using iterative deepening (ID) search with
  41. alpha-beta pruning against the strength rating of agents using other heuristic
  42. functions. The `ID_Improved` agent provides a baseline by measuring the
  43. performance of a basic agent using Iterative Deepening and the "improved"
  44. heuristic (from lecture) on your hardware. The `Student` agent then measures
  45. the performance of Iterative Deepening and the custom heuristic against the
  46. same opponents.
  47. """
  48. Agent = namedtuple("Agent", ["player", "name"])
  49. def play_match(player1, player2):
  50. """
  51. Play a "fair" set of matches between two agents by playing two games
  52. between the players, forcing each agent to play from randomly selected
  53. positions. This should control for differences in outcome resulting from
  54. advantage due to starting position on the board.
  55. """
  56. num_wins = {player1: 0, player2: 0}
  57. num_timeouts = {player1: 0, player2: 0}
  58. num_invalid_moves = {player1: 0, player2: 0}
  59. games = [Board(player1, player2), Board(player2, player1)]
  60. # initialize both games with a random move and response
  61. for _ in range(2):
  62. move = random.choice(games[0].get_legal_moves())
  63. games[0].apply_move(move)
  64. games[1].apply_move(move)
  65. # play both games and tally the results
  66. for game in games:
  67. winner, _, termination = game.play(time_limit=TIME_LIMIT)
  68. if player1 == winner:
  69. num_wins[player1] += 1
  70. if termination == "timeout":
  71. num_timeouts[player2] += 1
  72. else:
  73. num_invalid_moves[player2] += 1
  74. elif player2 == winner:
  75. num_wins[player2] += 1
  76. if termination == "timeout":
  77. num_timeouts[player1] += 1
  78. else:
  79. num_invalid_moves[player1] += 1
  80. if sum(num_timeouts.values()) != 0:
  81. warnings.warn(TIMEOUT_WARNING)
  82. return num_wins[player1], num_wins[player2]
  83. def play_round(agents, num_matches):
  84. """
  85. Play one round (i.e., a single match between each pair of opponents)
  86. """
  87. agent_1 = agents[-1]
  88. wins = 0.
  89. total = 0.
  90. print("\nPlaying Matches:")
  91. print("----------")
  92. for idx, agent_2 in enumerate(agents[:-1]):
  93. counts = {agent_1.player: 0., agent_2.player: 0.}
  94. names = [agent_1.name, agent_2.name]
  95. print(" Match {}: {!s:^11} vs {!s:^11}".format(idx + 1, *names), end=' ')
  96. # Each player takes a turn going first
  97. for p1, p2 in itertools.permutations((agent_1.player, agent_2.player)):
  98. for _ in range(num_matches):
  99. score_1, score_2 = play_match(p1, p2)
  100. counts[p1] += score_1
  101. counts[p2] += score_2
  102. total += score_1 + score_2
  103. wins += counts[agent_1.player]
  104. print("\tResult: {} to {}".format(int(counts[agent_1.player]),
  105. int(counts[agent_2.player])))
  106. return 100. * wins / total
  107. def main():
  108. HEURISTICS = [("Null", null_score),
  109. ("Open", open_move_score),
  110. ("Improved", improved_score)]
  111. AB_ARGS = {"search_depth": 5, "method": 'alphabeta', "iterative": False}
  112. MM_ARGS = {"search_depth": 3, "method": 'minimax', "iterative": False}
  113. CUSTOM_ARGS = {"method": 'alphabeta', 'iterative': True}
  114. # Create a collection of CPU agents using fixed-depth minimax or alpha beta
  115. # search, or random selection. The agent names encode the search method
  116. # (MM=minimax, AB=alpha-beta) and the heuristic function (Null=null_score,
  117. # Open=open_move_score, Improved=improved_score). For example, MM_Open is
  118. # an agent using minimax search with the open moves heuristic.
  119. mm_agents = [Agent(CustomPlayer(score_fn=h, **MM_ARGS),
  120. "MM_" + name) for name, h in HEURISTICS]
  121. ab_agents = [Agent(CustomPlayer(score_fn=h, **AB_ARGS),
  122. "AB_" + name) for name, h in HEURISTICS]
  123. random_agents = [Agent(RandomPlayer(), "Random")]
  124. # ID_Improved agent is used for comparison to the performance of the
  125. # submitted agent for calibration on the performance across different
  126. # systems; i.e., the performance of the student agent is considered
  127. # relative to the performance of the ID_Improved agent to account for
  128. # faster or slower computers.
  129. test_agents = [Agent(CustomPlayer(score_fn=improved_score, **CUSTOM_ARGS), "ID_Improved"),
  130. Agent(CustomPlayer(score_fn=custom_score, **CUSTOM_ARGS), "Student")]
  131. print(DESCRIPTION)
  132. for agentUT in test_agents:
  133. print("")
  134. print("*************************")
  135. print("{:^25}".format("Evaluating: " + agentUT.name))
  136. print("*************************")
  137. agents = random_agents + mm_agents + ab_agents + [agentUT]
  138. win_ratio = play_round(agents, NUM_MATCHES)
  139. print("\n\nResults:")
  140. print("----------")
  141. print("{!s:<15}{:>10.2f}%".format(agentUT.name, win_ratio))
  142. if __name__ == "__main__":
  143. main()