game_agent.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. """This file contains all the classes you must complete for this project.
  2. You can use the test cases in `agent_test.py` to help during development, and
  3. augment the test suite with your own test cases to further test your code.
  4. You must test your agent's strength against a set of agents with known relative
  5. strength using `tournament.py` and include the results in your report.
  6. """
  7. import random
  8. import math
  9. class Timeout(Exception):
  10. """Subclass base exception for code clarity."""
  11. pass
  12. # Heuristic Definitions
  13. def penalize_corners(game, player):
  14. """Heuristic which penalizes the player for moving to a corner and returns
  15. a float value of the difference in legal moves of the two players.
  16. PARAMETERS:
  17. game :: `isolation.Board`
  18. An instance of `isolation.Board` encoding the current state of
  19. the game (e.g. player locations and blocked cells).
  20. player :: object
  21. A player instance in the current game.
  22. RETURNS:
  23. float :: The heuristic value of the current game state to a specified
  24. player.
  25. """
  26. # Check if game is already over
  27. if game.is_loser(player):
  28. return float('-inf')
  29. if game.is_winner(player):
  30. return float('inf')
  31. own_moves = len(game.get_legal_moves(player))
  32. opp_moves = len(game.get_legal_moves(game.get_opponent(player)))
  33. # Penalize for moving to corner
  34. corner_weight = 4
  35. if is_corner(game, game.get_player_location(player)):
  36. own_moves -= corner_weight
  37. return float(own_moves - opp_moves)
  38. def is_corner(game, player_location):
  39. """Returns 'True' if player's current location is a board corner.
  40. """
  41. corner_positions = [(0, 0), (0, game.height - 1), \
  42. (game.width - 1, 0), \
  43. (game.width - 1, game.height - 1)]
  44. return player_location in corner_positions
  45. def run_away(game, player):
  46. """Heuristic to credit player moves that are farther from the opponent and
  47. returns a float value of the difference in legal moves of the two players.
  48. PARAMETERS:
  49. game :: `isolation.Board`
  50. An instance of `isolation.Board` encoding the current state of
  51. the game (e.g. player locations and blocked cells).
  52. player :: object
  53. A player instance in the current game.
  54. RETURNS:
  55. float :: The heuristic value of the current game state to a specified
  56. player.
  57. """
  58. # Check if game is already over
  59. if game.is_loser(player):
  60. return float('-inf')
  61. if game.is_winner(player):
  62. return float('inf')
  63. opp_player = game.get_opponent(player)
  64. own_moves = len(game.get_legal_moves(player))
  65. opp_moves = len(game.get_legal_moves(game.get_opponent(player)))
  66. # Reward player for choosing moves farther from opponent
  67. distance = euclid_distance(game, player, opp_player)
  68. own_moves += distance
  69. return((own_moves - opp_moves))
  70. def euclid_distance(game, player, opp_player):
  71. """Returns distance between current positions of two players.
  72. """
  73. player_location = game.get_player_location(player)
  74. opp_location = game.get_player_location(opp_player)
  75. x_dist = math.pow(player_location[0] - opp_location[0], 2)
  76. y_dist = math.pow(player_location[1] - opp_location[1], 2)
  77. return math.sqrt(x_dist + y_dist)
  78. def foresee_moves(game, player):
  79. """Heuristic that returns a float of the difference in the number of legal
  80. moves in the current game state plus the number of possible moves.
  81. PARAMETERS:
  82. game :: `isolation.Board`
  83. An instance of `isolation.Board` encoding the current state of
  84. the game (e.g. player locations and blocked cells).
  85. player :: object
  86. A player instance in the current game.
  87. RETURNS
  88. float :: The heuristic value of the current game state to a specified
  89. player.
  90. """
  91. # Check if game is already over
  92. if game.is_loser(player):
  93. return float('-inf')
  94. if game.is_winner(player):
  95. return float('inf')
  96. own_legal_moves = game.get_legal_moves(player)
  97. own_moves = len(own_legal_moves)
  98. for m in own_legal_moves:
  99. own_moves += len(game.get_moves(m))
  100. opp_legal_moves = game.get_legal_moves(game.get_opponent(player))
  101. opp_moves = len(opp_legal_moves)
  102. for m in opp_legal_moves:
  103. opp_moves += len(game.get_moves(m))
  104. return float(own_moves - opp_moves)
  105. def normalized_moves(game, player):
  106. """
  107. Heuristic that returns the difference of player's and opponent's move(s),
  108. divided by total of all remaining legal moves.
  109. PARAMETERS:
  110. game :: `isolation.Board`
  111. An instance of `isolation.Board` encoding the current state of
  112. the game (e.g. player locations and blocked cells).
  113. player :: object
  114. A player instance in the current game.
  115. RETURNS:
  116. float :: The heuristic value of the current game state to a specified
  117. player.
  118. """
  119. if game.is_loser(player):
  120. return float("-inf")
  121. if game.is_winner(player):
  122. return float("inf")
  123. own_moves = len(game.get_legal_moves(player))
  124. opp_moves = len(game.get_legal_moves(game.get_opponent(player)))
  125. score = float((own_moves - opp_moves) / (own_moves + opp_moves))
  126. return score
  127. def custom_score(game, player):
  128. """Calclate the heuristic value of a game state from the point of view of
  129. the given player.
  130. Note: this function should be called from within a Player instance as
  131. `self.score()` - you should not need to call this function directly.
  132. PARAMETERS:
  133. game :: `isolation.Board`
  134. An instance of `isolation.Board` encoding the current state of
  135. the game (e.g. player locations and blocked cells).
  136. player :: object
  137. A player instance in the current game.
  138. RETURNS:
  139. float :: The heuristic value of the current game state to a specified
  140. player.
  141. """
  142. # return penalize_corners(game, player)
  143. # return run_away(game, player)
  144. return foresee_moves(game, player)
  145. # return normalized_moves(game, player)
  146. class CustomPlayer:
  147. """Game-playing agent that chooses a move using your evaluation function
  148. and a depth-limited minimax algorithm with alpha-beta pruning. You must
  149. finish and test this player to make sure it properly uses minimax and
  150. alpha-beta to return a good move before the search time limit expires.
  151. PARAMETERS:
  152. search_depth :: int (optional)
  153. A strictly positive integer (i.e., 1, 2, 3,...) for the number of
  154. layers in the game tree to explore for fixed-depth search i.e., a
  155. depth of one (1) would only explore the immediate sucessors of the
  156. current state. This parameter should be ignored when
  157. iterative = True.
  158. score_fn :: callable (optional)
  159. A function to use for heuristic evaluation of game states.
  160. iterative :: boolean (optional)
  161. Flag indicating whether to perform fixed-depth search (False) or
  162. iterative deepening search (True). When True, search_depth should
  163. be ignored and no limit to search depth.
  164. method :: {'minimax', 'alphabeta'} (optional)
  165. The name of the search method to use in get_move().
  166. timeout :: float (optional)
  167. Time remaining (in milliseconds) when search is aborted. Should be
  168. a positive value large enough to allow the function to return
  169. before the timer expires.
  170. """
  171. def __init__(self, search_depth=3, score_fn=custom_score,
  172. iterative=True, method='minimax', timeout=10.):
  173. self.search_depth = search_depth
  174. self.iterative = iterative
  175. self.score = score_fn
  176. self.method = method
  177. self.time_left = None
  178. self.TIMER_THRESHOLD = timeout
  179. def get_move(self, game, legal_moves, time_left):
  180. """Search for the best move from the available legal moves and return a
  181. result before the time limit expires.
  182. This function must perform iterative deepening if self.iterative=True,
  183. and it must use the search method (minimax or alphabeta) corresponding
  184. to the self.method value.
  185. PARAMETERS:
  186. game :: `isolation.Board`
  187. An instance of `isolation.Board` encoding the current state of
  188. the game (e.g., player locations and blocked cells).
  189. legal_moves :: list<(int, int)>
  190. A list containing legal moves. Moves are encoded as tuples of
  191. pairs of ints defining the next (row, col) for the agent to
  192. occupy.
  193. time_left :: callable
  194. A function that returns the number of milliseconds left in the
  195. current turn. Returning with any less than 0 ms remaining
  196. forfeits the game.
  197. RETURNS:
  198. (int, int)
  199. Board coordinates corresponding to a legal move; may return
  200. (-1, -1) if there are no available legal moves.
  201. """
  202. self.time_left = time_left
  203. if not legal_moves:
  204. return (-1, -1)
  205. move = None
  206. try:
  207. # The search method call (alpha beta or minimax) should happen in
  208. # here in order to avoid timeout. The try/except block will
  209. # automatically catch the exception raised by the search method
  210. # when the timer gets close to expiring.
  211. algorithm_name = getattr(self, self.method)
  212. if self.iterative:
  213. depth = 1 # Depth used for iterative deepening.
  214. while True:
  215. _, move = algorithm_name(game, depth)
  216. depth += 1
  217. else:
  218. _, move = algorithm_name(game, self.search_depth)
  219. except Timeout:
  220. # Return the best move so far in-case of time-out.
  221. return move
  222. return move
  223. def minimax(self, game, depth, maximizing_player=True):
  224. """Implement the minimax search algorithm as described in the lectures.
  225. PARAMETERS:
  226. game :: isolation.Board
  227. An instance of the Isolation game `Board` class representing
  228. the current game state.
  229. depth :: int
  230. Depth is an integer representing the maximum number of plies to
  231. search in the game tree before aborting.
  232. maximizing_player :: bool
  233. Flag indicating whether the current search depth corresponds to
  234. a maximizing layer (True) or a minimizing layer (False).
  235. RETURNS:
  236. float :: The score for the current search branch
  237. tuple(int, int) :: The best move for the current branch;
  238. (-1, -1) for no legal moves
  239. """
  240. if self.time_left() < self.TIMER_THRESHOLD:
  241. raise Timeout()
  242. # Return heuristic value of game when search has reached max depth
  243. if depth == 0:
  244. # Note: No need to return the move.
  245. # The max/min players keep a track of them.
  246. return (self.score(game, self), None)
  247. # Verify if there are any available legal moves
  248. legal_moves = game.get_legal_moves()
  249. if not legal_moves:
  250. return (game.utility(self), (-1, -1))
  251. # Maximize/minimize play accordingly
  252. if maximizing_player:
  253. return self.minimax_maximize(game, legal_moves, depth)
  254. else:
  255. return self.minimax_minimize(game, legal_moves, depth)
  256. def minimax_maximize(self, game, legal_moves, depth):
  257. """Minimax maximizer player. Returns the highest (score, move)
  258. tuple found in the game."""
  259. highest_score, selected_move = (float('-inf'), (-1, -1))
  260. for move in legal_moves:
  261. score, _ = self.minimax(game.forecast_move(move), \
  262. depth - 1, False)
  263. highest_score, selected_move = max((highest_score, \
  264. selected_move), (score, move))
  265. return (highest_score, selected_move)
  266. def minimax_minimize(self, game, legal_moves, depth):
  267. """Minimax maximizer player. Returns the lowest (score, move)
  268. tuple found in the game."""
  269. lowest_score, selected_move = (float('inf'), (-1, -1))
  270. for move in legal_moves:
  271. score, _ = self.minimax(game.forecast_move(move), depth - 1)
  272. lowest_score, selected_move = min((lowest_score, \
  273. selected_move), (score, move))
  274. return (lowest_score, selected_move)
  275. def alphabeta(self, game, depth, alpha=float("-inf"), beta=float("inf"),
  276. maximizing_player=True):
  277. """Implement minimax search with alpha-beta pruning as described in the
  278. lectures.
  279. PARAMETERS:
  280. game :: `isolation.Board`
  281. An instance of the Isolation game 'Board' class representing
  282. the current game state.
  283. depth :: int
  284. Depth is an integer representing the maximum number of plies
  285. to search in the game tree before aborting.
  286. alpha :: float
  287. Alpha limits the lower bound of search on minimizing layers.
  288. beta :: float
  289. Beta limits the upper bound of search on maximizing layers.
  290. maximizing_player :: bool
  291. Flag indicating whether the current search depth corresponds
  292. to a maximizing layer (True) or a minimizing layer (False).
  293. RETURNS:
  294. float
  295. The score for the current search branch.
  296. tuple(int, int)
  297. The best move for the current branch; (-1, -1) for no legal
  298. moves.
  299. """
  300. if self.time_left() < self.TIMER_THRESHOLD:
  301. raise Timeout()
  302. # Return heuristic value of game when search has reached max-depth
  303. if depth == 0:
  304. # Move need not be returned.
  305. # Min/Max players keep a track of them.
  306. return (self.score(game, self), None)
  307. # Verify if there are any available legal moves
  308. legal_moves = game.get_legal_moves()
  309. if not legal_moves:
  310. return (game.utility(self), (-1, -1))
  311. # Maximize/minimize play accordingly
  312. if maximizing_player:
  313. return self.ab_maximize_play(game, legal_moves, depth, alpha, beta)
  314. else:
  315. return self.ab_minimize_play(game, legal_moves, depth, alpha, beta)
  316. def ab_maximize_play(self, game, legal_moves, depth, alpha, beta):
  317. """Alphabeta maximizer player. Returns the highest score/move
  318. tuple found in game, pruning the search tree."""
  319. highest_score, selected_move = (float('-inf'), (-1, -1))
  320. for move in legal_moves:
  321. score, _ = self.alphabeta(game.forecast_move(move), \
  322. depth - 1, alpha, beta, False)
  323. if score > alpha:
  324. alpha = score
  325. highest_score, selected_move = score, move
  326. if alpha >= beta:
  327. break
  328. return (highest_score, selected_move)
  329. def ab_minimize_play(self, game, legal_moves, depth, alpha, beta):
  330. """Alphabeta minimizer player. Returns the lowest score/move
  331. tuple found in game, pruning the search tree."""
  332. lowest_score, selected_move = (float('inf'), (-1, -1))
  333. for move in legal_moves:
  334. score, _ = self.alphabeta(game.forecast_move(move), \
  335. depth - 1, alpha, beta, True)
  336. if score < beta:
  337. beta = score
  338. lowest_score, selected_move = score, move
  339. if beta <= alpha:
  340. break
  341. return (lowest_score, selected_move)