r"""
Tools for normal form games.
Definitions and Basic Concepts
------------------------------
An :math:`N`-player *normal form game* :math:`g = (I, (A_i)_{i \in I},
(u_i)_{i \in I})` consists of
- the set of *players* :math:`I = \{0, \ldots, N-1\}`,
- the set of *actions* :math:`A_i = \{0, \ldots, n_i-1\}` for each
player :math:`i \in I`, and
- the *payoff function* :math:`u_i \colon A_i \times A_{i+1} \times
\cdots \times A_{i+N-1} \to \mathbb{R}` for each player :math:`i \in
I`,
where :math:`i+j` is understood modulo :math:`N`. Note that we adopt the
convention that the 0-th argument of the payoff function :math:`u_i` is
player :math:`i`'s own action and the :math:`j`-th argument is player
(:math:`i+j`)'s action (modulo :math:`N`). A mixed action for player
:math:`i` is a probability distribution on :math:`A_i` (while an element
of :math:`A_i` is referred to as a pure action). A pure action
:math:`a_i \in A_i` is identified with the mixed action that assigns
probability one to :math:`a_i`. Denote the set of mixed actions of
player :math:`i` by :math:`X_i`. We also denote :math:`A_{-i} = A_{i+1}
\times \cdots \times A_{i+N-1}` and :math:`X_{-i} = X_{i+1} \times
\cdots \times X_{i+N-1}`.
The (pure-action) *best response correspondence* :math:`b_i \colon
X_{-i} \to A_i` for each player :math:`i` is defined by
.. math::
b_i(x_{-i}) = \{a_i \in A_i \mid
u_i(a_i, x_{-i}) \geq u_i(a_i', x_{-i})
\ \forall\,a_i' \in A_i\},
where :math:`u_i(a_i, x_{-i}) = \sum_{a_{-i} \in A_{-i}} u_i(a_i,
a_{-i}) \prod_{j=1}^{N-1} x_{i+j}(a_j)` is the expected payoff to action
:math:`a_i` against mixed actions :math:`x_{-i}`. A profile of mixed
actions :math:`x^* \in X_0 \times \cdots \times X_{N-1}` is a *Nash
equilibrium* if for all :math:`i \in I` and :math:`a_i \in A_i`,
.. math::
x_i^*(a_i) > 0 \Rightarrow a_i \in b_i(x_{-i}^*),
or equivalently, :math:`x_i^* \cdot v_i(x_{-i}^*) \geq x_i \cdot
v_i(x_{-i}^*)` for all :math:`x_i \in X_i`, where :math:`v_i(x_{-i})` is
the vector of player :math:`i`'s payoffs when the opponent players play
mixed actions :math:`x_{-i}`.
Creating a NormalFormGame
-------------------------
There are three ways to construct a `NormalFormGame` instance.
The first is to pass an array of payoffs for all the players:
>>> matching_pennies_bimatrix = [[(1, -1), (-1, 1)], [(-1, 1), (1, -1)]]
>>> g = NormalFormGame(matching_pennies_bimatrix)
>>> print(g.players[0])
Player in a 2-player normal form game with payoff array:
[[ 1, -1],
[-1, 1]]
>>> print(g.players[1])
Player in a 2-player normal form game with payoff array:
[[-1, 1],
[ 1, -1]]
If a square matrix (2-dimensional array) is given, then it is considered
to be a symmetric two-player game:
>>> coordination_game_matrix = [[4, 0], [3, 2]]
>>> g = NormalFormGame(coordination_game_matrix)
>>> print(g)
2-player NormalFormGame with payoff profile array:
[[[4, 4], [0, 3]],
[[3, 0], [2, 2]]]
The second is to specify the sizes of the action sets of the players,
which gives a `NormalFormGame` instance filled with payoff zeros, and
then set the payoff values to each entry:
>>> g = NormalFormGame((2, 2))
>>> print(g)
2-player NormalFormGame with payoff profile array:
[[[ 0., 0.], [ 0., 0.]],
[[ 0., 0.], [ 0., 0.]]]
>>> g[0, 0] = 1, 1
>>> g[0, 1] = -2, 3
>>> g[1, 0] = 3, -2
>>> print(g)
2-player NormalFormGame with payoff profile array:
[[[ 1., 1.], [-2., 3.]],
[[ 3., -2.], [ 0., 0.]]]
The third is to pass an array of `Player` instances, as explained in the
next section.
Creating a Player
-----------------
A `Player` instance is created by passing a payoff array:
>>> player0 = Player([[3, 1], [0, 2]])
>>> player0.payoff_array
array([[3, 1],
[0, 2]])
Passing an array of `Player` instances is the third way to create a
`NormalFormGame` instance.
>>> player1 = Player([[2, 0], [1, 3]])
>>> player1.payoff_array
array([[2, 0],
[1, 3]])
>>> g = NormalFormGame((player0, player1))
>>> print(g)
2-player NormalFormGame with payoff profile array:
[[[3, 2], [1, 1]],
[[0, 0], [2, 3]]]
Beware that in `payoff_array[h, k]`, `h` refers to the player's own
action, while `k` refers to the opponent player's action.
"""
import re
import numbers
import numpy as np
from numba import jit
from ..util import check_random_state, rng_integers
[docs]class Player:
"""
Class representing a player in an N-player normal form game.
Parameters
----------
payoff_array : array_like(float)
Array representing the player's payoff function, where
`payoff_array[a_0, a_1, ..., a_{N-1}]` is the payoff to the
player when the player plays action `a_0` while his N-1
opponents play actions `a_1`, ..., `a_{N-1}`, respectively.
Attributes
----------
payoff_array : ndarray(float, ndim=N)
See Parameters.
num_actions : scalar(int)
The number of actions available to the player.
num_opponents : scalar(int)
The number of opponent players.
dtype : dtype
Data type of the elements of `payoff_array`.
tol : scalar(float), default=1e-8
Default tolerance value used in determining best responses.
"""
def __init__(self, payoff_array):
self.payoff_array = np.asarray(payoff_array, order='C')
if self.payoff_array.ndim == 0:
raise ValueError('payoff_array must be an array_like')
if np.prod(self.payoff_array.shape) == 0:
raise ValueError('every player must have at least one action')
self.num_opponents = self.payoff_array.ndim - 1
self.num_actions = self.payoff_array.shape[0]
self.dtype = self.payoff_array.dtype
self.tol = 1e-8
def __repr__(self):
# From numpy.matrix.__repr__
# Print also dtype, except for int64, float64
s = repr(self.payoff_array).replace('array', 'Player')
l = s.splitlines()
for i in range(1, len(l)):
if l[i]:
l[i] = ' ' + l[i]
return '\n'.join(l)
def __str__(self):
N = self.num_opponents + 1
s = 'Player in a {N}-player normal form game'.format(N=N)
s += ' with payoff array:\n'
s += np.array2string(self.payoff_array, separator=', ')
return s
[docs] def delete_action(self, action, player_idx=0):
"""
Return a new `Player` instance with the action(s) specified by
`action` deleted from the action set of the player specified by
`player_idx`. Deletion is not performed in place.
Parameters
----------
action : scalar(int) or array_like(int)
Integer or array like of integers representing the action(s)
to be deleted.
player_idx : scalar(int), optional(default=0)
Index of the player to delete action(s) for.
Returns
-------
Player
Copy of `self` with the action(s) deleted as specified.
Examples
--------
>>> player = Player([[3, 0], [0, 3], [1, 1]])
>>> player
Player([[3, 0],
[0, 3],
[1, 1]])
>>> player.delete_action(2)
Player([[3, 0],
[0, 3]])
>>> player.delete_action(0, player_idx=1)
Player([[0],
[3],
[1]])
"""
payoff_array_new = np.delete(self.payoff_array, action, player_idx)
return Player(payoff_array_new)
[docs] def payoff_vector(self, opponents_actions):
"""
Return an array of payoff values, one for each own action, given
a profile of the opponents' actions.
Parameters
----------
opponents_actions : see `best_response`.
Returns
-------
payoff_vector : ndarray(float, ndim=1)
An array representing the player's payoff vector given the
profile of the opponents' actions.
"""
def reduce_last_player(payoff_array, action):
"""
Given `payoff_array` with ndim=M, return the payoff array
with ndim=M-1 fixing the last player's action to be `action`.
"""
if isinstance(action, numbers.Integral): # pure action
return payoff_array.take(action, axis=-1)
else: # mixed action
return payoff_array.dot(action)
if self.num_opponents == 1:
payoff_vector = \
reduce_last_player(self.payoff_array, opponents_actions)
elif self.num_opponents >= 2:
payoff_vector = self.payoff_array
for i in reversed(range(self.num_opponents)):
payoff_vector = \
reduce_last_player(payoff_vector, opponents_actions[i])
else: # Trivial case with self.num_opponents == 0
payoff_vector = self.payoff_array
return payoff_vector
[docs] def is_best_response(self, own_action, opponents_actions, tol=None):
"""
Return True if `own_action` is a best response to
`opponents_actions`.
Parameters
----------
own_action : scalar(int) or array_like(float, ndim=1)
An integer representing a pure action, or an array of floats
representing a mixed action.
opponents_actions : see `best_response`
tol : scalar(float), optional(default=None)
Tolerance level used in determining best responses. If None,
default to the value of the `tol` attribute.
Returns
-------
bool
True if `own_action` is a best response to
`opponents_actions`; False otherwise.
"""
if tol is None:
tol = self.tol
payoff_vector = self.payoff_vector(opponents_actions)
payoff_max = payoff_vector.max()
if isinstance(own_action, numbers.Integral):
return payoff_vector[own_action] >= payoff_max - tol
else:
return np.dot(own_action, payoff_vector) >= payoff_max - tol
[docs] def best_response(self, opponents_actions, tie_breaking='smallest',
payoff_perturbation=None, tol=None, random_state=None):
"""
Return the best response action(s) to `opponents_actions`.
Parameters
----------
opponents_actions : scalar(int) or array_like
A profile of N-1 opponents' actions, represented by either
scalar(int), array_like(float), array_like(int), or
array_like(array_like(float)). If N=2, then it must be a
scalar of integer (in which case it is treated as the
opponent's pure action) or a 1-dimensional array of floats
(in which case it is treated as the opponent's mixed
action). If N>2, then it must be an array of N-1 objects,
where each object must be an integer (pure action) or an
array of floats (mixed action).
tie_breaking : str, optional(default='smallest')
str in {'smallest', 'random', False}. Control how, or
whether, to break a tie (see Returns for details).
payoff_perturbation : array_like(float), optional(default=None)
Array of length equal to the number of actions of the player
containing the values ("noises") to be added to the payoffs
in determining the best response.
tol : scalar(float), optional(default=None)
Tolerance level used in determining best responses. If None,
default to the value of the `tol` attribute.
random_state : int or np.random.RandomState/Generator, optional
Random seed (integer) or np.random.RandomState or Generator
instance to set the initial state of the random number
generator for reproducibility. If None, a randomly
initialized RandomState is used. Relevant only when
tie_breaking='random'.
Returns
-------
scalar(int) or ndarray(int, ndim=1)
If tie_breaking=False, returns an array containing all the
best response pure actions. If tie_breaking='smallest',
returns the best response action with the smallest index; if
tie_breaking='random', returns an action randomly chosen
from the best response actions.
"""
if tol is None:
tol = self.tol
payoff_vector = self.payoff_vector(opponents_actions)
if payoff_perturbation is not None:
try:
payoff_vector += payoff_perturbation
except TypeError: # type mismatch
payoff_vector = payoff_vector + payoff_perturbation
best_responses = \
np.where(payoff_vector >= payoff_vector.max() - tol)[0]
if tie_breaking == 'smallest':
return best_responses[0]
elif tie_breaking == 'random':
return self.random_choice(best_responses,
random_state=random_state)
elif tie_breaking is False:
return best_responses
else:
msg = "tie_breaking must be one of 'smallest', 'random', or False"
raise ValueError(msg)
[docs] def random_choice(self, actions=None, random_state=None):
"""
Return a pure action chosen randomly from `actions`.
Parameters
----------
actions : array_like(int), optional(default=None)
An array of integers representing pure actions.
random_state : int or np.random.RandomState/Generator, optional
Random seed (integer) or np.random.RandomState or Generator
instance to set the initial state of the random number
generator for reproducibility. If None, a randomly
initialized RandomState is used.
Returns
-------
scalar(int)
If `actions` is given, returns an integer representing a
pure action chosen randomly from `actions`; if not, an
action is chosen randomly from the player's all actions.
"""
random_state = check_random_state(random_state)
if actions is not None:
n = len(actions)
else:
n = self.num_actions
if n == 1:
idx = 0
else:
idx = rng_integers(random_state, n)
if actions is not None:
return actions[idx]
else:
return idx
[docs] def is_dominated(self, action, tol=None, method=None):
"""
Determine whether `action` is strictly dominated by some mixed
action.
Parameters
----------
action : scalar(int)
Integer representing a pure action.
tol : scalar(float), optional(default=None)
Tolerance level used in determining domination. If None,
default to the value of the `tol` attribute.
method : str, optional(default=None)
If None, `minmax` from `quantecon.optimize` is used.
Otherwise `scipy.optimize.linprog` is used with the method
as specified by `method`.
Returns
-------
bool
True if `action` is strictly dominated by some mixed action;
False otherwise.
"""
if tol is None:
tol = self.tol
payoff_array = self.payoff_array
if self.num_opponents == 0:
return payoff_array.max() > payoff_array[action] + tol
ind = np.ones(self.num_actions, dtype=bool)
ind[action] = False
D = payoff_array[ind]
D -= payoff_array[action]
if D.shape[0] == 0: # num_actions == 1
return False
if self.num_opponents >= 2:
D.shape = (D.shape[0], np.prod(D.shape[1:]))
if method is None:
from ..optimize.minmax import minmax
v, _, _ = minmax(D)
return v > tol
else:
from scipy.optimize import linprog
m, n = D.shape
A_ub = np.empty((n, m+1))
A_ub[:, :m] = -D.T
A_ub[:, -1] = 1 # Slack variable
b_ub = np.zeros(n)
A_eq = np.empty((1, m+1))
A_eq[:, :m] = 1 # Equality constraint
A_eq[:, -1] = 0
b_eq = np.ones(1)
c = np.zeros(m+1)
c[-1] = -1
try:
res = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq,
method=method)
except ValueError:
raise ValueError("Unknown method '{0}'".format(method))
if res.success:
return res.x[-1] > tol
elif res.status == 2: # infeasible
return False
else: # pragma: no cover
msg = 'scipy.optimize.linprog returned {0}'.format(res.status)
raise RuntimeError(msg)
[docs] def dominated_actions(self, tol=None, method=None):
"""
Return a list of actions that are strictly dominated by some
mixed actions.
Parameters
----------
tol : scalar(float), optional(default=None)
Tolerance level used in determining domination. If None,
default to the value of the `tol` attribute.
method : str, optional(default=None)
If None, `minmax` from `quantecon.optimize` is used. If
`method` is set to `'simplex'`, `'interior-point'`, or
`'revised simplex'`, then `scipy.optimize.linprog` is used
with the method as specified by `method`.
Returns
-------
list(int)
List of integers representing pure actions, each of which is
strictly dominated by some mixed action.
"""
out = []
for action in range(self.num_actions):
if self.is_dominated(action, tol=tol, method=method):
out.append(action)
return out
def _nums_actions2string(nums_actions):
if len(nums_actions) == 1:
s = '{0}-action'.format(nums_actions[0])
else:
s = 'x'.join(map(str, nums_actions))
return s
def _payoff_profile_array2string(payoff_profile_array, class_name=None):
s = np.array2string(payoff_profile_array, separator=', ')
# Remove one linebreak
s = re.sub(r'(\n+)', lambda x: x.group(0)[0:-1], s)
if class_name is not None:
prefix = class_name + '('
next_line_prefix = ' ' * len(prefix)
suffix = ')'
l = s.splitlines()
l[0] = prefix + l[0]
for i in range(1, len(l)):
if l[i]:
l[i] = next_line_prefix + l[i]
l[-1] += suffix
s = '\n'.join(l)
return s
[docs]def pure2mixed(num_actions, action):
"""
Convert a pure action to the corresponding mixed action.
Parameters
----------
num_actions : scalar(int)
The number of the pure actions (= the length of a mixed action).
action : scalar(int)
The pure action to convert to the corresponding mixed action.
Returns
-------
ndarray(float, ndim=1)
The mixed action representation of the given pure action.
"""
mixed_action = np.zeros(num_actions)
mixed_action[action] = 1
return mixed_action
# Numba jitted functions #
[docs]@jit(nopython=True, cache=True)
def best_response_2p(payoff_matrix, opponent_mixed_action, tol=1e-8):
"""
Numba-optimized version of `Player.best_response` compilied in
nopython mode, specialized for 2-player games (where there is only
one opponent).
Return the best response action (with the smallest index if more
than one) to `opponent_mixed_action` under `payoff_matrix`.
Parameters
----------
payoff_matrix : ndarray(float, ndim=2)
Payoff matrix.
opponent_mixed_action : ndarray(float, ndim=1)
Opponent's mixed action. Its length must be equal to
`payoff_matrix.shape[1]`.
tol : scalar(float), optional(default=None)
Tolerance level used in determining best responses.
Returns
-------
scalar(int)
Best response action.
"""
n, m = payoff_matrix.shape
payoff_max = -np.inf
payoff_vector = np.zeros(n)
for a in range(n):
for b in range(m):
payoff_vector[a] += payoff_matrix[a, b] * opponent_mixed_action[b]
if payoff_vector[a] > payoff_max:
payoff_max = payoff_vector[a]
for a in range(n):
if payoff_vector[a] >= payoff_max - tol:
return a