Theory Lab
A runnable lab version of the notes, prepared for web reading.
Nash Equilibria
Nash equilibrium formalizes strategic stability: each agent's policy is optimal against the policies of the others.
This notebook is the executable companion to notes.md. It uses small payoff matrices and synthetic games so every cell runs without external data.
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Prefer seaborn's whitegrid theme with a colorblind palette when seaborn is
# installed; otherwise fall back to matplotlib's bundled port of the same style.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True  # flag other cells could consult to branch on seaborn features
except ImportError:
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False

# Shared figure defaults for every plot in this notebook: readable fonts,
# open top/right spines, and higher DPI for saved figures than for display.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})

# Fix the global NumPy RNG so reruns of the notebook are reproducible.
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
# Colorblind-friendly hex palette shared by every figure in the notebook.
COLORS = dict(
    primary="#0077BB",
    secondary="#EE7733",
    tertiary="#009988",
    error="#CC3311",
    neutral="#555555",
    highlight="#EE3377",
)
def header(title):
    """Print `title` between 72-character rules to separate demo sections."""
    rule = "=" * 72
    print("\n" + rule)
    print(title)
    print(rule)
def check_true(condition, name):
    """Print a PASS/FAIL line for `condition`, then assert it (raises on FAIL)."""
    ok = bool(condition)
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}")
    assert ok, name
def check_close(value, target, tol=1e-8, name="value"):
    """Print PASS/FAIL for |value - target| <= tol, then assert it."""
    got = float(value)
    want = float(target)
    ok = abs(got - want) <= tol
    status = "PASS" if ok else "FAIL"
    print(f"{status} - {name}: got {got:.6f}, expected {want:.6f}")
    assert ok, name
def pure_nash(payoff_a, payoff_b):
    """Enumerate pure-strategy Nash equilibria of a two-player bimatrix game.

    Args:
        payoff_a: row player's payoff matrix, shape (m, n) array-like.
        payoff_b: column player's payoff matrix, same shape.

    Returns:
        List of (row, col) index pairs at which each action is a best response
        to the other, within a small numerical tolerance.
    """
    a = np.asarray(payoff_a, dtype=float)
    b = np.asarray(payoff_b, dtype=float)
    tol = 1e-12
    # Best payoff each player could attain against the opponent's fixed action.
    best_vs_col = a.max(axis=0)  # row player's best per column
    best_vs_row = b.max(axis=1)  # column player's best per row
    n_rows, n_cols = a.shape
    return [
        (i, j)
        for i in range(n_rows)
        for j in range(n_cols)
        if a[i, j] >= best_vs_col[j] - tol and b[i, j] >= best_vs_row[i] - tol
    ]
def expected_payoff(payoff, p, q):
    """Return the bilinear form p^T M q (expected payoff) as a Python float."""
    matrix = np.asarray(payoff, dtype=float)
    row_mix = np.asarray(p)
    col_mix = np.asarray(q)
    return float(row_mix.dot(matrix).dot(col_mix))
def grid_zero_sum_value(payoff, grid=101):
    """Approximate the maximin/minimax values of a 2x2 zero-sum game on a grid.

    Each mixed strategy is parameterized by its first coordinate t as
    [t, 1 - t], so this only applies to 2x2 games.

    Args:
        payoff: 2x2 row-player payoff matrix.
        grid: number of evenly spaced probability points per player.

    Returns:
        (maximin, minimax): the row player's best guaranteed value and the
        column player's best cap over the grid; they bracket the game value.
    """
    matrix = np.asarray(payoff, dtype=float)
    ticks = np.linspace(0, 1, grid)
    mixes = [np.array([t, 1 - t]) for t in ticks]
    # Row player: pick p maximizing the worst case over q.
    maximin = max(
        min(float(p @ matrix @ q) for q in mixes) for p in mixes
    )
    # Column player: pick q minimizing the best case over p.
    minimax = min(
        max(float(p @ matrix @ q) for p in mixes) for q in mixes
    )
    return float(maximin), float(minimax)
def fictitious_play_rps(steps=200):
    """Run fictitious play on zero-sum rock-paper-scissors for `steps` rounds.

    Both players start from unit pseudo-counts and best-respond each round to
    the opponent's empirical action frequencies (ties broken by lowest index).

    Returns:
        Array of shape (steps, 3): the row player's empirical mixture after
        each round.
    """
    payoff = np.array([[0.0, -1.0, 1.0],
                       [1.0, 0.0, -1.0],
                       [-1.0, 1.0, 0.0]])
    row_counts = np.ones(3)
    col_counts = np.ones(3)
    trajectory = []
    for _ in range(steps):
        # Empirical mixtures before this round's moves.
        col_mix = col_counts / col_counts.sum()
        row_mix = row_counts / row_counts.sum()
        # Row maximizes payoff, column minimizes it (zero-sum).
        row_counts[int(np.argmax(payoff @ col_mix))] += 1
        col_counts[int(np.argmin(row_mix @ payoff))] += 1
        trajectory.append(row_counts / row_counts.sum())
    return np.array(trajectory)
def pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5, steps=20, alpha=0.05):
    """Sign-gradient PGD on the inner maximization max_{|d|<=eps} (theta(x+d)-y)^2.

    Args:
        theta: model parameter of the linear predictor theta * (x + d).
        x, y: clean input and regression target.
        eps: L-infinity radius of the allowed perturbation.
        steps: number of projected ascent steps.
        alpha: step size per sign-gradient update.

    Returns:
        (delta, robust_loss): the final perturbation and the squared error
        it induces.
    """
    delta = 0.0
    for _ in range(steps):
        residual = theta * (x + delta) - y
        # d loss / d delta = 2 * residual * theta; step along its sign.
        ascent_step = alpha * np.sign(2 * residual * theta)
        # Project back onto the [-eps, eps] ball after each update.
        delta = np.clip(delta + ascent_step, -eps, eps)
    robust_loss = (theta * (x + delta) - y) ** 2
    return float(delta), float(robust_loss)
print("Helper functions ready.")
Demo 1: strategic stability
This demo turns the approved TOC concept into a small executable game.
Code cell 5
header("Demo 1 - strategic stability: pure Nash equilibria")
# Prisoner's-dilemma-style payoffs: A is the row player's table, B the column
# player's; action 1 is "defect" per the check message below.
A = np.array([[3, 0], [5, 1]], dtype=float)
B = np.array([[3, 5], [0, 1]], dtype=float)
# Enumerate action profiles where neither player gains by deviating alone.
eq = pure_nash(A, B)
print("Pure Nash equilibria:", eq)
check_true((1, 1) in eq, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")
Demo 2: best responses
This demo turns the approved TOC concept into a small executable game.
Code cell 7
header("Demo 2 - best responses: matching pennies mixed strategy")
# Matching pennies: zero-sum with no pure equilibrium; both players mix 50/50.
A = np.array([[1, -1], [-1, 1]], dtype=float)
p = np.array([0.5, 0.5])  # row player's mixed strategy
q = np.array([0.5, 0.5])  # column player's mixed strategy
value = expected_payoff(A, p, q)  # p^T A q
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")
Demo 3: prisoner's dilemma and coordination
This demo turns the approved TOC concept into a small executable game.
Code cell 9
header("Demo 3 - prisoner's dilemma and coordination: minimax grid value")
# Row player's payoffs in zero-sum matching pennies.
A = np.array([[1, -1], [-1, 1]], dtype=float)
# Sweep both players' 2-point mixed strategies over a 101-point grid.
lower, upper = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(lower, 4))
print("Grid minimax:", round(upper, 4))
# By the minimax theorem the two values coincide; the grid gets close.
check_true(abs(lower - upper) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")
Demo 4: equilibrium vs optimum
This demo turns the approved TOC concept into a small executable game.
Code cell 11
header("Demo 4 - equilibrium vs optimum: fictitious play in rock-paper-scissors")
# Each player repeatedly best-responds to the opponent's empirical mixture.
hist = fictitious_play_rps(steps=180)
final_policy = hist[-1]  # row player's empirical strategy after the last round
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
# The RPS mixed equilibrium is uniform (1/3 each); allow a loose band.
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
fig, ax = plt.subplots()
ax.plot(hist[:, 0], color=COLORS["primary"], label="Rock")
ax.plot(hist[:, 1], color=COLORS["secondary"], label="Paper")
ax.plot(hist[:, 2], color=COLORS["tertiary"], label="Scissors")
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)  # release the figure once rendered
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")
Demo 5: why Nash matters for GANs and agents
This demo turns the approved TOC concept into a small executable game.
Code cell 13
header("Demo 5 - why Nash matters for GANs and agents: adversarial inner maximization")
# Inner maximization of robust training: find the worst perturbation in [-eps, eps].
delta, loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # squared error with no perturbation
print("Adversarial delta:", delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(loss, 4))
# Maximizing over delta can only keep the loss equal or push it higher.
check_true(loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")
Demo 6: normal-form game
This demo turns the approved TOC concept into a small executable game.
Code cell 15
header("Demo 6 - normal-form game: payoff heatmap")
# A 3x3 row-player payoff matrix, visualized on a diverging colormap.
A = np.array([[2, -1, 0], [0, 1, -2], [-1, 3, 1]], dtype=float)
fig, ax = plt.subplots()
im = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(im, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)  # best row reply to each column action
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")
Demo 7: players actions and payoffs
This demo turns the approved TOC concept into a small executable game.
Code cell 17
header("Demo 7 - players actions and payoffs: pure Nash equilibria")
# Prisoner's-dilemma-style payoffs: A for the row player, B for the column player.
A = np.array([[3, 0], [5, 1]], dtype=float)
B = np.array([[3, 5], [0, 1]], dtype=float)
# Find profiles where neither player benefits from a unilateral deviation.
eq = pure_nash(A, B)
print("Pure Nash equilibria:", eq)
check_true((1, 1) in eq, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")
Demo 8: pure strategy
This demo turns the approved TOC concept into a small executable game.
Code cell 19
header("Demo 8 - pure strategy: matching pennies mixed strategy")
# Matching pennies has no pure equilibrium; the mixed equilibrium is uniform.
A = np.array([[1, -1], [-1, 1]], dtype=float)
p = np.array([0.5, 0.5])  # row player's mixed strategy
q = np.array([0.5, 0.5])  # column player's mixed strategy
value = expected_payoff(A, p, q)  # p^T A q
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")
Demo 9: mixed strategy
This demo turns the approved TOC concept into a small executable game.
Code cell 21
header("Demo 9 - mixed strategy $\\boldsymbol{\\pi}_i$: minimax grid value")
# Row player's payoffs in zero-sum matching pennies.
A = np.array([[1, -1], [-1, 1]], dtype=float)
# Grid search over both players' 2-point mixed strategies.
lower, upper = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(lower, 4))
print("Grid minimax:", round(upper, 4))
# Minimax theorem: maximin and minimax agree; the grid approximation is close.
check_true(abs(lower - upper) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")
Demo 10: Nash equilibrium
This demo turns the approved TOC concept into a small executable game.
Code cell 23
header("Demo 10 - Nash equilibrium: fictitious play in rock-paper-scissors")
# Iterated best responses against the opponent's empirical action frequencies.
hist = fictitious_play_rps(steps=180)
final_policy = hist[-1]  # empirical row mixture after the final round
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
# Uniform (1/3, 1/3, 1/3) is the RPS equilibrium; tolerance is deliberately loose.
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
fig, ax = plt.subplots()
ax.plot(hist[:, 0], color=COLORS["primary"], label="Rock")
ax.plot(hist[:, 1], color=COLORS["secondary"], label="Paper")
ax.plot(hist[:, 2], color=COLORS["tertiary"], label="Scissors")
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)  # release the figure once rendered
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")
Demo 11: best-response tables
This demo turns the approved TOC concept into a small executable game.
Code cell 25
header("Demo 11 - best-response tables: adversarial inner maximization")
# Worst-case additive perturbation for a 1-D linear model, found by PGD.
delta, loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # unperturbed squared error
print("Adversarial delta:", delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(loss, 4))
# The inner max over delta can never make the loss smaller than the clean loss.
check_true(loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")
Demo 12: dominant strategies
This demo turns the approved TOC concept into a small executable game.
Code cell 27
header("Demo 12 - dominant strategies: payoff heatmap")
# 3x3 row-player payoff matrix rendered as a diverging heatmap.
A = np.array([[2, -1, 0], [0, 1, -2], [-1, 3, 1]], dtype=float)
fig, ax = plt.subplots()
im = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(im, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)  # best row reply to each column action
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")
Demo 13: coordination games
This demo turns the approved TOC concept into a small executable game.
Code cell 29
header("Demo 13 - coordination games: pure Nash equilibria")
# Prisoner's-dilemma-style bimatrix game (row payoffs A, column payoffs B).
A = np.array([[3, 0], [5, 1]], dtype=float)
B = np.array([[3, 5], [0, 1]], dtype=float)
# Enumerate mutually best-responding pure action profiles.
eq = pure_nash(A, B)
print("Pure Nash equilibria:", eq)
check_true((1, 1) in eq, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")
Demo 14: Pareto inefficiency
This demo turns the approved TOC concept into a small executable game.
Code cell 31
header("Demo 14 - Pareto inefficiency: matching pennies mixed strategy")
# Matching pennies: zero-sum, equilibrium requires both players to randomize.
A = np.array([[1, -1], [-1, 1]], dtype=float)
p = np.array([0.5, 0.5])  # row player's mixed strategy
q = np.array([0.5, 0.5])  # column player's mixed strategy
value = expected_payoff(A, p, q)  # p^T A q
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")
Demo 15: no-pure-equilibrium examples
This demo turns the approved TOC concept into a small executable game.
Code cell 33
header("Demo 15 - no-pure-equilibrium examples: minimax grid value")
# Matching pennies payoffs for the row player (zero-sum).
A = np.array([[1, -1], [-1, 1]], dtype=float)
# Grid-approximate the maximin (row) and minimax (column) values.
lower, upper = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(lower, 4))
print("Grid minimax:", round(upper, 4))
# The two values coincide at the game value; the grid should nearly match.
check_true(abs(lower - upper) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")
Demo 16: probability simplex
This demo turns the approved TOC concept into a small executable game.
Code cell 35
header("Demo 16 - probability simplex: fictitious play in rock-paper-scissors")
# Track the empirical mixture (a point on the 3-simplex) over repeated play.
hist = fictitious_play_rps(steps=180)
final_policy = hist[-1]  # empirical row mixture after the final round
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
# Expect convergence toward the uniform equilibrium mixture, loosely bounded.
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
fig, ax = plt.subplots()
ax.plot(hist[:, 0], color=COLORS["primary"], label="Rock")
ax.plot(hist[:, 1], color=COLORS["secondary"], label="Paper")
ax.plot(hist[:, 2], color=COLORS["tertiary"], label="Scissors")
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)  # release the figure once rendered
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")
Demo 17: indifference principle
This demo turns the approved TOC concept into a small executable game.
Code cell 37
header("Demo 17 - indifference principle: adversarial inner maximization")
# PGD inner loop: search the perturbation ball [-eps, eps] for the worst delta.
delta, loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # baseline loss with delta = 0
print("Adversarial delta:", delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(loss, 4))
# Maximizing over delta can only match or exceed the clean loss.
check_true(loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")
Demo 18: matching pennies
This demo turns the approved TOC concept into a small executable game.
Code cell 39
header("Demo 18 - matching pennies: payoff heatmap")
# 3x3 row-player payoff matrix shown on a diverging colormap.
A = np.array([[2, -1, 0], [0, 1, -2], [-1, 3, 1]], dtype=float)
fig, ax = plt.subplots()
im = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(im, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)  # best row reply to each column action
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")
Demo 19: support enumeration preview
This demo turns the approved TOC concept into a small executable game.
Code cell 41
header("Demo 19 - support enumeration preview: pure Nash equilibria")
# Prisoner's-dilemma-style bimatrix game (row payoffs A, column payoffs B).
A = np.array([[3, 0], [5, 1]], dtype=float)
B = np.array([[3, 5], [0, 1]], dtype=float)
# Exhaustively check every pure action profile for mutual best responses.
eq = pure_nash(A, B)
print("Pure Nash equilibria:", eq)
check_true((1, 1) in eq, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")
Demo 20: entropy and stochastic policies
This demo turns the approved TOC concept into a small executable game.
Code cell 43
header("Demo 20 - entropy and stochastic policies: matching pennies mixed strategy")
# Matching pennies: the equilibrium policy is fully stochastic (maximum entropy).
A = np.array([[1, -1], [-1, 1]], dtype=float)
p = np.array([0.5, 0.5])  # row player's mixed strategy
q = np.array([0.5, 0.5])  # column player's mixed strategy
value = expected_payoff(A, p, q)  # p^T A q
print("Expected payoff at mixed equilibrium:", value)
check_close(value, 0.0, name="matching pennies value")
print("Game-theory lesson: randomization can remove profitable pure deviations.")
Demo 21: Nash existence theorem
This demo turns the approved TOC concept into a small executable game.
Code cell 45
header("Demo 21 - Nash existence theorem: minimax grid value")
# Matching pennies payoffs for the row player (zero-sum).
A = np.array([[1, -1], [-1, 1]], dtype=float)
# Approximate both players' optimal mixed strategies on a probability grid.
lower, upper = grid_zero_sum_value(A, grid=101)
print("Grid maximin:", round(lower, 4))
print("Grid minimax:", round(upper, 4))
# Existence of a (mixed) equilibrium makes maximin and minimax coincide.
check_true(abs(lower - upper) < 0.05, "grid approximation nearly matches minimax value")
print("Game-theory lesson: zero-sum optimal attack and defense meet at one value.")
Demo 22: fixed-point intuition
This demo turns the approved TOC concept into a small executable game.
Code cell 47
header("Demo 22 - fixed-point intuition: fictitious play in rock-paper-scissors")
# The uniform mixture is a fixed point of best-response dynamics in RPS.
hist = fictitious_play_rps(steps=180)
final_policy = hist[-1]  # empirical row mixture after the final round
print("Final empirical row policy:", np.round(final_policy, 3).tolist())
# Expect the empirical policy to hover near the uniform equilibrium.
check_true(np.all(np.abs(final_policy - 1/3) < 0.15), "empirical policy approaches the mixed equilibrium region")
fig, ax = plt.subplots()
ax.plot(hist[:, 0], color=COLORS["primary"], label="Rock")
ax.plot(hist[:, 1], color=COLORS["secondary"], label="Paper")
ax.plot(hist[:, 2], color=COLORS["tertiary"], label="Scissors")
ax.set_title("Fictitious play empirical strategy")
ax.set_xlabel("Iteration")
ax.set_ylabel("Probability")
ax.legend()
fig.tight_layout()
plt.show()
plt.close(fig)  # release the figure once rendered
print("Game-theory lesson: repeated adaptation can approximate equilibrium behavior.")
Demo 23: Lemke-Howson preview
This demo turns the approved TOC concept into a small executable game.
Code cell 49
header("Demo 23 - Lemke-Howson preview: adversarial inner maximization")
# PGD solves the attacker's side of the robust-training min-max problem.
delta, loss = pgd_1d(theta=1.0, x=0.25, y=1.0, eps=0.5)
clean_loss = (0.25 - 1.0) ** 2  # loss with no perturbation applied
print("Adversarial delta:", delta)
print("Clean loss:", round(clean_loss, 4))
print("Adversarial loss:", round(loss, 4))
# The adversarial loss is bounded below by the clean loss by construction.
check_true(loss >= clean_loss, "inner maximization does not reduce loss")
print("Game-theory lesson: robust learning optimizes against an adaptive perturbation.")
Demo 24: correlated equilibrium preview
This demo turns the approved TOC concept into a small executable game.
Code cell 51
header("Demo 24 - correlated equilibrium preview: payoff heatmap")
# 3x3 row-player payoff matrix visualized as a diverging heatmap.
A = np.array([[2, -1, 0], [0, 1, -2], [-1, 3, 1]], dtype=float)
fig, ax = plt.subplots()
im = ax.imshow(A, cmap="RdBu_r")
ax.set_title("Row-player payoff matrix")
ax.set_xlabel("Column action")
ax.set_ylabel("Row action")
fig.colorbar(im, ax=ax, label="Payoff")
fig.tight_layout()
plt.show()
plt.close(fig)
best_rows = np.argmax(A, axis=0)  # best row reply to each column action
print("Best row responses by column action:", best_rows.tolist())
check_true(len(best_rows) == A.shape[1], "one row best response per column action")
print("Game-theory lesson: payoff geometry determines best responses.")
Demo 25: computational hardness
This demo turns the approved TOC concept into a small executable game.
Code cell 53
header("Demo 25 - computational hardness: pure Nash equilibria")
# Prisoner's-dilemma-style bimatrix game (row payoffs A, column payoffs B).
A = np.array([[3, 0], [5, 1]], dtype=float)
B = np.array([[3, 5], [0, 1]], dtype=float)
# Brute-force enumeration is feasible here because the game is tiny.
eq = pure_nash(A, B)
print("Pure Nash equilibria:", eq)
check_true((1, 1) in eq, "defect-defect is a pure equilibrium in this payoff table")
print("Game-theory lesson: equilibrium checks unilateral deviations, not social optimality.")