Converted from theory.ipynb for web reading.
Numerical Linear Algebra — Theory Notebook
"It is a capital mistake to theorize before one has data — and in numerical LA, the data is the condition number."
Interactive derivations covering: LU with pivoting, QR factorization, condition numbers, conjugate gradient, iterative methods, and eigenvalue algorithms.
Companion: notes.md | exercises.ipynb
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import numpy as np
import scipy.linalg as sla
import numpy.linalg as nla
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style='whitegrid', palette='colorblind')
HAS_SNS = True
except ImportError:
plt.style.use('seaborn-v0_8-whitegrid')
HAS_SNS = False
mpl.rcParams.update({
'figure.figsize': (10, 6), 'figure.dpi': 120,
'font.size': 13, 'axes.titlesize': 15, 'axes.labelsize': 13,
'legend.fontsize': 11, 'lines.linewidth': 2.0,
'axes.spines.top': False, 'axes.spines.right': False,
})
HAS_MPL = True
except ImportError:
HAS_MPL = False
COLORS = {
'primary': '#0077BB',
'secondary': '#EE7733',
'tertiary': '#009988',
'error': '#CC3311',
'neutral': '#555555',
'highlight': '#EE3377',
}
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)
print('Chapter helper setup complete.')
1. LU Factorization with Partial Pivoting
Implement GEPP from scratch and verify backward stability.
Code cell 5
# === 1.1 GEPP Implementation ===
def lu_pivot(A):
"""Gaussian elimination with partial pivoting. Returns P, L, U."""
n = A.shape[0]
A = A.astype(np.float64).copy()
P = np.eye(n)
L = np.zeros((n, n))
for k in range(n - 1):
pivot_row = k + np.argmax(np.abs(A[k:, k]))
if pivot_row != k:
A[[k, pivot_row], :] = A[[pivot_row, k], :]
P[[k, pivot_row], :] = P[[pivot_row, k], :]
L[[k, pivot_row], :k] = L[[pivot_row, k], :k]
for i in range(k + 1, n):
L[i, k] = A[i, k] / A[k, k]
A[i, k:] -= L[i, k] * A[k, k:]
L += np.eye(n)
U = np.triu(A)
return P, L, U
# Test on random 5x5
np.random.seed(42)
A = np.random.randn(5, 5)
P, L, U = lu_pivot(A)
print('P @ A - L @ U (residual):')
residual = P @ A - L @ U
print(residual)
print(f'Max entry: {np.abs(residual).max():.2e}')
ok = np.allclose(P @ A, L @ U, atol=1e-12)
print(f"{'PASS' if ok else 'FAIL'} - PA = LU verified")
# Verify L has unit diagonal and |L_ij| <= 1
ok_L = np.allclose(np.diag(L), 1.0) and np.all(np.abs(L) <= 1.0 + 1e-10)
print(f"{'PASS' if ok_L else 'FAIL'} - L has unit diagonal and |L_ij| <= 1")
Code cell 6
# === 1.2 Triangular Solves ===
def forward_sub(L, b):
"""Solve Lx = b, L lower triangular with unit diagonal."""
n = len(b)
x = np.zeros(n)
for i in range(n):
x[i] = b[i] - L[i, :i] @ x[:i]
return x
def back_sub(U, b):
"""Solve Ux = b, U upper triangular."""
n = len(b)
x = np.zeros(n)
for i in range(n - 1, -1, -1):
x[i] = (b[i] - U[i, i+1:] @ x[i+1:]) / U[i, i]
return x
def solve_lu(A, b):
P, L, U = lu_pivot(A)
return back_sub(U, forward_sub(L, P @ b))
# Verify against numpy
b = np.random.randn(5)
x_lu = solve_lu(A, b)
x_np = nla.solve(A, b)
print('Custom solve vs numpy.linalg.solve:')
print(f'Max difference: {np.abs(x_lu - x_np).max():.2e}')
print(f'Residual ||Ax - b||: {nla.norm(A @ x_lu - b):.2e}')
ok = np.allclose(x_lu, x_np, atol=1e-10)
print(f"{'PASS' if ok else 'FAIL'} - custom LU matches numpy")
2. Demonstrating Instability Without Pivoting
A near-zero pivot causes catastrophic growth in the elimination process.
Code cell 8
# === 2.1 Near-Zero Pivot Disaster ===
def lu_no_pivot(A):
"""LU without pivoting — UNSTABLE."""
n = A.shape[0]
A = A.astype(np.float64).copy()
L = np.eye(n)
for k in range(n - 1):
if abs(A[k, k]) < 1e-15:
raise ValueError(f'Near-zero pivot at step {k}: {A[k,k]}')
for i in range(k + 1, n):
L[i, k] = A[i, k] / A[k, k]
A[i, k:] -= L[i, k] * A[k, k:]
return L, np.triu(A)
# Matrix with tiny (1,1) entry — pivot is eps
eps_vals = [1e-3, 1e-6, 1e-10, 1e-14]
print('Effect of small pivot on solution accuracy:')
print(f'{"eps":>10} {"No-pivot err":>15} {"GEPP err":>15}')
b = np.array([1.0, 2.0])
for eps in eps_vals:
A = np.array([[eps, 1.0], [1.0, 1.0]])
x_true = nla.solve(A, b)
# No-pivot solution
try:
L_np, U_np = lu_no_pivot(A)
x_np = back_sub(U_np, forward_sub(L_np, b))
err_np = nla.norm(x_np - x_true) / nla.norm(x_true)
except ValueError:
err_np = float('nan')
# GEPP solution
x_gepp = solve_lu(A, b)
err_gepp = nla.norm(x_gepp - x_true) / nla.norm(x_true)
print(f'{eps:10.0e} {err_np:15.3e} {err_gepp:15.3e}')
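The failure mechanism is element growth: with pivot eps, the multiplier is 1/eps and U picks up the entry 1 − 1/eps, so rounding errors are amplified by roughly 1/eps in the solve. A short sketch measuring the no-pivot growth factor:

# Sketch: the no-pivot growth factor rho = max|U| / max|A| is about 1/eps,
# which is exactly the amplification the error table above reflects.
print(f'{"eps":>10} {"growth factor":>15}')
for eps in [1e-3, 1e-6, 1e-10, 1e-14]:
    A = np.array([[eps, 1.0], [1.0, 1.0]])
    _, U_bad = lu_no_pivot(A)
    print(f'{eps:10.0e} {np.abs(U_bad).max() / np.abs(A).max():15.3e}')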
3. Normal Equations vs QR: Condition Number Squaring
Demonstrate that κ(AᵀA) = κ(A)² and its effect on least-squares accuracy.
Code cell 10
# === 3.1 Condition Number Squaring ===
def build_matrix_with_kappa(kappa, m=50, n=10):
"""Build m x n matrix with given condition number."""
# Generate orthogonal matrices
U, _ = nla.qr(np.random.randn(m, n))
Vt, _ = nla.qr(np.random.randn(n, n))
# Singular values from 1 to kappa
sigma = np.logspace(0, np.log10(kappa), n)
return U @ np.diag(sigma) @ Vt, sigma
np.random.seed(42)
kappas = [10, 100, 1e4, 1e6, 1e8]
print(f'{"kappa(A)":>12} {"kappa(AtA)":>14} {"ratio":>10}')
for kappa in kappas:
A, _ = build_matrix_with_kappa(kappa)
k_A = nla.cond(A)
k_AtA = nla.cond(A.T @ A)
print(f'{k_A:12.3e} {k_AtA:14.3e} {k_AtA/k_A**2:10.3f}')
print()
print('kappa(A^T A) ≈ kappa(A)^2 — confirmed')
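The squaring follows from the SVD: if A = UΣVᵀ, then AᵀA = VΣ²Vᵀ, so every singular value (and hence κ) is squared. A quick numerical check of that identity:

# Sketch: sigma(A^T A) should equal sigma(A)^2 up to roundoff.
A_sq, _ = build_matrix_with_kappa(1e4)
s = nla.svd(A_sq, compute_uv=False)
s_AtA = nla.svd(A_sq.T @ A_sq, compute_uv=False)
print(f'max |sigma(A)^2 - sigma(A^T A)| / sigma_max(A^T A): '
      f'{np.abs(s**2 - s_AtA).max() / s_AtA.max():.2e}')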
Code cell 11
# === 3.2 Accuracy Comparison ===
np.random.seed(42)
kappas_test = np.logspace(1, 12, 20)
err_normal = []
err_qr = []
for kappa in kappas_test:
A, sigma = build_matrix_with_kappa(kappa, m=50, n=10)
x_true = np.random.randn(10)
b = A @ x_true
# Normal equations
try:
x_n = nla.solve(A.T @ A, A.T @ b)
err_n = nla.norm(x_n - x_true) / nla.norm(x_true)
    except nla.LinAlgError:
        err_n = 1.0
# QR (lstsq)
x_q, _, _, _ = nla.lstsq(A, b, rcond=None)
err_q = nla.norm(x_q - x_true) / nla.norm(x_true)
err_normal.append(err_n)
err_qr.append(err_q)
if HAS_MPL:
fig, ax = plt.subplots(figsize=(10, 5))
ax.loglog(kappas_test, err_normal, color=COLORS['error'], label='Normal equations')
ax.loglog(kappas_test, err_qr, color=COLORS['primary'], label='Householder QR')
ax.axhline(np.finfo(np.float64).eps, color=COLORS['neutral'],
linestyle='--', label='fp64 ε_mach')
ax.set_xlabel('Condition number κ(A)')
ax.set_ylabel('Relative error in solution')
ax.set_title('Normal equations vs QR: accuracy comparison')
ax.legend()
plt.tight_layout()
plt.show()
print('Plot displayed.')
4. Condition Number and Digit Loss
Verify: if κ(A) ≈ 10ᵏ, then roughly k digits of accuracy are lost in solving Ax = b.
Code cell 13
# === 4.1 Digits Lost as a Function of kappa ===
np.random.seed(0)
n = 10
kappas = [1e1, 1e3, 1e5, 1e7, 1e9, 1e11, 1e13]
print(f'{"kappa(A)":>12} {"log10(kappa)":>14} {"rel error":>12} {"digits lost":>12}')
eps64 = np.finfo(np.float64).eps
for kappa in kappas:
A, _ = build_matrix_with_kappa(kappa, m=n, n=n)
x_true = np.random.randn(n)
b = A @ x_true
x_comp = nla.solve(A, b)
rel_err = nla.norm(x_comp - x_true) / nla.norm(x_true)
    # fp64 carries ~16 significant digits, so digits lost ≈ 16 + log10(rel_err)
    digits_lost = max(0.0, 16 + np.log10(max(rel_err, 1e-16)))
print(f'{kappa:12.1e} {np.log10(kappa):14.1f} {rel_err:12.3e} {digits_lost:12.1f}')
print()
print('fp64 carries ~16 significant digits, so at kappa ≈ 1e16 essentially all accuracy is lost.')
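For a concrete ill-conditioned family, Hilbert matrices are the classic example (κ grows roughly like e^(3.5n)). A small sketch using scipy.linalg.hilbert, for illustration only:

# Sketch: digit loss on Hilbert matrices; by n = 12 fp64 is exhausted.
for n_h in [4, 8, 12]:
    H = sla.hilbert(n_h)
    x_true = np.ones(n_h)
    x_comp = nla.solve(H, H @ x_true)
    rel_err = nla.norm(x_comp - x_true) / nla.norm(x_true)
    print(f'n = {n_h:2d}: kappa = {nla.cond(H):.2e}, rel error = {rel_err:.2e}')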
5. Householder QR Factorization
Implement QR via Householder reflectors and verify orthogonality.
Code cell 15
# === 5.1 Householder Reflector ===
def householder_vector(x):
"""Compute v such that H = I - 2vv^T/v^Tv zeros out x[1:]."""
x = x.copy().astype(np.float64)
n = len(x)
sign = 1.0 if x[0] >= 0 else -1.0
v = x.copy()
v[0] += sign * nla.norm(x)
v = v / nla.norm(v)
return v
def householder_qr(A):
"""QR factorization via Householder reflectors."""
m, n = A.shape
A = A.copy().astype(np.float64)
Q = np.eye(m)
for k in range(min(m-1, n)):
v = householder_vector(A[k:, k])
# Apply H_k to A[k:, k:]: A -= 2v(v^T A)
A[k:, k:] -= 2 * np.outer(v, v @ A[k:, k:])
# Accumulate Q: Q[k:, :] -= 2v(v^T Q[k:, :])
Q[k:, :] -= 2 * np.outer(v, v @ Q[k:, :])
return Q.T, np.triu(A)
# Test
np.random.seed(42)
m, n = 8, 5
A = np.random.randn(m, n)
Q, R = householder_qr(A)
print('Householder QR verification:')
print(f'||Q^T Q - I||_F = {nla.norm(Q.T @ Q - np.eye(m)):.2e}')
print(f'||QR - A||_F = {nla.norm(Q @ R - A):.2e}')
print(f'R lower triangle zeros: {np.allclose(np.tril(R, -1), 0)}')
ok = np.allclose(Q.T @ Q, np.eye(m)) and np.allclose(Q @ R, A)
print(f"{'PASS' if ok else 'FAIL'} - Householder QR verified")
Code cell 16
# === 5.2 Loss of Orthogonality: CGS vs MGS vs Householder ===
def cgs_qr(A):
"""Classical Gram-Schmidt QR."""
m, n = A.shape
Q = np.zeros((m, n))
R = np.zeros((n, n))
for k in range(n):
q = A[:, k].copy()
for i in range(k):
R[i, k] = Q[:, i] @ A[:, k]
q -= R[i, k] * Q[:, i]
R[k, k] = nla.norm(q)
Q[:, k] = q / R[k, k]
return Q, R
def mgs_qr(A):
"""Modified Gram-Schmidt QR."""
m, n = A.shape
Q = A.copy().astype(np.float64)
R = np.zeros((n, n))
for k in range(n):
R[k, k] = nla.norm(Q[:, k])
Q[:, k] /= R[k, k]
for j in range(k+1, n):
R[k, j] = Q[:, k] @ Q[:, j]
Q[:, j] -= R[k, j] * Q[:, k]
return Q, R
# Test on ill-conditioned matrix
np.random.seed(42)
kappas_test = np.logspace(1, 12, 15)
orth_loss_cgs = []
orth_loss_mgs = []
orth_loss_hh = []
for kappa in kappas_test:
A, _ = build_matrix_with_kappa(kappa, m=30, n=10)
Q_cgs, _ = cgs_qr(A)
Q_mgs, _ = mgs_qr(A)
Q_hh, _ = householder_qr(A)
orth_loss_cgs.append(nla.norm(Q_cgs.T @ Q_cgs - np.eye(10)))
orth_loss_mgs.append(nla.norm(Q_mgs.T @ Q_mgs - np.eye(10)))
orth_loss_hh.append(nla.norm(Q_hh[:, :10].T @ Q_hh[:, :10] - np.eye(10)))
if HAS_MPL:
fig, ax = plt.subplots(figsize=(10, 5))
ax.loglog(kappas_test, orth_loss_cgs, color=COLORS['error'], label='Classical GS')
ax.loglog(kappas_test, orth_loss_mgs, color=COLORS['secondary'], label='Modified GS')
ax.loglog(kappas_test, orth_loss_hh, color=COLORS['primary'], label='Householder')
ax.axhline(np.finfo(np.float64).eps, color=COLORS['neutral'],
linestyle='--', label='fp64 ε_mach')
ax.set_xlabel('Condition number κ(A)')
ax.set_ylabel('||I - Q^T Q||_F (loss of orthogonality)')
ax.set_title('Orthogonality loss: CGS vs MGS vs Householder')
ax.legend()
plt.tight_layout()
plt.show()
print('Plot displayed.')
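A standard remedy for CGS is reorthogonalization ("twice is enough"): running the projection pass a second time restores orthogonality to near machine precision for numerically full-rank input. A hedged sketch:

# Sketch: classical Gram-Schmidt with one reorthogonalization pass (CGS2).
def cgs2_qr(A):
    m, n = A.shape
    Q = np.zeros((m, n))
    R = np.zeros((n, n))
    for k in range(n):
        q = A[:, k].astype(np.float64).copy()
        for _ in range(2):              # project twice ("twice is enough")
            c = Q[:, :k].T @ q
            q -= Q[:, :k] @ c
            R[:k, k] += c
        R[k, k] = nla.norm(q)
        Q[:, k] = q / R[k, k]
    return Q, R

A_ill, _ = build_matrix_with_kappa(1e10, m=30, n=10)
Q2, R2 = cgs2_qr(A_ill)
print(f'CGS2 orthogonality loss: {nla.norm(Q2.T @ Q2 - np.eye(10)):.2e}')
print(f'CGS2 residual ||A - QR||: {nla.norm(A_ill - Q2 @ R2):.2e}')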
6. Conjugate Gradient Method
Implement CG and verify the convergence bound ‖x_k − x*‖_A ≤ 2((√κ − 1)/(√κ + 1))ᵏ ‖x_0 − x*‖_A.
Code cell 18
# === 6.1 CG Implementation ===
def conjugate_gradient(A, b, x0=None, tol=1e-12, max_iter=None):
"""Conjugate gradient for SPD system Ax = b."""
n = len(b)
if x0 is None: x0 = np.zeros(n)
if max_iter is None: max_iter = 2 * n
x = x0.copy().astype(np.float64)
r = b - A @ x
p = r.copy()
rs_old = r @ r
residuals = [np.sqrt(rs_old) / nla.norm(b)]
for k in range(max_iter):
Ap = A @ p
alpha = rs_old / (p @ Ap)
x += alpha * p
r -= alpha * Ap
rs_new = r @ r
residuals.append(np.sqrt(rs_new) / nla.norm(b))
if residuals[-1] < tol:
break
beta = rs_new / rs_old
p = r + beta * p
rs_old = rs_new
return x, residuals
# Test on small SPD system
np.random.seed(42)
n = 20
B = np.random.randn(n, n)
A_spd = B.T @ B + 0.1 * np.eye(n) # Make SPD
b_vec = np.random.randn(n)
x_true = nla.solve(A_spd, b_vec)
x_cg, residuals = conjugate_gradient(A_spd, b_vec)
print(f'CG converged in {len(residuals)-1} iterations')
print(f'Final residual: {residuals[-1]:.2e}')
print(f'Forward error: {nla.norm(x_cg - x_true) / nla.norm(x_true):.2e}')
ok = nla.norm(x_cg - x_true) / nla.norm(x_true) < 1e-10
print(f"{'PASS' if ok else 'FAIL'} - CG converged to correct solution")
Code cell 19
# === 6.2 CG Convergence vs Condition Number ===
np.random.seed(42)
n = 100
kappa_vals = [1, 10, 100, 1000]
convergence_data = {}
for kappa in kappa_vals:
# Build SPD matrix with condition number kappa
lam = np.linspace(1, kappa, n)
Q, _ = nla.qr(np.random.randn(n, n))
A_k = Q @ np.diag(lam) @ Q.T
b_k = np.random.randn(n)
_, residuals = conjugate_gradient(A_k, b_k, max_iter=200)
convergence_data[kappa] = residuals
if HAS_MPL:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax = axes[0]
color_list = [COLORS['primary'], COLORS['secondary'],
COLORS['tertiary'], COLORS['error']]
for (kappa, res), col in zip(convergence_data.items(), color_list):
ax.semilogy(res, color=col, label=f'κ = {kappa}')
ax.set_xlabel('CG iteration k')
ax.set_ylabel('Relative residual (log scale)')
ax.set_title('CG convergence vs condition number')
ax.legend()
ax = axes[1]
# Theoretical bound comparison for kappa=100
kappa = 100
res = convergence_data[kappa]
k_vals = np.arange(len(res))
bound = [2 * ((np.sqrt(kappa)-1)/(np.sqrt(kappa)+1))**k for k in k_vals]
ax.semilogy(k_vals, res, color=COLORS['primary'], label='Actual CG (κ=100)')
ax.semilogy(k_vals, bound, color=COLORS['error'], linestyle='--', label='Theoretical bound')
ax.set_xlabel('Iteration k')
ax.set_ylabel('Relative residual / bound')
ax.set_title('CG vs theoretical convergence bound')
ax.legend()
fig.tight_layout()
plt.show()
print('Plot displayed.')
7. Power Iteration and Eigenvalue Algorithms
Implement power iteration and verify the convergence rate |λ₂/λ₁|ᵏ.
Code cell 21
# === 7.1 Power Iteration ===
def power_iteration(A, tol=1e-12, max_iter=1000):
"""Find dominant eigenvalue/eigenvector."""
n = A.shape[0]
x = np.random.randn(n)
x /= nla.norm(x)
lam_old = 0.0
lam_history = []
for k in range(max_iter):
y = A @ x
lam = x @ y # Rayleigh quotient
lam_history.append(lam)
x = y / nla.norm(y)
if abs(lam - lam_old) < tol * abs(lam):
break
lam_old = lam
return lam, x, lam_history
# Test on symmetric matrix with known eigenvalues
np.random.seed(42)
n = 10
lam_true = np.array([10.0, 3.0, 2.0, 1.5, 1.2, 1.0, 0.8, 0.5, 0.3, 0.1])
Q, _ = nla.qr(np.random.randn(n, n))
A_eig = Q @ np.diag(lam_true) @ Q.T
lam_pi, x_pi, lam_history = power_iteration(A_eig)
print(f'Power iteration: lambda_1 = {lam_pi:.10f} (true: {lam_true[0]:.1f})')
print(f'Converged in {len(lam_history)} iterations')
print()
# Verify convergence rate: the eigenvector error contracts by |lambda_2/lambda_1|
# per step; for symmetric A the Rayleigh-quotient eigenvalue estimate converges
# at the square of that rate.
ratio = lam_true[1] / lam_true[0]  # lambda_2 / lambda_1
print(f'Predicted convergence rate: |lambda_2/lambda_1| = {ratio:.3f}')
print(f'Predicted iterations to eigenvector accuracy 1e-6: ~{int(-6 / np.log10(ratio)) + 1}')
ok = abs(lam_pi - lam_true[0]) / lam_true[0] < 1e-10
print(f"{'PASS' if ok else 'FAIL'} - power iteration converged to correct eigenvalue")
8. Condition Number and Gradient Descent Convergence
Show that κ(H) directly determines the gradient-descent convergence rate (κ − 1)/(κ + 1).
Code cell 23
# === 8.1 Gradient Descent Convergence vs kappa ===
def gradient_descent(H, b, eta=None, n_iters=500):
"""GD on quadratic f(x) = 0.5 x^T H x - b^T x."""
n = len(b)
x = np.zeros(n)
x_star = nla.solve(H, b)
    evals = nla.eigvalsh(H)               # one eigendecomposition, reused
    lam_max, lam_min = evals.max(), evals.min()
if eta is None:
eta = 2.0 / (lam_max + lam_min) # Optimal step
errors = [nla.norm(x - x_star)]
for _ in range(n_iters):
grad = H @ x - b
x -= eta * grad
errors.append(nla.norm(x - x_star))
return x, errors
np.random.seed(42)
n = 50
kappa_vals = [1, 10, 100, 1000]
gd_errors = {}
for kappa in kappa_vals:
lam = np.concatenate([np.ones(n//2), np.ones(n//2) * kappa])
Q, _ = nla.qr(np.random.randn(n, n))
H = Q @ np.diag(lam) @ Q.T
b_vec = np.random.randn(n)
_, errors = gradient_descent(H, b_vec, n_iters=300)
gd_errors[kappa] = [e / errors[0] for e in errors]
if HAS_MPL:
fig, ax = plt.subplots(figsize=(10, 5))
color_list = [COLORS['primary'], COLORS['secondary'],
COLORS['tertiary'], COLORS['error']]
for (kappa, errors), col in zip(gd_errors.items(), color_list):
iters = np.arange(len(errors))
rate = (kappa - 1) / (kappa + 1)
bound = rate ** iters
ax.semilogy(iters, errors, color=col, label=f'κ = {kappa}')
ax.semilogy(iters, bound, color=col, linestyle='--', alpha=0.5)
ax.set_xlabel('GD iteration k')
ax.set_ylabel('Relative error (log scale)')
ax.set_title('GD convergence vs condition number (solid: actual, dashed: bound)')
ax.legend()
plt.tight_layout()
plt.show()
print('Plot displayed.')
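As a cross-check, the empirical contraction factor can be read directly off the error sequence and compared against (κ − 1)/(κ + 1):

# Sketch: per-iteration contraction factor, estimated as a geometric mean,
# versus the theoretical optimal-step rate (kappa - 1) / (kappa + 1).
print(f'{"kappa":>8} {"empirical rate":>16} {"(k-1)/(k+1)":>14}')
for kappa, errors in gd_errors.items():
    e = np.array(errors)
    e = e[e > 1e-12]                  # stay above the roundoff floor
    emp = (e[-1] / e[1]) ** (1.0 / (len(e) - 2)) if len(e) > 2 else 0.0
    print(f'{kappa:8d} {emp:16.4f} {(kappa - 1) / (kappa + 1):14.4f}')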
Summary
| Algorithm | Stability | Cost | When to use |
|---|---|---|---|
| LU (no pivot) | UNSTABLE | ~(2/3)n³ flops | Never |
| LU (partial pivot) | Backward stable* | ~(2/3)n³ flops | General square systems |
| Cholesky | Backward stable (no pivoting needed) | ~(1/3)n³ flops | SPD matrices |
| Householder QR | Backward stable | ~2mn² − (2/3)n³ flops | Least squares |
| Normal equations | UNSTABLE (squares κ) | ~mn² + (1/3)n³ flops | Avoid when A is ill-conditioned |
| CG | Stable | O(nnz) per iteration | Sparse SPD systems |
| GMRES | Stable | O(nnz) per iteration, plus a growing Krylov basis | General sparse systems |

*Backward stable in practice; the worst-case growth factor 2ⁿ⁻¹ is essentially never observed.
Key rule: Forward error ≤ κ(A) × backward error. A backward stable algorithm gives the best possible accuracy — the condition number is the fundamental limit.
Next: Numerical Optimization →