Exercises Notebook
Converted from exercises.ipynb for web reading.
Interpolation and Approximation — Exercises
Practice connects theory to the algorithms that matter.
Companion: notes.md | theory.ipynb
Difficulty: ★ = Straightforward | ★★ = Moderate | ★★★ = Challenging
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import numpy as np
import numpy.linalg as nla
from scipy import stats
import scipy.linalg as la
from scipy.fft import fft, ifft, fftfreq
from scipy import interpolate
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style='whitegrid', palette='colorblind')
except ImportError:
plt.style.use('seaborn-v0_8-whitegrid')
mpl.rcParams.update({
'figure.figsize': (10, 6), 'figure.dpi': 120,
'font.size': 13, 'axes.titlesize': 15,
'axes.spines.top': False, 'axes.spines.right': False,
})
HAS_MPL = True
except ImportError:
HAS_MPL = False
COLORS = {'primary': '#0077BB', 'secondary': '#EE7733',
'tertiary': '#009988', 'error': '#CC3311', 'neutral': '#555555'}
np.set_printoptions(precision=8, suppress=True)
np.random.seed(42)
print('Setup complete.')
print("Chapter helper setup complete.")
def header(title):
print("\n" + "=" * len(title))
print(title)
print("=" * len(title))
def check_true(name, cond):
print(f"{'PASS' if cond else 'FAIL'} - {name}")
return bool(cond)
def check_close(name, got, expected, tol=1e-8):
ok = np.allclose(got, expected, atol=tol, rtol=tol)
print(f"{'PASS' if ok else 'FAIL'} - {name}")
if not ok:
print(' expected:', expected)
print(' got :', got)
return bool(ok)
print("Chapter helper setup complete.")
Exercise 1 ★ — Newton Divided Differences and Horner's Rule
Newton's form of the interpolating polynomial uses divided differences:
p(x) = f[x_0] + f[x_0,x_1](x - x_0) + f[x_0,x_1,x_2](x - x_0)(x - x_1) + ... + f[x_0,...,x_n](x - x_0)···(x - x_{n-1})
(a) Implement divided_differences(nodes, values) that returns the Newton coefficients (first row of the divided difference table).
(b) Implement newton_eval(nodes, coeffs, x) using Horner's rule: O(n) operations per evaluation.
(c) For the data x_i = 0, 1, 2, 3, 4 with f_i = 1, 3, 7, 13, 21:
- Compute and print the full divided difference table
- Find the underlying polynomial form
- Verify that the interpolant at x = 2.5 matches the analytical value
(d) Show that the 4th divided difference satisfies f[x_0, ..., x_4] = f^(4)(ξ)/4! for some ξ in [x_0, x_4]. Test with f(x) = e^x at 5 equispaced points.
Code cell 5
# Your Solution
# Exercise 1 — Scaffold
def divided_differences(nodes, values):
"""
Compute Newton divided difference coefficients.
Returns: 1D array of n+1 coefficients.
"""
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 6
# Solution
def divided_differences_sol(nodes, values):
n = len(nodes)
table = np.zeros((n, n))
table[:, 0] = values.copy()
for j in range(1, n):
for i in range(n - j):
table[i, j] = (table[i+1, j-1] - table[i, j-1]) / (nodes[i+j] - nodes[i])
return table[0, :]
def newton_eval_sol(nodes, coeffs, x):
n = len(coeffs) - 1
p = coeffs[n]
for i in range(n-1, -1, -1):
p = p * (x - nodes[i]) + coeffs[i]
return p
# (c)
nodes = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
values = np.array([1.0, 3.0, 7.0, 13.0, 21.0])
coeffs = divided_differences_sol(nodes, values)
print('Newton coefficients:', coeffs)
print('Polynomial: p(x) = c0 + c1(x-0) + c2(x-0)(x-1) + ...')
# The data come from f(x) = x^2 + x + 1 (check: f(0)=1, f(1)=3, f(2)=7, ...)
x_test = 2.5
p_newton = newton_eval_sol(nodes, coeffs, x_test)
f_true = x_test**2 + x_test + 1
print(f'\np(2.5) = {p_newton:.6f}, x^2 + x + 1 at x=2.5 = {f_true:.6f}')
print(f'Error: {abs(p_newton - f_true):.2e}')
ok = abs(p_newton - f_true) < 1e-10
print('PASS: Newton interpolant exact' if ok else 'FAIL')
# (d) Divided difference as derivative
f_exp = np.exp
h = 0.5
nodes4 = np.arange(5) * h
coeffs4 = divided_differences_sol(nodes4, f_exp(nodes4))
xi = np.mean(nodes4) # midpoint
print(f'\n[f0,...,f4] = {coeffs4[4]:.8f}')
print(f'f^(4)(xi)/4! = {np.exp(xi)/24:.8f} (f^(4) = f for exp)')
print(f'Error: {abs(coeffs4[4] - np.exp(xi)/24):.2e}')
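Optional cross-check (not part of the original exercise): the degree-n interpolating polynomial is unique, so the Newton form should agree with numpy's monomial fit of the same data; a minimal sketch, assuming the solution cell above has been run.
import numpy as np

nodes_chk = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
values_chk = np.array([1.0, 3.0, 7.0, 13.0, 21.0])
coeffs_chk = divided_differences_sol(nodes_chk, values_chk)

# np.polyfit with deg = n recovers the unique interpolating polynomial.
mono = np.polyfit(nodes_chk, values_chk, deg=len(nodes_chk) - 1)
xs = np.linspace(0, 4, 50)
newton_vals = np.array([newton_eval_sol(nodes_chk, coeffs_chk, x) for x in xs])
poly_vals = np.polyval(mono, xs)
print('max |Newton - polyfit| =', np.abs(newton_vals - poly_vals).max())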
Exercise 2 ★ — Runge's Phenomenon and Lebesgue Constants
(a) For the Runge function f(x) = 1/(1 + 25x^2) on [-1, 1], compute and print the maximum interpolation error at n = 5, 10, 15, 20 using equispaced nodes. Show the error grows with n.
(b) Repeat with Chebyshev nodes of the 2nd kind. Show the error decreases.
(c) Implement lebesgue_constant(nodes) that samples the Lebesgue function at 1000 points and returns its maximum. Compute Lambda_n for both node types at n = 5, 10, 15, 20 and show the exponential growth for equispaced nodes vs logarithmic growth for Chebyshev.
(d) Plot: (i) the Runge function and both degree-15 interpolants; (ii) the Lebesgue function for equispaced and Chebyshev nodes at n = 15.
Code cell 8
# Your Solution
# Exercise 2 — Scaffold
f_runge = lambda x: 1.0 / (1 + 25*x**2)
def barycentric_weights_general(nodes):
"""Barycentric weights for arbitrary nodes."""
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 9
# Solution
f_runge = lambda x: 1.0 / (1 + 25*x**2)
def bary_weights_sol(nodes):
n = len(nodes)
w = np.ones(n)
for j in range(n):
for k in range(n):
if k != j:
w[j] *= (nodes[j] - nodes[k])
return 1.0 / w
def cheb2_w_sol(n):
    w = np.ones(n+1)
    w[1::2] = -1.0            # alternate signs first...
    w[0] *= 0.5; w[n] *= 0.5  # ...then halve the endpoints (order matters for odd n)
    return w
def bary_eval_pt(x, nodes, vals, weights):
diffs = x - nodes
mask = np.abs(diffs) < 1e-15
if mask.any(): return vals[mask][0]
wd = weights / diffs
return np.dot(wd, vals) / wd.sum()
def lebesgue_const_sol(nodes, n_sample=1000):
w = bary_weights_sol(nodes)
x_s = np.linspace(-1, 1, n_sample)
Lambda = []
for x in x_s:
diffs = x - nodes
if np.any(np.abs(diffs) < 1e-15):
Lambda.append(1.0)
continue
wd = w / diffs
Lambda.append(np.abs(wd / wd.sum()).sum())
return max(Lambda)
x_plot = np.linspace(-1, 1, 500)
f_exact = f_runge(x_plot)
print(f"{'n':>4} {'Equispaced error':>18} {'Chebyshev error':>16} {'Lambda_eq':>10} {'Lambda_ch':>10}")
for n in [5, 10, 15, 20]:
# Equispaced
neq = np.linspace(-1, 1, n+1)
weq = bary_weights_sol(neq)
yeq = np.array([bary_eval_pt(xi, neq, f_runge(neq), weq) for xi in x_plot])
err_eq = np.abs(yeq - f_exact).max()
# Chebyshev
nch = np.cos(np.pi * np.arange(n+1) / n)
wch = cheb2_w_sol(n)
ych = np.array([bary_eval_pt(xi, nch, f_runge(nch), wch) for xi in x_plot])
err_ch = np.abs(ych - f_exact).max()
# Lebesgue constants
Leq = lebesgue_const_sol(neq)
Lch = lebesgue_const_sol(nch)
print(f'{n:>4} {err_eq:>18.4e} {err_ch:>16.4e} {Leq:>10.2f} {Lch:>10.4f}')
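Part (d) asks for a plot that the printed table above omits; a minimal plotting sketch for panel (i), assuming the solution cell above has been run (it reuses f_runge, bary_weights_sol, bary_eval_pt, and cheb2_w_sol).
import numpy as np
import matplotlib.pyplot as plt

n = 15
neq = np.linspace(-1, 1, n + 1)
nch = np.cos(np.pi * np.arange(n + 1) / n)
x_plot = np.linspace(-1, 1, 500)
yeq = np.array([bary_eval_pt(x, neq, f_runge(neq), bary_weights_sol(neq)) for x in x_plot])
ych = np.array([bary_eval_pt(x, nch, f_runge(nch), cheb2_w_sol(n)) for x in x_plot])

fig, ax = plt.subplots()
ax.plot(x_plot, f_runge(x_plot), 'k', label='1/(1+25x^2)')
ax.plot(x_plot, yeq, '--', label='equispaced, n=15')
ax.plot(x_plot, ych, ':', label='Chebyshev, n=15')
ax.set_ylim(-1, 2)   # the equispaced interpolant oscillates wildly near the ends
ax.legend(); ax.set_title('Runge phenomenon, degree 15')
plt.show()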
Exercise 3 ★ — Natural Cubic Spline from Scratch
(a) Implement thomas_solve(lower, diag, upper, rhs) for solving tridiagonal systems in O(n) operations.
(b) Use it to implement natural_cubic_spline(x, y) that returns the second derivatives M_0, ..., M_n (with M_0 = M_n = 0).
(c) Test on f(x) = sin(pi*x) with equispaced nodes. Verify the O(h^4) convergence by showing the ratio of consecutive errors approaches 16 as n doubles.
(d) Demonstrate the minimum curvature property: compute the bending energy integral of s''(x)^2 for the natural spline and for another C^2 interpolant of the same data (a piecewise-linear interpolant has zero second derivative almost everywhere, so compare against, e.g., the underlying function). Show the spline has the smaller 'bending energy'.
Code cell 11
# Your Solution
# Exercise 3 — Scaffold
def thomas_solve(lower, diag, upper, rhs):
"""
Thomas algorithm for tridiagonal system Ax = rhs.
lower: subdiagonal (length n-1)
diag: main diagonal (length n)
upper: superdiagonal (length n-1)
rhs: right-hand side (length n)
Returns: solution x (length n)
"""
n = len(diag)
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 12
# Solution
def thomas_solve_sol(lower, diag, upper, rhs):
n = len(diag)
d = diag.copy().astype(float)
r = rhs.copy().astype(float)
# Forward elimination
for i in range(1, n):
m = lower[i-1] / d[i-1]
d[i] -= m * upper[i-1]
r[i] -= m * r[i-1]
# Back substitution
x = np.zeros(n)
x[-1] = r[-1] / d[-1]
for i in range(n-2, -1, -1):
x[i] = (r[i] - upper[i] * x[i+1]) / d[i]
return x
def nat_spline_sol(x, y):
n = len(x) - 1
h = np.diff(x)
rhs = 6*((np.diff(y[1:])/h[1:]) - (np.diff(y[:-1])/h[:-1]))
diag = 2*(h[:-1] + h[1:])
off = h[1:-1]
M_inner = thomas_solve_sol(off, diag, off, rhs)
M = np.zeros(n+1)
M[1:-1] = M_inner
return M
def eval_spline_sol(xi, xn, yn, M):
h = np.diff(xn)
i = int(np.clip(np.searchsorted(xn, xi, 'right')-1, 0, len(xn)-2))
hi = h[i]
t = xi - xn[i]; tp = xn[i+1] - xi
return (M[i]*tp**3/(6*hi) + M[i+1]*t**3/(6*hi)
+ (yn[i] - M[i]*hi**2/6)*tp/hi
+ (yn[i+1] - M[i+1]*hi**2/6)*t/hi)
f_test = lambda x: np.sin(np.pi * x)
xf = np.linspace(0, 1, 1000)
errors = []
for n in [8, 16, 32, 64]:
xn = np.linspace(0, 1, n+1)
yn = f_test(xn)
M = nat_spline_sol(xn, yn)
ys = np.array([eval_spline_sol(xi, xn, yn, M) for xi in xf])
errors.append(np.abs(ys - f_test(xf)).max())
print('Cubic spline convergence:')
ns = [8, 16, 32, 64]
for i, (n, e) in enumerate(zip(ns, errors)):
ratio = errors[i-1]/e if i > 0 else float('nan')
print(f' n={n:>3}: error={e:.4e}, ratio={ratio:.2f} (target 16)')
ok = all(abs(errors[i-1]/errors[i] - 16) < 3 for i in range(1, len(errors)))
print(f'PASS: O(h^4) convergence' if ok else 'WARN: check convergence')
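Part (d) is not covered by the cell above. One reading of the minimum-curvature property: among all C^2 interpolants of the data, the natural spline minimizes the bending energy, so in particular it cannot exceed that of the underlying function f. A minimal sketch, using the fact that s'' is piecewise linear with knot values M_i, so each piece integrates exactly to h_i (M_i^2 + M_i M_{i+1} + M_{i+1}^2)/3; assumes the solution cell above has been run.
import numpy as np

xn = np.linspace(0, 1, 17)
yn = np.sin(np.pi * xn)
M = nat_spline_sol(xn, yn)
h = np.diff(xn)

# s'' is linear between M_i and M_{i+1} on each interval; integrate (s'')^2 exactly.
energy_spline = np.sum(h * (M[:-1]**2 + M[:-1]*M[1:] + M[1:]**2) / 3.0)

# For f(x) = sin(pi x): f'' = -pi^2 sin(pi x), so the integral of f''^2 is pi^4 / 2.
energy_f = np.pi**4 / 2
print(f'spline bending energy: {energy_spline:.4f}')
print(f'f bending energy:      {energy_f:.4f} (pi^4/2)')
print('PASS' if energy_spline <= energy_f + 1e-12 else 'FAIL')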
Exercise 4 ★★ — Vandermonde Conditioning and QR Stability
(a) Construct the Vandermonde matrix V for m = 50 noisy observations from f(x) = cos(3x) on [-1, 1], polynomial degree n = 15. Compute kappa(V) and kappa(V^T V).
(b) Fit the degree-15 polynomial via: (i) the normal equations V^T V c = V^T y; (ii) QR decomposition. Compare the fitted polynomial's coefficients and test-set error.
(c) Add increasing levels of noise sigma. Show that the normal equations are much more sensitive to noise than QR.
(d) Repeat with a Legendre polynomial basis instead of monomials. Show the Legendre Vandermonde is much better conditioned.
Code cell 14
# Your Solution
# Exercise 4 — Scaffold
# Data setup
np.random.seed(42)
m = 50 # observations
n = 15 # polynomial degree
x_data = np.linspace(-1, 1, m)
f_true = np.cos(3 * x_data)
def vandermonde(x, n):
"""Vandermonde matrix V with V[i,j] = x[i]^j."""
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 15
# Solution
np.random.seed(42)
m, n = 50, 15
x_data = np.linspace(-1, 1, m)
f_true = np.cos(3 * x_data)
x_test = np.linspace(-1, 1, 200)
def vander_sol(x, n):
return np.vander(x, n+1, increasing=True)
V = vander_sol(x_data, n)
kappa_V = np.linalg.cond(V)
kappa_VtV = np.linalg.cond(V.T @ V)
print(f'kappa(V) = {kappa_V:.2e}')
print(f'kappa(V^T V) = {kappa_VtV:.2e}')
print(f'Ratio kappa(V^T V)/kappa(V)^2 = {kappa_VtV/kappa_V**2:.4f} (should be ~1)')
# (b)
sigma = 1e-3
y_noisy = f_true + sigma * np.random.randn(m)
# Normal equations
try:
c_ne = la.solve(V.T @ V, V.T @ y_noisy)
V_test = vander_sol(x_test, n)
pred_ne = V_test @ c_ne
except Exception as e:
c_ne = np.nan * np.ones(n+1)
print(f'Normal equations failed: {e}')
# QR
Q, R = la.qr(V, mode='economic')
c_qr = la.solve_triangular(R, Q.T @ y_noisy)
V_test = vander_sol(x_test, n)
pred_qr = V_test @ c_qr
true_test = np.cos(3 * x_test)
print(f'\nTest error (sigma={sigma:.0e}):')
if not np.any(np.isnan(c_ne)):
print(f' Normal eq: {np.abs(pred_ne - true_test).max():.4e}')
print(f' QR: {np.abs(pred_qr - true_test).max():.4e}')
# (d) Legendre basis
from numpy.polynomial.legendre import legvander
V_leg = legvander(x_data, n)
kappa_leg = np.linalg.cond(V_leg)
print(f'\nLegendre Vandermonde kappa: {kappa_leg:.4f} (vs monomial {kappa_V:.2e})')
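Part (c), the noise sweep, is not exercised above; a minimal sketch, assuming the solution cell above has been run (V, f_true, m, n, x_test, vander_sol). The sigma values here are illustrative choices; the original prompt's levels were lost in conversion.
import numpy as np
import scipy.linalg as la

rng = np.random.default_rng(0)
for sigma in [1e-6, 1e-3, 1e-1]:   # illustrative noise levels
    y = f_true + sigma * rng.standard_normal(m)
    c_ne = la.solve(V.T @ V, V.T @ y)          # normal equations (squares kappa)
    Q, R = la.qr(V, mode='economic')           # QR least squares
    c_qr = la.solve_triangular(R, Q.T @ y)
    V_t = vander_sol(x_test, n)
    e_ne = np.abs(V_t @ c_ne - np.cos(3 * x_test)).max()
    e_qr = np.abs(V_t @ c_qr - np.cos(3 * x_test)).max()
    print(f'sigma={sigma:.0e}: normal eq err={e_ne:.3e}, QR err={e_qr:.3e}')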
Exercise 5 ★★ — FFT, Spectral Differentiation, and Filtering
(a) Implement fft_diff(y, L) that computes the derivative of a periodic signal on [0, L] via FFT:
dy/dx = Re( IFFT( i * (2*pi*k / L) * FFT(y) ) ), k = 0, 1, ..., n/2-1, -n/2, ..., -1
Test on y = sin(3x) + cos(5x) and compare to the analytical derivative.
(b) Implement a low-pass FFT filter that zeros out all frequency components with |k| > k_cut. Apply it to a noisy signal and compare with a moving average filter of equivalent width.
(c) Demonstrate the Gibbs phenomenon: a Fourier series truncated at N terms for a square wave shows overshoot near discontinuities regardless of N. Show the overshoot for N = 10, 50, 200.
Code cell 17
# Your Solution
# Exercise 5 — Scaffold
def fft_diff(y, L=2*np.pi):
"""
Compute spectral derivative of periodic signal y on [0, L].
Returns: dy/dx at the same grid points.
"""
n = len(y)
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 18
# Solution
def fft_diff_sol(y, L=2*np.pi):
    n = len(y)
    Y = fft(y)
    k = fftfreq(n) * n  # integer wavenumbers: 0, 1, ..., n/2-1, -n/2, ..., -1
    dY = 1j * 2 * np.pi / L * k * Y
    if n % 2 == 0:
        dY[n // 2] = 0.0  # zero the unpaired Nyquist mode so the derivative stays real
    return np.real(ifft(dY))
# (a)
n = 256; L = 2*np.pi
x = np.linspace(0, L, n, endpoint=False)
y = np.sin(3*x) + np.cos(5*x)
dy_true = 3*np.cos(3*x) - 5*np.sin(5*x)
dy_fft = fft_diff_sol(y, L)
err = np.abs(dy_fft - dy_true).max()
print(f'Spectral differentiation error: {err:.2e}')
print(f'PASS: spectral diff near machine precision' if err < 1e-10 else f'FAIL: {err}')
# (b) Low-pass filter
def lowpass_fft(y, k_cut):
Y = fft(y)
k = fftfreq(len(y)) * len(y)
Y_filtered = Y.copy()
Y_filtered[np.abs(k) > k_cut] = 0
return np.real(ifft(Y_filtered))
noise = 0.3 * np.random.randn(n)
y_noisy = np.sin(3*x) + noise
y_filtered = lowpass_fft(y_noisy, k_cut=5)
err_fft = np.abs(y_filtered - np.sin(3*x)).max()
print(f'Low-pass filter error: {err_fft:.4f}')
# (c) Gibbs phenomenon
def square_wave_fourier(x, N):
"""Truncated Fourier series of square wave (period 2pi)."""
s = np.zeros_like(x)
for k in range(1, N+1, 2): # odd harmonics
s += 4/(np.pi*k) * np.sin(k*x)
return s
print('\nGibbs phenomenon - max overshoot near discontinuity:')
x_gibbs = np.linspace(0, 2*np.pi, 10000, endpoint=False)
for N in [10, 50, 200]:
s = square_wave_fourier(x_gibbs, N)
    overshoot = (s.max() - 1.0) * 100  # percent above the level 1 (the full jump is 2)
    print(f' N={N:>4}: max = {s.max():.4f}, overshoot ≈ {overshoot:.1f}%')
print('Gibbs overshoot: ~9% of the full jump (~18% above the level 1), regardless of N')
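The moving-average comparison from part (b) is omitted above; a minimal sketch, assuming the solution cell above has been run (y_noisy, x, err_fft). The window width win = 21 is an illustrative choice meant to roughly match the low-pass cutoff.
import numpy as np

win = 21                       # illustrative window width
kernel = np.ones(win) / win
# Pad periodically so the endpoints are treated like the (periodic) FFT filter.
y_pad = np.concatenate([y_noisy[-win:], y_noisy, y_noisy[:win]])
y_ma = np.convolve(y_pad, kernel, mode='same')[win:-win]
err_ma = np.abs(y_ma - np.sin(3 * x)).max()
print(f'moving average error: {err_ma:.4f} (FFT low-pass was {err_fft:.4f})')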
Exercise 6 ★★ — Chebyshev Approximation
(a) Compute the first 20 Chebyshev expansion coefficients of f(x) = |x| on [-1, 1] using the DCT. Show the O(1/k^2) decay of coefficients (since f is C^0 but not C^1).
(b) Compute the first 20 Chebyshev coefficients of e^x. Show the exponential decay consistent with the function being analytic.
(c) Implement Clenshaw's algorithm for evaluating a Chebyshev series in O(n). Compare its accuracy vs naive evaluation for large n.
(d) For f(x) = |x|, compare: (i) the minimax error (approximated using the Chebyshev truncation), (ii) equispaced interpolation error, (iii) Chebyshev interpolation error.
Code cell 20
# Your Solution
# Exercise 6 — Scaffold
def cheb2_nodes(n):
"""Chebyshev nodes of 2nd kind on [-1,1]."""
return np.cos(np.pi * np.arange(n+1) / n)
def cheb_coefficients(f_vals):
"""
Chebyshev expansion coefficients c_k via DCT-I.
f_vals: function values at Chebyshev 2nd kind nodes (n+1 values).
"""
from scipy.fft import dct
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 21
# Solution
from scipy.fft import dct
def cheb_nodes_sol(n):
return np.cos(np.pi * np.arange(n+1) / n)
def cheb_coeffs_sol(f_vals):
n = len(f_vals) - 1
c = dct(f_vals, type=1) / n
c[0] /= 2; c[-1] /= 2
return c
def clenshaw_sol(c, x):
n = len(c) - 1
b2, b1 = 0.0, 0.0
for k in range(n, 0, -1):
b2, b1 = b1, 2*x*b1 - b2 + c[k]
return x*b1 - b2 + c[0]
# (a) |x|
n_cheb = 30
xn = cheb_nodes_sol(n_cheb)
c_abs = cheb_coeffs_sol(np.abs(xn))
print('Chebyshev coefficients of |x|:')
print('  k    |c_k|    k^2*|c_k|')
for k in [1, 2, 4, 8, 16, 24]:
print(f'{k:>3} {abs(c_abs[k]):.6f} {k**2*abs(c_abs[k]):.6f}')
# (b) e^x
c_exp = cheb_coeffs_sol(np.exp(xn))
print('\nChebyshev coefficients of e^x:')
for k in [1, 2, 4, 8, 16, 24]:
print(f'k={k}: |c_k| = {abs(c_exp[k]):.6e}')
# (c) Clenshaw accuracy
x_eval = 0.7
exact = np.exp(x_eval)
clenshaw_val = clenshaw_sol(c_exp, x_eval)
naive_val = sum(c_exp[k]*np.polynomial.chebyshev.chebval(x_eval, [0]*k+[1]) for k in range(len(c_exp)))
print(f'\ne^(0.7) = {exact:.10f}')
print(f'Clenshaw: {clenshaw_val:.10f}, error = {abs(clenshaw_val-exact):.2e}')
print(f'Naive: {naive_val:.10f}, error = {abs(naive_val-exact):.2e}')
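Part (d) is not exercised above; a minimal sketch comparing a degree-15 Chebyshev truncation (a standard proxy for the minimax error, which it approaches within a modest factor) with equispaced and Chebyshev interpolation of |x|. It reuses c_abs from the cell above; chebinterpolate samples at Chebyshev points of the first kind, and polyfit at deg+1 points amounts to equispaced interpolation (it may warn about conditioning).
import numpy as np
from numpy.polynomial import chebyshev as C

deg = 15
xg = np.linspace(-1, 1, 2000)

# (i) degree-`deg` truncation of the Chebyshev series computed above (T_k = cos(k arccos x))
trunc = sum(c_abs[k] * np.cos(k * np.arccos(xg)) for k in range(deg + 1))
err_trunc = np.abs(trunc - np.abs(xg)).max()

# (ii) equispaced interpolation
xe = np.linspace(-1, 1, deg + 1)
err_eq = np.abs(np.polyval(np.polyfit(xe, np.abs(xe), deg), xg) - np.abs(xg)).max()

# (iii) Chebyshev interpolation
err_ch = np.abs(C.chebval(xg, C.chebinterpolate(np.abs, deg)) - np.abs(xg)).max()

print(f'truncation (minimax proxy): {err_trunc:.4e}')
print(f'equispaced interpolation:   {err_eq:.4e}')
print(f'Chebyshev interpolation:    {err_ch:.4e}')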
Exercise 7 ★★ — RBF Interpolation and Kernel Ridge Regression
(a) Implement Gaussian RBF interpolation for 1D scattered data: build the kernel matrix Phi with Phi_ij = exp(-eps^2 (x_i - x_j)^2) and solve Phi c = y.
(b) Show that kernel ridge regression (KRR) with regularization lambda:
c = (Phi + lambda*I)^(-1) y
interpolates exactly when lambda = 0 and smooths when lambda > 0. Plot the KRR predictor for lambda in {0, 1e-6, 1e-3, 0.1}.
(c) The condition number of Phi grows rapidly as eps -> 0. For n = 20 equispaced points and f(x) = sin(3x), plot kappa(Phi) vs eps and find the optimal eps that minimizes the test error.
Code cell 23
# Your Solution
# Exercise 7 — Scaffold
def gaussian_rbf_matrix(x, eps):
"""Build n×n Gaussian RBF kernel matrix."""
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 24
# Solution
def gauss_rbf_mat(x, eps):
X = np.array(x).reshape(-1, 1)
dists_sq = (X - X.T)**2
return np.exp(-eps**2 * dists_sq)
def rbf_eval_vec(x_new, x_data, c, eps):
x_new = np.array(x_new).reshape(-1, 1)
x_data_r = np.array(x_data).reshape(1, -1)
K = np.exp(-eps**2 * (x_new - x_data_r)**2)
return K @ c
f_rbf = np.sin
x_data = np.linspace(-np.pi, np.pi, 15)
y_data = f_rbf(3*x_data)
x_test = np.linspace(-np.pi, np.pi, 200)
eps = 1.0
# (b) KRR lambda sweep
print('KRR test error for different lambda:')
for lam in [0, 1e-6, 1e-3, 0.1]:
Phi = gauss_rbf_mat(x_data, eps)
c = la.solve(Phi + lam * np.eye(len(x_data)), y_data)
y_pred = rbf_eval_vec(x_test, x_data, c, eps)
err = np.abs(y_pred - f_rbf(3*x_test)).max()
print(f' lambda={lam:.0e}: test error = {err:.4f}')
# (c) Optimal epsilon
x_d2 = np.linspace(-2, 2, 20)
y_d2 = np.sin(3*x_d2)
x_te2 = np.linspace(-2, 2, 300)
eps_vals = np.linspace(0.1, 5, 40)
test_errs, conds = [], []
for ep in eps_vals:
Phi = gauss_rbf_mat(x_d2, ep)
conds.append(np.linalg.cond(Phi))
try:
c2 = la.solve(Phi + 1e-10*np.eye(20), y_d2)
y_pr = rbf_eval_vec(x_te2, x_d2, c2, ep)
test_errs.append(np.abs(y_pr - np.sin(3*x_te2)).max())
except Exception:
test_errs.append(np.inf)
best_eps = eps_vals[np.argmin(test_errs)]
print(f'\nOptimal epsilon: {best_eps:.2f}, test error: {min(test_errs):.4e}')
print(f'kappa(Phi) at optimal eps: {conds[np.argmin(test_errs)]:.2e}')
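Optional cross-check (not part of the original exercise): scipy ships its own RBF interpolant; a minimal sketch comparing it with the hand-rolled version, assuming scipy >= 1.7 for scipy.interpolate.RBFInterpolator and that the solution cell above has been run. With kernel='gaussian', scipy evaluates exp(-(epsilon*r)^2), matching gauss_rbf_mat.
import numpy as np
import scipy.linalg as la
from scipy.interpolate import RBFInterpolator

pts = x_data.reshape(-1, 1)              # scipy expects (n, ndim) point arrays
rbf_scipy = RBFInterpolator(pts, y_data, kernel='gaussian', epsilon=eps)
y_scipy = rbf_scipy(x_test.reshape(-1, 1))

Phi = gauss_rbf_mat(x_data, eps)
c = la.solve(Phi, y_data)
y_ours = rbf_eval_vec(x_test, x_data, c, eps)
print('max |ours - scipy| =', np.abs(y_ours - y_scipy).max())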
Exercise 8 ★★★ — Positional Encoding: Interpolation and Extrapolation
(a) Implement sinusoidal_pe(max_len, d) producing the standard transformer positional encodings. Verify the rotation property: each (sin, cos) pair advances from PE(pos) to PE(pos+k) by a fixed 2x2 rotation through angle k*theta_i.
(b) Implement linear position interpolation: when extending from training length L_train to target length L_target, scale positions as p' = p * (L_train / L_target). For a model trained on L_train = 128 and tested on L_target = 512, compare:
- Original PEs (no scaling, pure extrapolation)
- Linear interpolation
Metric: cosine similarity between PE(pos) and PE(pos+1) should vary smoothly.
(c) Implement NTK-style scaled interpolation (in the spirit of YaRN): rescale only the low-frequency (long-period) dimensions and leave the high-frequency dimensions unchanged. Show this better preserves local position resolution than uniform linear interpolation.
(d) Compute the information content of each PE dimension: the effective period T_i = 2*pi / theta_i with theta_i = 10000^(-2i/d). Show that the periods range from 2*pi ≈ 6.3 tokens (smallest, highest resolution) up to nearly 2*pi*10000 ≈ 62832 tokens (largest, global context).
Code cell 26
# Your Solution
# Exercise 8 — Scaffold
def sinusoidal_pe(max_len, d):
"""Sinusoidal positional encodings, shape (max_len, d)."""
pe = np.zeros((max_len, d))
print("Learner scaffold loaded; write your solution here, then compare below.")
Code cell 27
# Solution
def sinusoidal_pe_sol(max_len, d):
pe = np.zeros((max_len, d))
pos = np.arange(max_len)[:, np.newaxis]
div = np.exp(np.arange(0, d, 2) * (-np.log(10000.0) / d))
pe[:, 0::2] = np.sin(pos * div)
pe[:, 1::2] = np.cos(pos * div[:d//2])
return pe
d = 64
L_train = 128
PE_train = sinusoidal_pe_sol(L_train, d)
# (a) Rotation property
errors = []
for pos in [10, 50, 100]:
for k in [1, 5, 20]:
for i_dim in range(0, d, 8):
theta = 10000.0 ** (-(i_dim//2 * 2) / d)
            R = np.array([[np.cos(k*theta),  np.sin(k*theta)],
                          [-np.sin(k*theta), np.cos(k*theta)]])  # maps (sin(p*t), cos(p*t)) to (sin((p+k)*t), cos((p+k)*t))
expected = R @ PE_train[pos, i_dim:i_dim+2]
actual = PE_train[pos+k, i_dim:i_dim+2]
errors.append(np.abs(expected - actual).max())
print(f'Rotation property max error: {max(errors):.2e}')
print(f'PASS' if max(errors) < 1e-10 else 'FAIL')
# (b) Linear interpolation for context extension
L_target = 512
scale = L_train / L_target
PE_target_orig = sinusoidal_pe_sol(L_target, d)  # pure extrapolation (no position scaling)
def pe_interp(max_len, d, scale):
"""Linear interpolation PE: compress positions."""
pe = np.zeros((max_len, d))
pos = np.arange(max_len)[:, np.newaxis] * scale # Scaled positions
div = np.exp(np.arange(0, d, 2) * (-np.log(10000.0) / d))
pe[:, 0::2] = np.sin(pos * div)
pe[:, 1::2] = np.cos(pos * div[:d//2])
return pe
PE_interp = pe_interp(L_target, d, scale)
def cosine_sim(a, b):
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
print('\nLocal similarity PE(pos) vs PE(pos+1) at various positions:')
print(f"{'pos':>6} {'Original':>10} {'Interpolated':>14}")
for pos in [100, 200, 300, 400, 500]:
if pos + 1 < L_target:
sim_orig = cosine_sim(PE_target_orig[pos], PE_target_orig[pos+1])
sim_interp = cosine_sim(PE_interp[pos], PE_interp[pos+1])
print(f'{pos:>6} {sim_orig:>10.4f} {sim_interp:>14.4f}')
# (d) Period analysis
print('\nPE dimension periods:')
for i in [0, 4, 8, 16, 24, 31]:
theta = 10000.0 ** (-(i * 2) / d)
T = 2 * np.pi / theta
print(f' dim {2*i:>3}: theta={theta:.4e}, period={T:.1f} tokens')
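Part (c) is not covered above. A minimal sketch of the idea, assuming the cells above have been run (sinusoidal constants d, L_train, L_target, cosine_sim, PE_interp): dimensions whose period already exceeds the training window get their positions compressed, while short-period (high-resolution) dimensions are left untouched. The helper pe_ntk_style and its period-based cutoff are illustrative choices, not the published YaRN schedule.
import numpy as np

def pe_ntk_style(max_len, d, L_train, L_target):
    """Interpolate only long-period dimensions; keep short-period ones as-is."""
    scale = L_train / L_target
    pe = np.zeros((max_len, d))
    pos = np.arange(max_len)[:, None].astype(float)
    div = np.exp(np.arange(0, d, 2) * (-np.log(10000.0) / d))
    period = 2 * np.pi / div
    # Illustrative rule: dimensions that cannot complete a cycle within the
    # training window are compressed; fast dimensions keep native resolution.
    pos_per_dim = np.where(period > L_train, pos * scale, pos)
    pe[:, 0::2] = np.sin(pos_per_dim * div)
    pe[:, 1::2] = np.cos(pos_per_dim * div[:d // 2])
    return pe

PE_ntk = pe_ntk_style(L_target, d, L_train, L_target)
print('Local similarity PE(pos) vs PE(pos+1), NTK-style vs uniform interpolation:')
for pos in [100, 300, 500]:
    print(f'  pos={pos}: ntk={cosine_sim(PE_ntk[pos], PE_ntk[pos+1]):.4f}, '
          f'uniform={cosine_sim(PE_interp[pos], PE_interp[pos+1]):.4f}')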
Exercise 9 ★★★ — Chebyshev Approximation of GELU
Fit a low-degree Chebyshev approximation to the GELU activation on and measure max error.
Code cell 29
# Your Solution
print("Write your solution here, then compare with the reference solution below.")
Code cell 30
# Solution
from numpy.polynomial.chebyshev import chebfit, chebval
from scipy.special import erf
def gelu(x): return 0.5*x*(1+erf(x/np.sqrt(2)))
x=np.linspace(-3,3,400); y=gelu(x)
coef=chebfit(x/3,y,deg=8)
yhat=chebval(x/3,coef)
err=np.max(np.abs(y-yhat))
header('Exercise 9: Chebyshev GELU Approximation')
print(f'max error={err:.4e}')
check_true('degree-8 Chebyshev approximation is accurate on [-3,3]', err<0.03)
print('Takeaway: polynomial activation approximations are practical when the basis is numerically stable.')
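A small extension (not in the original notebook): sweeping the Chebyshev degree shows how quickly the GELU fit converges, using the same chebfit/chebval machinery and the gelu helper defined above.
import numpy as np
from numpy.polynomial.chebyshev import chebfit, chebval

x_sweep = np.linspace(-3, 3, 400)
y_sweep = gelu(x_sweep)
for deg in [2, 4, 6, 8, 10, 12]:
    c = chebfit(x_sweep / 3, y_sweep, deg=deg)  # map [-3,3] -> [-1,1] first
    err = np.max(np.abs(y_sweep - chebval(x_sweep / 3, c)))
    print(f'deg={deg:>2}: max error = {err:.2e}')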
Exercise 10 ★★★ — RBF Kernel Interpolation
Interpolate samples of a smooth function with an RBF kernel and check the interpolation error at the training points; a noisy variant is sketched after the solution.
Code cell 32
# Your Solution
print("Write your solution here, then compare with the reference solution below.")
Code cell 33
# Solution
def rbf(X,Y,ell): return np.exp(-((X[:,None]-Y[None,:])**2)/(2*ell**2))
x=np.linspace(0,1,20); y=np.sin(2*np.pi*x)
ell=0.15; lam=1e-8
K=rbf(x,x,ell)+lam*np.eye(len(x)); alpha=np.linalg.solve(K,y)
y_fit=rbf(x,x,ell)@alpha
err=np.max(np.abs(y_fit-y))
header('Exercise 10: RBF Interpolation')
print(f'train interpolation max error={err:.3e}')
check_true('RBF interpolant matches training values', err<1e-5)
print('Takeaway: kernel interpolation is the deterministic ancestor of many kernel and GP methods.')
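The noisy variant referenced above: with noisy targets, a larger ridge term lambda trades exact reproduction of the samples for smoothness. A minimal sketch reusing the rbf helper and x, y, ell from the solution cell; the noise level and lambda grid are illustrative choices.
import numpy as np

rng = np.random.default_rng(0)
y_noisy = y + 0.05 * rng.standard_normal(len(y))   # illustrative noise level
for lam in [1e-8, 1e-4, 1e-1]:
    K = rbf(x, x, ell) + lam * np.eye(len(x))
    alpha = np.linalg.solve(K, y_noisy)
    fit = rbf(x, x, ell) @ alpha
    # Small lambda reproduces the noisy samples; large lambda smooths them away.
    print(f'lambda={lam:.0e}: max |fit - noisy y| = {np.max(np.abs(fit - y_noisy)):.3e}')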