Theory Notebook
Converted from theory.ipynb for web reading.
Interpolation and Approximation — Theory Notebook
"The art of interpolation is the art of making the most of what you know."
Interactive derivations covering: Lagrange/barycentric interpolation, Runge's phenomenon, Chebyshev nodes, cubic splines, least-squares fitting, FFT, and RBF/kernel methods.
Companion: notes.md | exercises.ipynb
Code cell 2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style="whitegrid", palette="colorblind")
HAS_SNS = True
except ImportError:
plt.style.use("seaborn-v0_8-whitegrid")
HAS_SNS = False
mpl.rcParams.update({
"figure.figsize": (10, 6),
"figure.dpi": 120,
"font.size": 13,
"axes.titlesize": 15,
"axes.labelsize": 13,
"xtick.labelsize": 11,
"ytick.labelsize": 11,
"legend.fontsize": 11,
"legend.framealpha": 0.85,
"lines.linewidth": 2.0,
"axes.spines.top": False,
"axes.spines.right": False,
"savefig.bbox": "tight",
"savefig.dpi": 150,
})
np.random.seed(42)
print("Plot setup complete.")
Code cell 3
import numpy as np
import scipy.linalg as la
from scipy import interpolate
from scipy.fft import fft, ifft, fftfreq, dct
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
import seaborn as sns
sns.set_theme(style='whitegrid', palette='colorblind')
HAS_SNS = True
except ImportError:
plt.style.use('seaborn-v0_8-whitegrid')
HAS_SNS = False
mpl.rcParams.update({
'figure.figsize': (10, 6), 'figure.dpi': 120,
'font.size': 13, 'axes.titlesize': 15, 'axes.labelsize': 13,
'xtick.labelsize': 11, 'ytick.labelsize': 11,
'legend.fontsize': 11, 'lines.linewidth': 2.0,
'axes.spines.top': False, 'axes.spines.right': False,
})
HAS_MPL = True
except ImportError:
HAS_MPL = False
COLORS = {
'primary': '#0077BB',
'secondary': '#EE7733',
'tertiary': '#009988',
'error': '#CC3311',
'neutral': '#555555',
'highlight': '#EE3377',
}
np.set_printoptions(precision=6, suppress=True)
np.random.seed(42)
print('Setup complete.')
print("Chapter helper setup complete.")
1. Polynomial Interpolation
We implement Lagrange and Newton divided difference interpolation, then compare their numerical properties.
Code cell 5
# === 1.1 Lagrange Interpolation ===
def lagrange_basis(nodes, j, x):
"""Evaluate the j-th Lagrange basis polynomial at x."""
n = len(nodes) - 1
result = 1.0
for k in range(n + 1):
if k != j:
result *= (x - nodes[k]) / (nodes[j] - nodes[k])
return result
def lagrange_interp(nodes, values, x):
"""Evaluate Lagrange interpolant at x."""
return sum(values[j] * lagrange_basis(nodes, j, x)
for j in range(len(nodes)))
# Test on simple data
nodes = np.array([0.0, 1.0, 2.0, 3.0])
values = np.array([1.0, 3.0, 7.0, 13.0])  # samples of f(x) = 1 + x + x^2
x_test = 1.5
p = lagrange_interp(nodes, values, x_test)
print(f'Lagrange at x=1.5: {p:.6f} (expected: {1 + 1.5 + 1.5**2:.6f})')
x_plot = np.linspace(0, 3, 200)
y_interp = [lagrange_interp(nodes, values, xi) for xi in x_plot]
y_exact = 1 + x_plot + x_plot**2  # the true underlying quadratic
print(f'Max error: {np.abs(np.array(y_interp) - y_exact).max():.2e}')
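# Optional cross-check (not in the original notebook; assumes SciPy is installed):
# scipy.interpolate.lagrange returns the same interpolating polynomial as a numpy poly1d.
from scipy.interpolate import lagrange as scipy_lagrange
p_scipy = scipy_lagrange(nodes, values)
y_scipy = p_scipy(x_plot)
print(f'Max |ours - scipy.interpolate.lagrange|: {np.abs(np.array(y_interp) - y_scipy).max():.2e}')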
Code cell 6
# === 1.2 Barycentric Lagrange Interpolation ===
def barycentric_weights(nodes):
"""Compute barycentric weights w_j = 1 / prod_{k!=j}(x_j - x_k)."""
n = len(nodes)
w = np.ones(n)
for j in range(n):
for k in range(n):
if k != j:
w[j] *= (nodes[j] - nodes[k])
return 1.0 / w
def cheb2_weights(n):
    """O(n) barycentric weights for Chebyshev 2nd kind nodes x_j = cos(j*pi/n)."""
    w = (-1.0) ** np.arange(n + 1)  # alternating signs (-1)^j
    w[0] *= 0.5
    w[-1] *= 0.5                    # endpoint weights are halved (sign preserved)
    return w
def bary_eval(x, nodes, values, weights):
"""Evaluate barycentric interpolant at scalar x."""
diffs = x - nodes
exact_idx = np.where(np.abs(diffs) < 1e-15)[0]
if len(exact_idx) > 0:
return values[exact_idx[0]]
wd = weights / diffs
return np.dot(wd, values) / wd.sum()
# Compare general vs Chebyshev weights for n=5 Chebyshev nodes
n = 5
nodes_ch = np.cos(np.pi * np.arange(n+1) / n) # Chebyshev 2nd kind
w_general = barycentric_weights(nodes_ch)
w_fast = cheb2_weights(n)
# Weights should be proportional
ratio = w_general / w_fast
print(f'Ratio w_general/w_fast (should be constant): {ratio.round(4)}')
print(f'Max deviation from constant: {np.std(ratio):.2e}')
ok = np.std(ratio) < 1e-10
print(f'PASS: barycentric weights consistent' if ok else 'FAIL')
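# Optional cross-check against scipy.interpolate.BarycentricInterpolator (assumes SciPy):
# for any polynomial of degree <= n = 5, both evaluations should agree to rounding error.
from scipy.interpolate import BarycentricInterpolator
f_check = lambda x: x**4 - 2*x  # degree 4 <= 5, so the interpolant reproduces it exactly
bi = BarycentricInterpolator(nodes_ch, f_check(nodes_ch))
xs_check = np.linspace(-1, 1, 9)
ours = np.array([bary_eval(xi, nodes_ch, f_check(nodes_ch), w_fast) for xi in xs_check])
print(f'Max |ours - scipy|: {np.abs(ours - bi(xs_check)).max():.2e}')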
2. Runge's Phenomenon and Chebyshev Nodes
Demonstrating that equispaced nodes cause divergence while Chebyshev nodes achieve near-optimal interpolation.
Code cell 8
# === 2.1 Runge's Phenomenon ===
f_runge = lambda x: 1.0 / (1 + 25*x**2)
x_plot = np.linspace(-1, 1, 500)
f_exact = f_runge(x_plot)
max_errors_eq = []
max_errors_ch = []
ns = range(2, 24)
for n in ns:
# Equispaced
nodes_eq = np.linspace(-1, 1, n+1)
w_eq = barycentric_weights(nodes_eq)
y_eq = np.array([bary_eval(xi, nodes_eq, f_runge(nodes_eq), w_eq)
for xi in x_plot])
max_errors_eq.append(np.abs(y_eq - f_exact).max())
# Chebyshev (2nd kind)
nodes_ch = np.cos(np.pi * np.arange(n+1) / n)
w_ch = cheb2_weights(n)
y_ch = np.array([bary_eval(xi, nodes_ch, f_runge(nodes_ch), w_ch)
for xi in x_plot])
max_errors_ch.append(np.abs(y_ch - f_exact).max())
print('Interpolation error for 1/(1+25x^2):')
print(f' n=5: equispaced={max_errors_eq[3]:.2e}, Chebyshev={max_errors_ch[3]:.2e}')
print(f' n=10: equispaced={max_errors_eq[8]:.2e}, Chebyshev={max_errors_ch[8]:.2e}')
print(f' n=20: equispaced={max_errors_eq[18]:.2e}, Chebyshev={max_errors_ch[18]:.2e}')
print(f'\nEquispaced diverges: {max_errors_eq[-1] > max_errors_eq[0]}')
print(f'Chebyshev converges: {max_errors_ch[-1] < max_errors_ch[0] * 0.1}')
Code cell 9
# === 2.2 Runge's Phenomenon Visualization ===
if HAS_MPL:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left: convergence plot
ax = axes[0]
ax.semilogy(list(ns), max_errors_eq, 'o-', color=COLORS['error'],
label='Equispaced (Runge diverges)', linewidth=2)
ax.semilogy(list(ns), max_errors_ch, 's-', color=COLORS['primary'],
label='Chebyshev (converges)', linewidth=2)
ax.set_title("Runge's Phenomenon: Error vs Degree")
ax.set_xlabel('Polynomial degree n')
ax.set_ylabel('Max interpolation error')
ax.legend()
# Right: interpolants at n=15
n = 15
ax = axes[1]
ax.plot(x_plot, f_exact, 'k-', linewidth=2, label='f(x) = 1/(1+25x²)')
nodes_eq = np.linspace(-1, 1, n+1)
w_eq = barycentric_weights(nodes_eq)
y_eq = np.array([bary_eval(xi, nodes_eq, f_runge(nodes_eq), w_eq) for xi in x_plot])
ax.plot(x_plot, y_eq, '--', color=COLORS['error'],
label=f'Equispaced n={n}', linewidth=1.5)
nodes_ch = np.cos(np.pi * np.arange(n+1) / n)
w_ch = cheb2_weights(n)
y_ch = np.array([bary_eval(xi, nodes_ch, f_runge(nodes_ch), w_ch) for xi in x_plot])
ax.plot(x_plot, y_ch, '-', color=COLORS['primary'],
label=f'Chebyshev n={n}', linewidth=2)
ax.scatter(nodes_eq, f_runge(nodes_eq), color=COLORS['error'], s=30, zorder=5)
ax.scatter(nodes_ch, f_runge(nodes_ch), color=COLORS['primary'], s=40, zorder=5)
ax.set_ylim(-2, 2)
ax.set_title(f"Interpolants at n={n}")
ax.set_xlabel('x'); ax.set_ylabel('p(x)')
ax.legend()
fig.tight_layout()
plt.show()
print('Plot displayed.')
Code cell 10
# === 2.3 Lebesgue Constants ===
def lebesgue_constant(nodes):
"""Compute Lebesgue constant for given nodes."""
w = barycentric_weights(nodes)
x_test = np.linspace(-1, 1, 1000)
lebesgue_fn = []
for x in x_test:
diffs = x - nodes
if np.any(np.abs(diffs) < 1e-15):
lebesgue_fn.append(1.0)
continue
wd = w / diffs
total = wd.sum()
# |ell_j(x)| = |wd[j]/total|
lebesgue_fn.append(np.abs(wd / total).sum())
return max(lebesgue_fn)
print('Lebesgue constants:')
print(f"{'n':>4} {'Equispaced':>12} {'Chebyshev':>12}")
for n in [5, 10, 15, 20]:
nodes_eq = np.linspace(-1, 1, n+1)
nodes_ch = np.cos(np.pi * np.arange(n+1) / n)
L_eq = lebesgue_constant(nodes_eq)
L_ch = lebesgue_constant(nodes_ch)
print(f'{n:>4} {L_eq:>12.2f} {L_ch:>12.4f}')
print('\nObservation: Equispaced constants grow exponentially,')
print('Chebyshev constants grow only logarithmically.')
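# For reference, the standard asymptotic estimates (quoted here as approximations):
#   equispaced:  Lambda_n ~ 2^(n+1) / (e * n * ln n)   -- exponential growth
#   Chebyshev:   Lambda_n <= (2/pi) * ln(n+1) + 1      -- logarithmic growth
for n in [10, 20]:
    est_eq = 2.0**(n + 1) / (np.e * n * np.log(n))
    bound_ch = (2 / np.pi) * np.log(n + 1) + 1
    print(f'n={n}: equispaced asymptotic ~ {est_eq:.2e}, Chebyshev bound <= {bound_ch:.2f}')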
3. Newton Divided Differences
The divided difference table enables incremental polynomial construction and connects interpolation to numerical differentiation.
Code cell 12
# === 3.1 Newton Divided Differences ===
def divided_differences(nodes, values):
"""Compute divided difference table. Returns coefficients."""
n = len(nodes)
table = np.zeros((n, n))
table[:, 0] = values.copy()
for j in range(1, n):
for i in range(n - j):
table[i, j] = (table[i+1, j-1] - table[i, j-1]) / (nodes[i+j] - nodes[i])
return table[0, :] # Newton coefficients
def newton_eval(nodes, coeffs, x):
"""Evaluate Newton interpolant via Horner's rule."""
n = len(coeffs) - 1
p = coeffs[n]
for i in range(n-1, -1, -1):
p = p * (x - nodes[i]) + coeffs[i]
return p
# Example from notes
nodes = np.array([0.0, 1.0, 2.0, 3.0])
values = np.array([1.0, 3.0, 7.0, 13.0])
coeffs = divided_differences(nodes, values)
print('Newton coefficients (divided differences):')
print(coeffs.round(6))
# Evaluate at several points
x_tests = [0.5, 1.5, 2.5]
true_vals = [1 + x + x**2 for x in x_tests]  # f(x) = 1 + x + x^2
for x, y_true in zip(x_tests, true_vals):
y_newton = newton_eval(nodes, coeffs, x)
print(f' x={x}: Newton={y_newton:.6f}, true={y_true:.6f}, err={abs(y_newton-y_true):.2e}')
# Verify the 3rd divided difference: it equals f'''(xi)/3! = 0 because the data is quadratic
print(f'\n3rd div diff (should be 0 for quadratic f = 1 + x + x^2): {coeffs[3]:.2e}')
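# Optional cross-check: the Newton form should agree with an ordinary monomial fit of the
# same four points (np.polyfit at full degree is exact interpolation here).
mono = np.polyfit(nodes, values, deg=3)
x_chk = np.linspace(0, 3, 7)
newton_vals = np.array([newton_eval(nodes, coeffs, xi) for xi in x_chk])
print(f'Max |Newton - polyfit|: {np.abs(newton_vals - np.polyval(mono, x_chk)).max():.2e}')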
Code cell 13
# === 3.2 Divided Difference as Derivative ===
# For f(x) = sin(x), [f0,f1,f2] ≈ f''(xi)/2
f = np.sin
x0 = 0.5
h_vals = [0.5, 0.1, 0.01, 0.001]
print('2nd divided difference vs f\'\'(x0)/2:')
print(f'f\'\'(x0)/2 = {-np.sin(x0)/2:.8f}')
for h in h_vals:
nodes3 = np.array([x0, x0+h, x0+2*h])
coeffs3 = divided_differences(nodes3, f(nodes3))
print(f' h={h:.3f}: [f0,f1,f2] = {coeffs3[2]:.8f}, '
f'error = {abs(coeffs3[2] + np.sin(x0)/2):.2e}')
4. Cubic Spline Interpolation
Construct a natural cubic spline from scratch and verify its properties.
Code cell 15
# === 4.1 Natural Cubic Spline Construction ===
def natural_cubic_spline(x, y):
"""
Compute natural cubic spline M values (2nd derivatives at nodes).
Returns: M array of shape (n+1,)
"""
n = len(x) - 1
h = np.diff(x)
# Right-hand side: 6 * second divided differences of y
rhs = np.zeros(n - 1)
for i in range(1, n):
rhs[i-1] = 6 * ((y[i+1]-y[i])/h[i] - (y[i]-y[i-1])/h[i-1])
# Tridiagonal system (Thomas algorithm)
diag = 2*(h[:-1] + h[1:]) # Main diagonal
off = h[1:-1] # Off diagonal
# Forward elimination
for i in range(1, n-1):
factor = off[i-1] / diag[i-1]
diag[i] -= factor * off[i-1]
rhs[i] -= factor * rhs[i-1]
# Back substitution
M = np.zeros(n + 1)
M[n-1] = rhs[-1] / diag[-1]
for i in range(n-2, 0, -1):
M[i] = (rhs[i-1] - off[i-1] * M[i+1]) / diag[i-1]
# M[0] = M[n] = 0 (natural BC)
return M
def eval_spline(x_eval, x_nodes, y_nodes, M):
"""Evaluate natural cubic spline at x_eval."""
h = np.diff(x_nodes)
i = np.searchsorted(x_nodes, x_eval, side='right') - 1
i = int(np.clip(i, 0, len(x_nodes) - 2))
hi = h[i]
t = x_eval - x_nodes[i]
tp = x_nodes[i+1] - x_eval
return (M[i]*tp**3/(6*hi) + M[i+1]*t**3/(6*hi)
+ (y_nodes[i] - M[i]*hi**2/6)*tp/hi
+ (y_nodes[i+1] - M[i+1]*hi**2/6)*t/hi)
# Test: interpolate sin(pi*x) on [0,1]
n_nodes = 8
x_nodes = np.linspace(0, 1, n_nodes + 1)
f_test = lambda x: np.sin(np.pi * x)
y_nodes = f_test(x_nodes)
M = natural_cubic_spline(x_nodes, y_nodes)
x_fine = np.linspace(0, 1, 500)
y_spline = np.array([eval_spline(xi, x_nodes, y_nodes, M) for xi in x_fine])
y_exact = f_test(x_fine)
h = x_nodes[1] - x_nodes[0]
error = np.abs(y_spline - y_exact).max()
print(f'n={n_nodes} nodes, h={h:.4f}')
print(f'Max error: {error:.2e}')
print(f'Expected O(h^4): {h**4:.2e}')
print(f'Ratio error/h^4: {error/h**4:.4f} (bound: (5/384)*pi^4 = {5*np.pi**4/384:.3f} for sin(pi*x))')
# Verify natural BC: M[0] and M[n] should be 0
print(f'M[0]={M[0]:.2e}, M[n]={M[-1]:.2e} (should be 0 for natural BC)')
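# Optional cross-check (assumes scipy.interpolate.CubicSpline): the natural cubic spline
# is unique, so SciPy's bc_type='natural' should match our M-based evaluator.
from scipy.interpolate import CubicSpline
cs = CubicSpline(x_nodes, y_nodes, bc_type='natural')
print(f'Max |scipy CubicSpline - ours|: {np.abs(cs(x_fine) - y_spline).max():.2e}')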
Code cell 16
# === 4.2 Spline Error Scaling Verification ===
# Verify O(h^4) convergence
n_vals = [4, 8, 16, 32, 64]
errors = []
hs = []
for n_n in n_vals:
xn = np.linspace(0, 1, n_n + 1)
yn = f_test(xn)
Mn = natural_cubic_spline(xn, yn)
xf = np.linspace(0, 1, 1000)
ys = np.array([eval_spline(xi, xn, yn, Mn) for xi in xf])
errors.append(np.abs(ys - f_test(xf)).max())
hs.append(1.0/n_n)
print('Cubic spline convergence (should be O(h^4)):')
print(f" {'n':>5} {'h':>8} {'error':>12} {'ratio':>8}")
for i, (n_n, h_val, e) in enumerate(zip(n_vals, hs, errors)):
ratio = errors[i-1]/e if i > 0 else float('nan')
print(f' {n_n:>5} {h_val:>8.5f} {e:>12.4e} {ratio:>8.2f}')
# Theoretical: halving h should reduce error by 2^4 = 16
ok = all(1.0/errors[i+1] * errors[i] > 10 for i in range(len(errors)-1))
print(f'\nPASS: O(h^4) convergence' if ok else 'WARN: check convergence')
Code cell 17
# === 4.3 Spline Visualization ===
if HAS_MPL:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left: spline vs exact
ax = axes[0]
ax.plot(x_fine, y_exact, 'k-', linewidth=2, label='$\\sin(\\pi x)$')
ax.plot(x_fine, y_spline, '--', color=COLORS['primary'],
linewidth=2, label='Natural cubic spline')
ax.scatter(x_nodes, y_nodes, color=COLORS['error'], s=60, zorder=5, label='Nodes')
ax.set_title('Natural Cubic Spline Interpolation')
ax.set_xlabel('x'); ax.set_ylabel('y')
ax.legend()
# Right: convergence
ax = axes[1]
ax.loglog(hs, errors, 'o-', color=COLORS['primary'], linewidth=2, label='Spline error')
h_ref = np.array(hs)
ax.loglog(h_ref, h_ref**4 * errors[0]/hs[0]**4, '--',
color=COLORS['neutral'], label='$O(h^4)$ reference')
ax.set_title('Cubic Spline Convergence')
ax.set_xlabel('Mesh spacing h')
ax.set_ylabel('Max error')
ax.legend()
fig.tight_layout()
plt.show()
print('Plot displayed.')
5. Least-Squares Polynomial Approximation
Demonstrating the Vandermonde conditioning problem and QR as the stable solution.
Code cell 19
# === 5.1 Vandermonde Conditioning ===
print('Vandermonde matrix condition numbers:')
print(f" {'n':>4} {'kappa(V)':>14} {'kappa(V^T V)':>16}")
for n in [5, 10, 15, 20]:
nodes = np.linspace(-1, 1, n + 1)
V = np.vander(nodes, n+1, increasing=True) # Vandermonde
kappa_V = np.linalg.cond(V)
kappa_VtV = np.linalg.cond(V.T @ V)
print(f' {n:>4} {kappa_V:>14.2e} {kappa_VtV:>16.2e}')
print('\nNormal equations square the condition number: kappa(V^T V) = kappa(V)^2')
# For n=15: compare normal equations vs QR
np.random.seed(42)
n = 15
m = 100 # More data than parameters
x_data = np.linspace(-1, 1, m)
f_true = np.sin(3*x_data)
noise = 0.05 * np.random.randn(m)
y_data = f_true + noise
V = np.vander(x_data, n+1, increasing=True)
# Normal equations (unstable for high n)
try:
c_ne = la.solve(V.T @ V, V.T @ y_data)
resid_ne = np.linalg.norm(V @ c_ne - y_data)
print(f'\nNormal eq residual: {resid_ne:.4e}')
except Exception as e:
print(f'Normal eq failed: {e}')
# QR (stable)
Q, R = la.qr(V, mode='economic')
c_qr = la.solve_triangular(R, Q.T @ y_data)
resid_qr = np.linalg.norm(V @ c_qr - y_data)
print(f'QR decomp residual: {resid_qr:.4e}')
print(f'Coefficient norm: NE={np.linalg.norm(c_ne):.2f}, QR={np.linalg.norm(c_qr):.2f}')
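# Optional second reference (NumPy's SVD-based least-squares solver, assumed available);
# it should agree closely with the QR solution of the same Vandermonde system.
c_lstsq, *_ = np.linalg.lstsq(V, y_data, rcond=None)
print(f'||c_qr - c_lstsq||: {np.linalg.norm(c_qr - c_lstsq):.2e}')
print(f'lstsq residual:     {np.linalg.norm(V @ c_lstsq - y_data):.4e}')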
6. Fourier Approximation and the FFT
Trigonometric interpolation via FFT, with application to convolution.
Code cell 21
# === 6.1 DFT and FFT Verification ===
# Create a signal with known frequency content
n = 128
t = np.linspace(0, 1, n, endpoint=False)
f_signal = np.sin(2*np.pi*5*t) + 0.5*np.cos(2*np.pi*13*t) + 0.2*np.sin(2*np.pi*25*t)
# FFT
F = fft(f_signal)
freqs = fftfreq(n) * n # Frequencies in cycles
# Dominant frequencies
power = np.abs(F)**2 / n
top_idx = np.argsort(power)[::-1][:6]
print('Top 6 frequency components:')
for idx in top_idx:
if freqs[idx] >= 0:
print(f' freq={freqs[idx]:.1f} Hz, power={power[idx]:.4f}')
# Reconstruct via IFFT
f_reconstructed = np.real(ifft(F))
ok = np.allclose(f_reconstructed, f_signal, atol=1e-12)
print(f'\nPASS: FFT/IFFT reconstruction' if ok else 'FAIL')
# Parseval's theorem: ||f||^2 = (1/n)||F||^2
lhs = np.sum(f_signal**2)
rhs = np.sum(np.abs(F)**2) / n
print(f'Parseval: ||f||^2={lhs:.6f}, ||F||^2/n={rhs:.6f}')
print(f'PASS: Parseval theorem' if abs(lhs-rhs) < 1e-10 else 'FAIL')
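# Optional check: for sinusoids at exact bin frequencies, 2|F_k|/n recovers the amplitude.
for k, amp in [(5, 1.0), (13, 0.5), (25, 0.2)]:
    print(f'bin {k:>2}: 2|F_k|/n = {2*np.abs(F[k])/n:.4f} (signal amplitude {amp})')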
Code cell 22
# === 6.2 FFT Convolution ===
def fft_convolve(x, h):
"""Compute convolution x * h via FFT. Zero-padded to avoid aliasing."""
n = len(x) + len(h) - 1
# Next power of 2 for efficiency
n_pad = 1 << int(np.ceil(np.log2(n)))
X = fft(x, n=n_pad)
H = fft(h, n=n_pad)
return np.real(ifft(X * H))[:n]
# Test with Gaussian smoothing
n_sig = 256
t_sig = np.linspace(-4, 4, n_sig)
signal = np.zeros(n_sig)
signal[n_sig//3] = 1.0 # Impulse
signal[2*n_sig//3] = 0.5 # Smaller impulse
# Gaussian kernel
sigma = 0.3
kernel_size = 31
t_k = np.linspace(-3*sigma, 3*sigma, kernel_size)
kernel = np.exp(-t_k**2 / (2*sigma**2))
kernel /= kernel.sum()
# FFT convolution
smoothed_fft = fft_convolve(signal, kernel)
# Direct convolution (NumPy)
smoothed_direct = np.convolve(signal, kernel, mode='full')[:n_sig]
err = np.abs(smoothed_fft[:n_sig] - smoothed_direct).max()
print(f'FFT vs direct convolution max error: {err:.2e}')
ok = err < 1e-10
print(f'PASS: FFT convolution matches direct' if ok else f'FAIL: error {err}')
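# Optional cross-check against SciPy's own FFT-based convolution (assumes scipy.signal).
from scipy.signal import fftconvolve
ref = fftconvolve(signal, kernel, mode='full')[:n_sig]
print(f'Max |ours - scipy.signal.fftconvolve|: {np.abs(smoothed_fft[:n_sig] - ref).max():.2e}')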
Code cell 23
# === 6.3 Chebyshev Coefficients via DCT ===
# Compute Chebyshev expansion of f(x) = |x| on [-1,1]
def cheb_coefficients(f_vals, n_coeffs=None):
"""Chebyshev coefficients via DCT-I (using Chebyshev 2nd kind nodes)."""
n = len(f_vals) - 1
from scipy.fft import dct
c = dct(f_vals, type=1) / n
c[0] /= 2; c[-1] /= 2
if n_coeffs is not None:
c = c[:n_coeffs]
return c
# f(x) = |x| is continuous but not differentiable at x=0 -> algebraic coefficient decay O(k^{-2})
n_cheb = 64
nodes_ch = np.cos(np.pi * np.arange(n_cheb+1) / n_cheb)
f_abs = np.abs(nodes_ch)
c = cheb_coefficients(f_abs)
print('Chebyshev coefficients of |x|:')
print('k |c_k|')
for k in [0, 1, 2, 4, 8, 16, 32]:
print(f'{k:>3} {abs(c[k]):.6f}')
# Decay rate: |x| has a derivative of bounded variation, so c_k = O(k^{-2});
# the odd coefficients vanish by symmetry, so check the decay on even k only.
ks = np.arange(2, 30, 2)
ratios = [abs(c[k]) * k**2 for k in ks if k < len(c)]
print('\nk^2 * |c_k| for even k (should be ~constant for k^(-2) decay):')
print(np.array(ratios[:8]).round(4))
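# Optional check: the coefficients define the degree-n Chebyshev interpolant, so
# numpy's chebval should reproduce f(x) = |x| at the interpolation nodes.
from numpy.polynomial import chebyshev as npcheb
recon = npcheb.chebval(nodes_ch, c)
print(f'Max reconstruction error at the nodes: {np.abs(recon - f_abs).max():.2e}')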
7. Radial Basis Function Interpolation
Implementing Gaussian RBF interpolation and connecting to kernel methods.
Code cell 25
# === 7.1 Gaussian RBF Interpolation ===
def rbf_kernel(x, y, eps=1.0):
"""Gaussian RBF: phi(r) = exp(-eps^2 * r^2)."""
r = np.abs(x - y) if np.isscalar(x) else np.linalg.norm(x - y)
return np.exp(-eps**2 * r**2)
def rbf_interpolate(x_data, y_data, eps=1.0):
"""Fit Gaussian RBF interpolant. Returns coefficients c."""
n = len(x_data)
Phi = np.array([[rbf_kernel(x_data[i], x_data[j], eps)
for j in range(n)] for i in range(n)])
c = la.solve(Phi, y_data)
return c, Phi
def rbf_eval(x_new, x_data, c, eps=1.0):
"""Evaluate RBF interpolant at x_new."""
return sum(c[j] * rbf_kernel(x_new, x_data[j], eps)
for j in range(len(x_data)))
# Test on sin(3x) data
f_rbf = lambda x: np.sin(3*x)
x_data = np.linspace(-2, 2, 15)
y_data = f_rbf(x_data)
x_fine = np.linspace(-2, 2, 300)
errors_by_eps = {}
for eps in [0.5, 1.0, 3.0]:
c, Phi = rbf_interpolate(x_data, y_data, eps)
kappa = np.linalg.cond(Phi)
y_rbf = np.array([rbf_eval(xi, x_data, c, eps) for xi in x_fine])
err = np.abs(y_rbf - f_rbf(x_fine)).max()
errors_by_eps[eps] = err
print(f'eps={eps:.1f}: kappa(Phi)={kappa:.2e}, max_error={err:.4e}')
print(f'\nBest epsilon: {min(errors_by_eps, key=errors_by_eps.get)}')
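# Optional cross-check (assumes SciPy >= 1.7): RBFInterpolator's 'gaussian' kernel uses
# exp(-(epsilon*r)^2), matching rbf_kernel above; degree=-1 omits the appended polynomial,
# so both solve the same linear system and should agree up to its conditioning.
from scipy.interpolate import RBFInterpolator
eps_chk = 1.0
rbf_scipy = RBFInterpolator(x_data[:, None], y_data, kernel='gaussian',
                            epsilon=eps_chk, degree=-1)
c_chk, _ = rbf_interpolate(x_data, y_data, eps_chk)
ours = np.array([rbf_eval(xi, x_data, c_chk, eps_chk) for xi in x_fine])
print(f'Max |scipy RBFInterpolator - ours| (eps={eps_chk}): '
      f'{np.abs(rbf_scipy(x_fine[:, None]) - ours).max():.2e}')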
Code cell 26
# === 7.2 RBF Shape Parameter Effect ===
if HAS_MPL:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# Left: interpolants for different epsilon
ax = axes[0]
ax.plot(x_fine, f_rbf(x_fine), 'k-', linewidth=2, label='True f')
ax.scatter(x_data, y_data, s=60, color='k', zorder=5)
for eps, color in [(0.5, COLORS['error']), (1.0, COLORS['primary']), (3.0, COLORS['tertiary'])]:
c_eps, _ = rbf_interpolate(x_data, y_data, eps)
y_eps = np.array([rbf_eval(xi, x_data, c_eps, eps) for xi in x_fine])
ax.plot(x_fine, y_eps, '--', color=color, label=f'eps={eps}', linewidth=1.5)
ax.set_ylim(-3, 3)
ax.set_title('Gaussian RBF: Shape Parameter Effect')
ax.set_xlabel('x'); ax.set_ylabel('y')
ax.legend()
# Right: condition number vs epsilon
ax = axes[1]
eps_vals = np.logspace(-1, 1.5, 40)
kappas = []
for eps in eps_vals:
Phi = np.array([[rbf_kernel(x_data[i], x_data[j], eps)
for j in range(len(x_data))] for i in range(len(x_data))])
kappas.append(np.linalg.cond(Phi))
ax.semilogy(eps_vals, kappas, color=COLORS['primary'], linewidth=2)
ax.axhline(1/np.finfo(float).eps, color=COLORS['error'],
linestyle='--', label='Machine precision limit')
ax.set_title('RBF Condition Number vs Shape Parameter')
ax.set_xlabel('epsilon (shape parameter)')
ax.set_ylabel('Condition number kappa(Phi)')
ax.legend()
fig.tight_layout()
plt.show()
print('Plot displayed.')
8. ML Application: Sinusoidal Positional Encodings
Analyzing transformer positional encodings from an interpolation theory perspective.
Code cell 28
# === 8.1 Sinusoidal Positional Encodings ===
def get_sinusoidal_pe(max_len, d_model):
"""Compute transformer sinusoidal positional encodings."""
pe = np.zeros((max_len, d_model))
pos = np.arange(max_len)[:, np.newaxis]
div = np.exp(np.arange(0, d_model, 2) * (-np.log(10000) / d_model))
pe[:, 0::2] = np.sin(pos * div)
pe[:, 1::2] = np.cos(pos * div[:d_model//2])
return pe
d = 64
max_seq = 512
PE = get_sinusoidal_pe(max_seq, d)
print(f'PE shape: {PE.shape}')
print(f'PE[0] (position 0, first 8 dims): {PE[0,:8].round(4)}')
print(f'PE[1] (position 1, first 8 dims): {PE[1,:8].round(4)}')
# Verify: PE(pos+k) = R_k * PE(pos) for a rotation matrix
# Check for a single dimension pair (i, i+1)
pos = 10
k = 50
i = 0  # even dimension index (start of a sin/cos pair)
theta = 1.0 / (10000**(i/d))  # frequency of the pair (i, i+1)
# Expected: PE(pos+k)[i:i+2] = R(k*theta) @ PE(pos)[i:i+2]
pe_pos = PE[pos, i:i+2]
pe_pk = PE[pos+k, i:i+2]
# With each pair ordered (sin, cos), the shift-by-k map is [[cos, sin], [-sin, cos]]
R_k = np.array([[np.cos(k*theta),  np.sin(k*theta)],
                [-np.sin(k*theta), np.cos(k*theta)]])
pe_pk_pred = R_k @ pe_pos
err = np.abs(pe_pk - pe_pk_pred).max()
print(f'\nRotation property error (dim 0-1, pos={pos}, k={k}): {err:.2e}')
print(f'PASS: PE(pos+k) = R_k @ PE(pos)' if err < 1e-10 else 'FAIL')
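# Optional follow-up: each (sin, cos) pair contributes cos((p - q)*theta_m) to the dot
# product, so PE[p].PE[q] depends only on the offset p - q. A quick spot check:
for p, q in [(0, 7), (100, 107), (300, 307)]:
    print(f'PE[{p}].PE[{q}] = {np.dot(PE[p], PE[q]):.6f}')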
Code cell 29
# === 8.2 PE Similarity Analysis ===
# Compute cosine similarity between PE vectors as function of position distance
def cosine_sim(a, b):
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
pos0 = 100 # Reference position
distances = np.arange(0, 200)
similarities = [cosine_sim(PE[pos0], PE[min(pos0+d_pos, max_seq-1)])
for d_pos in distances]
print(f'Cosine similarity PE[{pos0}] with PE[{pos0}+d]:')
for d_pos in [0, 10, 50, 100, 150, 199]:
sim = cosine_sim(PE[pos0], PE[min(pos0+d_pos, max_seq-1)])
print(f' d={d_pos:>3}: sim = {sim:.4f}')
if HAS_MPL:
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
# PE similarity decay
ax = axes[0]
ax.plot(distances, similarities, color=COLORS['primary'], linewidth=2)
ax.axhline(0, color=COLORS['neutral'], linestyle='--', alpha=0.5)
ax.set_title(f'PE Cosine Similarity vs Distance (reference pos={pos0})')
ax.set_xlabel('Position distance |pos2 - pos1|')
ax.set_ylabel('Cosine similarity')
# PE heatmap (positions 0-64, dims 0-32)
ax = axes[1]
im = ax.imshow(PE[:64, :32].T, aspect='auto', cmap='RdBu_r',
vmin=-1, vmax=1)
plt.colorbar(im, ax=ax)
ax.set_title('Sinusoidal PE: positions 0-63, dims 0-31')
ax.set_xlabel('Position'); ax.set_ylabel('Embedding dimension')
fig.tight_layout()
plt.show()
9. Summary Verification
Comprehensive tests of all implemented methods.
Code cell 31
# === 9. Summary Verification Suite ===
print('=' * 60)
print('SUMMARY: Interpolation and Approximation Verification')
print('=' * 60)
results = []
# Test 1: Barycentric interpolation exactness
f_poly = lambda x: x**3 - 2*x + 1
nodes_t = np.array([-1.0, -0.5, 0.0, 0.5, 1.0])
vals_t = f_poly(nodes_t)
w_t = cheb2_weights(4)
x_mid = 0.3
p_t = bary_eval(x_mid, nodes_t, vals_t, w_t)
ok1 = abs(p_t - f_poly(x_mid)) < 1e-12
results.append(('Barycentric exactness', ok1))
# Test 2: Newton divided differences match Lagrange
nodes_nd = np.array([0.0, 0.5, 1.0, 1.5])
f_nd = lambda x: np.exp(x)
vals_nd = f_nd(nodes_nd)
coeffs_nd = divided_differences(nodes_nd, vals_nd)
x_ev = 0.75
p_newton = newton_eval(nodes_nd, coeffs_nd, x_ev)
w_nd = barycentric_weights(nodes_nd)
p_bary = bary_eval(x_ev, nodes_nd, vals_nd, w_nd)
ok2 = abs(p_newton - p_bary) < 1e-10
results.append(('Newton == Lagrange', ok2))
# Test 3: Cubic spline O(h^4) convergence
xn = np.linspace(0, 1, 17)
yn = np.sin(np.pi*xn)
Mn = natural_cubic_spline(xn, yn)
xf2 = np.linspace(0, 1, 1000)
err3 = np.abs(np.array([eval_spline(xi, xn, yn, Mn) for xi in xf2]) - np.sin(np.pi*xf2)).max()
ok3 = err3 < 1e-4  # h=1/16: (5/384)*pi^4*h^4 ~ 2e-5
results.append(('Cubic spline error < 1e-4 (h=1/16)', ok3))
# Test 4: FFT Parseval
y_par = np.random.randn(256)
Y_par = fft(y_par)
ok4 = abs(np.sum(y_par**2) - np.sum(np.abs(Y_par)**2)/256) < 1e-10
results.append(('FFT Parseval theorem', ok4))
# Test 5: RBF interpolation at nodes
x_rbf_t = np.array([-1.0, 0.0, 1.0])
y_rbf_t = np.array([2.0, -1.0, 3.0])
c_rbf_t, _ = rbf_interpolate(x_rbf_t, y_rbf_t, eps=1.0)
y_at_nodes = np.array([rbf_eval(xi, x_rbf_t, c_rbf_t, 1.0) for xi in x_rbf_t])
ok5 = np.allclose(y_at_nodes, y_rbf_t, atol=1e-10)
results.append(('RBF exact at nodes', ok5))
# Test 6: PE rotation property
PE_test = get_sinusoidal_pe(200, 32)
ok6_list = []
for pos_t in [10, 50, 100]:
k_t = 20
for i_t in [0, 2, 4]:
theta_t = 1.0 / (10000**(2*(i_t//2)/32))
        R = np.array([[np.cos(k_t*theta_t),  np.sin(k_t*theta_t)],
                      [-np.sin(k_t*theta_t), np.cos(k_t*theta_t)]])
ok6_list.append(np.allclose(PE_test[pos_t+k_t, i_t:i_t+2],
R @ PE_test[pos_t, i_t:i_t+2], atol=1e-10))
ok6 = all(ok6_list)
results.append(('Sinusoidal PE rotation property', ok6))
print()
all_pass = True
for name, ok in results:
status = 'PASS' if ok else 'FAIL'
print(f' {status} {name}')
if not ok:
all_pass = False
print()
print('=' * 60)
print(f" {'ALL TESTS PASS' if all_pass else 'SOME TESTS FAILED'}")
print('=' * 60)