Exercises Notebook

Converted from exercises.ipynb for web reading.

Gradient Descent - Exercises

Ten graded exercises. Each exercise has a problem, scaffold, and solution cell.

Code cell 2

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Plot styling: prefer seaborn's theme; without seaborn, fall back to the
# closest style sheet bundled with Matplotlib.
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    HAS_SNS = False
    # Matplotlib 3.6 renamed its bundled seaborn style sheets with a
    # "seaborn-v0_8-" prefix; older releases only ship "seaborn-whitegrid".
    # Pick whichever name this installation provides rather than raising
    # OSError on an unknown style. If neither exists, keep the default style;
    # the rcParams below still apply.
    _fallback_style = next(
        (
            style_name
            for style_name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
            if style_name in plt.style.available
        ),
        None,
    )
    if _fallback_style is not None:
        plt.style.use(_fallback_style)

# Shared figure defaults for every plot in the notebook.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})

# Fixed seed so any random draws in later cells are reproducible.
np.random.seed(42)
print("Plot setup complete.")

Exercise 1 [*]: Constant Step Size

State the relevant definition for constant step size.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 4

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 1 scaffold: fill in the missing computation for constant step size.")
print("answer =", answer)

Code cell 5

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 1: Constant Step Size")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([1.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 4.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: constant step size is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 2 [*]: Backtracking Line Search

State the relevant definition for backtracking line search.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 7

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 2 scaffold: fill in the missing computation for backtracking line search.")
print("answer =", answer)

Code cell 8

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 2: Backtracking Line Search")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([2.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 7.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: backtracking line search is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 3 [*]: Wolfe Conditions

State the relevant definition for Wolfe conditions.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 10

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 3 scaffold: fill in the missing computation for Wolfe conditions.")
print("answer =", answer)

Code cell 11

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 3: Wolfe Conditions")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([3.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 12.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: Wolfe conditions is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 4 [**]: Strongly Convex Convergence

State the relevant definition for strongly convex convergence.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 13

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 4 scaffold: fill in the missing computation for strongly convex convergence.")
print("answer =", answer)

Code cell 14

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 4: Strongly Convex Convergence")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([4.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 19.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: strongly convex convergence is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 5 [**]: PL Condition

State the relevant definition for PL condition.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 16

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 5 scaffold: fill in the missing computation for PL condition.")
print("answer =", answer)

Code cell 17

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 5: Pl Condition")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([5.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 28.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: PL condition is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 6 [**]: Polyak Momentum

State the relevant definition for Polyak momentum.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 19

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 6 scaffold: fill in the missing computation for Polyak momentum.")
print("answer =", answer)

Code cell 20

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 6: Polyak Momentum")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([6.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 39.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: Polyak momentum is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 7 [**]: Gradient Flow

State the relevant definition for gradient flow.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 22

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 7 scaffold: fill in the missing computation for gradient flow.")
print("answer =", answer)

Code cell 23

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 7: Gradient Flow")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([7.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 52.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: gradient flow is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 8 [***]: Edge Of Stability Preview

State the relevant definition for edge of stability preview.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 25

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 8 scaffold: fill in the missing computation for edge of stability preview.")
print("answer =", answer)

Code cell 26

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 8: Edge Of Stability Preview")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([8.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 67.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: edge of stability preview is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 9 [***]: Linear Regression by GD

State the relevant definition for linear regression by GD.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 28

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 9 scaffold: fill in the missing computation for linear regression by GD.")
print("answer =", answer)

Code cell 29

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 9: Linear Regression By Gd")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([9.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 84.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: linear regression by GD is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Exercise 10 [***]: Learning-Rate Diagnostics

State the relevant definition for learning-rate diagnostics.
Compute the requested toy quantity.
Explain the optimization diagnostic you would log in a real model-training run.

Code cell 31

# Your Solution
# Replace None with your computed value, then compare with the solution cell.
answer = None
print("Exercise 10 scaffold: fill in the missing computation for learning-rate diagnostics.")
print("answer =", answer)

Code cell 32

# Solution
import numpy as np

def header(title):
    """Print ``title`` framed by two 72-character rules of ``=``.

    A blank line is emitted before the top rule.
    """
    rule = "=" * 72
    print("\n" + rule, title, rule, sep="\n")

def check_close(name, value, target, tol=1e-8):
    """Report whether ``value`` matches ``target`` within ``tol``; raise if not.

    Both numbers are converted with ``float()`` once, and the converted
    values drive the comparison and the printed report alike, so any input
    that ``float()`` accepts can also be printed.

    Args:
        name: Label shown in the report and used as the assertion message.
        value: Computed quantity; anything ``float()`` accepts.
        target: Expected quantity; anything ``float()`` accepts.
        tol: Largest absolute difference that still counts as a pass.

    Raises:
        AssertionError: If the absolute difference exceeds ``tol``. A NaN on
            either side also fails, because the comparison is then false.
    """
    actual = float(value)
    expected = float(target)
    ok = abs(actual - expected) <= tol
    print(f"{'PASS' if ok else 'FAIL'} - {name}: value={actual:.8f}, target={expected:.8f}")
    if not ok:
        raise AssertionError(name)

def check_true(name, condition):
    """Report whether ``condition`` is truthy; raise when it is not.

    Prints ``PASS - <name>`` or ``FAIL - <name>``. A falsy condition then
    raises ``AssertionError(name)``.
    """
    if condition:
        print(f"PASS - {name}")
        return
    print(f"FAIL - {name}")
    raise AssertionError(name)

header("Exercise 10: Learning-Rate Diagnostics")

# Toy quantity: first component of the vector squared, plus 3.
vector = np.array([10.0, 1.0, -1.0])
answer = float(3.0 + vector[0] ** 2)

check_close("toy scalar computation", answer, 103.0)
check_true("finite answer", np.isfinite(answer))

print(
    "Definition anchor: learning-rate diagnostics is interpreted through the objective, update, or diagnostic in Gradient Descent.",
    "\nTakeaway: a tiny verified computation is the fastest way to test intuition before scaling an optimizer experiment.",
    sep="\n",
)

Gradient Descent

Exercises Notebook

Gradient Descent - Exercises

Code cell 2

Exercise 1 [*]: Constant Step Size

Code cell 4

Code cell 5

Exercise 2 [*]: Backtracking Line Search

Code cell 7

Code cell 8

Exercise 3 [*]: Wolfe Conditions

Code cell 10

Code cell 11

Exercise 4 [**]: Strongly Convex Convergence

Code cell 13

Code cell 14

Exercise 5 [**]: PL Condition

Code cell 16

Code cell 17

Exercise 6 [**]: Polyak Momentum

Code cell 19

Code cell 20

Exercise 7 [**]: Gradient Flow

Code cell 22

Code cell 23

Exercise 8 [***]: Edge Of Stability Preview

Code cell 25

Code cell 26

Exercise 9 [***]: Linear Regression by GD

Code cell 28

Code cell 29

Exercise 10 [***]: Learning-Rate Diagnostics

Code cell 31

Code cell 32

Test this lesson

Which module does this lesson belong to?

Which section is covered in this lesson content?

Which term is most central to this lesson?

What is the best way to use this lesson for real learning?