Theory Notebook
Converted from `theory.ipynb` for web reading.
§03 Graph Algorithms — Theory Notebook
"An algorithm must be seen to be believed." — Donald Knuth
Interactive implementations of every classical graph algorithm: BFS, DFS, Dijkstra, Bellman-Ford, Floyd-Warshall, A*, Kruskal, Prim, topological sort, Tarjan SCC, and max-flow. Every algorithm is traced step-by-step with correctness verification.
Code cell 2
# --- Plotting setup: prefer a seaborn theme, fall back to matplotlib's ---
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
try:
    import seaborn as sns
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True
except ImportError:
    # seaborn unavailable: use matplotlib's bundled seaborn-like style
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
# Shared rcParams so every figure in the notebook renders consistently.
mpl.rcParams.update({
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
})
np.random.seed(42)  # reproducible randomness throughout the notebook
print("Plot setup complete.")
Code cell 3
# --- Algorithm-cell setup: core imports plus optional plotting/networkx ---
import numpy as np
import heapq
from collections import deque, defaultdict
try:
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.rcParams['figure.figsize'] = [10, 6]
    plt.rcParams['font.size'] = 12
    HAS_MPL = True
except ImportError:
    HAS_MPL = False
try:
    import networkx as nx
    HAS_NX = True
except ImportError:
    HAS_NX = False
np.random.seed(42)  # deterministic examples
print(f'Setup complete. HAS_MPL={HAS_MPL}, HAS_NX={HAS_NX}')
2. Graph Traversal
2.1 Breadth-First Search (BFS)
BFS visits nodes layer by layer. Starting from source $s$, it discovers all nodes at hop distance 1 before any at hop distance 2, etc.
Key properties:
- Computes exact shortest hop distances in $O(V+E)$ time
- Queue data structure ensures FIFO (layer-by-layer) ordering
- BFS tree encodes shortest paths via `parent[]` pointers
Code cell 5
# === 2.1 BFS Implementation ===
def bfs(adj, s, n):
    """Breadth-first search from source s.

    Returns (d, parent): d[v] is the hop distance from s (inf when v is
    unreachable) and parent[v] is v's predecessor on a shortest path
    (-1 for the source and for unreached vertices).
    """
    INF = float('inf')
    d = [INF] * n
    parent = [-1] * n
    d[s] = 0
    frontier = deque([s])
    while frontier:
        u = frontier.popleft()
        for nbr in adj[u]:
            if d[nbr] != INF:
                continue  # already discovered on an earlier (≤) layer
            d[nbr] = d[u] + 1
            parent[nbr] = u
            frontier.append(nbr)
    return d, parent
def reconstruct_path(parent, s, t):
    """Walk parent pointers from t back to a root; return the s→t path.

    Returns None when the walk ends at a root other than s (i.e. t is
    not reachable from s in the BFS tree).
    """
    path = [t]
    while parent[path[-1]] != -1:
        path.append(parent[path[-1]])
    if path[-1] != s:
        return None
    path.reverse()
    return path
# === 2.1 BFS Example ===
# Undirected 5-node graph: edges 0-1, 0-2, 1-3, 1-4, 2-4.
# (A previous draft built a throwaway dict containing a dead
# `[1, 5] if False else [1]` branch and immediately overwrote it;
# that dead code is removed — one definition only.)
n_bfs = 5
adj_bfs = {0: [1,2], 1: [0,3,4], 2: [0,4], 3: [1], 4: [1,2]}
d_bfs, parent_bfs = bfs(adj_bfs, 0, n_bfs)
print('BFS from node 0:')
print(f' distances: {d_bfs}')
print(f' parent: {parent_bfs}')
# Reconstruct path to node 3
path_to_3 = reconstruct_path(parent_bfs, 0, 3)
print(f' Shortest path 0→3: {path_to_3} (length {d_bfs[3]})')
# Verify BFS distances
assert d_bfs == [0, 1, 1, 2, 2], f'Expected [0,1,1,2,2], got {d_bfs}'
print('PASS — BFS distances verified')
Code cell 6
# === 2.1 BFS Layer Visualisation ===
def bfs_layers(adj, s, n):
    """Group the nodes reachable from s by hop distance.

    Returns layers where layers[k] lists (in ascending node order) every
    node whose BFS distance from s equals k.
    """
    dist, _ = bfs(adj, s, n)
    reachable = [(dv, v) for v, dv in enumerate(dist) if dv != float('inf')]
    depth = max(dv for dv, _ in reachable)
    layers = [[] for _ in range(depth + 1)]
    for dv, v in reachable:
        layers[dv].append(v)
    return layers
# Print the BFS layering of the example graph rooted at node 0.
layers = bfs_layers(adj_bfs, 0, n_bfs)
print('BFS layers from node 0:')
for k, layer in enumerate(layers):
    print(f' Layer {k} (hop distance {k}): {layer}')
print()
print('GNN connection: a k-layer GNN can see each node\'s layer-k neighbourhood.')
print(f' 2-layer GNN receptive field of node 0: {[v for k,l in enumerate(layers) for v in l if k<=2]}')
2.3 Depth-First Search (DFS)
DFS uses a stack (implicit via recursion) to explore as deep as possible
before backtracking. Discovery timestamps disc[] and finish timestamps fin[]
reveal the structure of the DFS tree.
Code cell 8
# === 2.3 DFS with Timestamps and Edge Classification ===
def dfs_full(adj_directed, n):
    """Full DFS over a directed graph with discovery/finish timestamps.

    Returns (disc, fin, parent, edges) where edges maps each explored
    (u, v) pair to 'TREE' / 'BACK' / 'FORWARD' / 'CROSS'. The presence
    of a BACK edge proves the graph has a cycle.
    """
    WHITE, GREY, BLACK = 'WHITE', 'GREY', 'BLACK'
    colour = [WHITE] * n
    disc, fin = [-1] * n, [-1] * n
    parent = [-1] * n
    clock = [0]  # boxed int so the nested function can mutate it
    edges = {}

    def explore(u):
        colour[u] = GREY
        disc[u] = clock[0]
        clock[0] += 1
        for v in adj_directed.get(u, []):
            if colour[v] == WHITE:
                edges[(u, v)] = 'TREE'
                parent[v] = u
                explore(v)
            elif colour[v] == GREY:
                edges[(u, v)] = 'BACK'  # ancestor still open ⇒ cycle
            else:
                # v already finished: discovery time separates the cases
                edges[(u, v)] = 'FORWARD' if disc[v] > disc[u] else 'CROSS'
        colour[u] = BLACK
        fin[u] = clock[0]
        clock[0] += 1

    for root in range(n):
        if colour[root] == WHITE:
            explore(root)
    return disc, fin, parent, edges
# Directed graph: 0→1, 1→2, 2→0 (cycle), 1→3, 3→4
adj_d = {0:[1], 1:[2,3], 2:[0], 3:[4], 4:[]}
n_d = 5
disc, fin, par, edge_cls = dfs_full(adj_d, n_d)
print('DFS timestamps:')
for v in range(n_d):
    print(f' node {v}: disc={disc[v]:2d}, fin={fin[v]:2d}, parent={par[v]}')
print('\nEdge classifications:')
for (u,v), etype in sorted(edge_cls.items()):
    print(f' ({u},{v}): {etype}')
# A back edge is a DFS witness of a cycle.
back_edges = [(u,v) for (u,v),t in edge_cls.items() if t=='BACK']
print(f'\nBack edges (cycles): {back_edges}')
print(f'Graph has cycle: {len(back_edges) > 0}')
3. Shortest Paths
3.2 Dijkstra's Algorithm
Dijkstra solves SSSP for graphs with non-negative edge weights using a min-heap. The key invariant: when a vertex is extracted, its distance estimate is final.
Code cell 10
# === 3.2 Dijkstra's Algorithm ===
def dijkstra(adj_w, s, n):
    """Dijkstra's single-source shortest paths (non-negative weights).

    adj_w[u] is a list of (v, weight) pairs; vertices absent from adj_w
    are treated as having no outgoing edges. Returns (d, parent).

    Uses a lazy-deletion min-heap: a vertex may appear several times,
    and stale entries are skipped when popped.
    """
    d = [float('inf')] * n
    parent = [-1] * n
    d[s] = 0
    settled = [False] * n
    pq = [(0, s)]  # (distance estimate, vertex)
    while pq:
        du, u = heapq.heappop(pq)
        if settled[u]:
            continue  # stale entry — u already finalized
        settled[u] = True
        for v, w in adj_w.get(u, []):
            cand = d[u] + w
            if cand < d[v]:
                d[v] = cand
                parent[v] = u
                heapq.heappush(pq, (cand, v))
    return d, parent
# Graph: s=0, a=1, b=2, c=3, d=4
# Edges: (s,a,4),(s,b,2),(b,a,1),(b,c,5),(a,c,1),(a,d,7),(c,d,2)
adj_dijk = {
    0: [(1,4),(2,2)],  # s→a=4, s→b=2
    1: [(3,1),(4,7)],  # a→c=1, a→d=7
    2: [(1,1),(3,5)],  # b→a=1, b→c=5
    3: [(4,2)],        # c→d=2
    4: [],
}
n_dijk = 5
names = ['s','a','b','c','d']
d_dijk, par_dijk = dijkstra(adj_dijk, 0, n_dijk)
print('Dijkstra from s:')
for i, name in enumerate(names):
    # Walk parent pointers back to the source to display each path.
    path = []
    cur = i
    while cur != -1:
        path.append(names[cur])
        cur = par_dijk[cur]
    path_str = '→'.join(reversed(path))
    print(f' d[{name}] = {d_dijk[i]:6.1f} path: {path_str}')
# Verify: s→b→a→c→d = 2+1+1+2 = 6
assert d_dijk[4] == 6, f'Expected d[d]=6, got {d_dijk[4]}'
print('\nPASS — Dijkstra distances verified')
print(f' s→b→a→c→d: 2+1+1+2 = {2+1+1+2} ✓')
3.3 Bellman-Ford Algorithm
Bellman-Ford handles negative edge weights by relaxing all edges $|V|-1$ times. The DP recurrence is identical to the Bellman equation for value iteration in RL.
Code cell 12
# === 3.3 Bellman-Ford with DP Table Trace ===
def bellman_ford(edges, s, n):
    """Bellman-Ford SSSP; edges is a list of (u, v, w) triples.

    Returns (d, parent, has_neg_cycle, table) where table[i] is a
    snapshot of the distance array after relaxation round i (row 0 is
    the initial state). Terminates early once a full round of edge
    relaxations makes no improvement.
    """
    INF = float('inf')
    d = [INF] * n
    parent = [-1] * n
    d[s] = 0
    table = [d.copy()]
    for _ in range(n - 1):
        improved = False
        for u, v, w in edges:
            if d[u] == INF:
                continue  # u not yet reached: nothing to relax
            if d[u] + w < d[v]:
                d[v] = d[u] + w
                parent[v] = u
                improved = True
        table.append(d.copy())
        if not improved:
            break
    # One extra pass: any further improvement means a reachable
    # negative-weight cycle exists.
    has_neg = any(d[u] != INF and d[u] + w < d[v] for u, v, w in edges)
    return d, parent, has_neg, table
# Graph: s=0,u=1,v=2,t=3 with negative edge u→v
# Edges: s→u=5, s→v=8, u→v=-3, u→t=6, v→t=2
edges_bf = [(0,1,5),(0,2,8),(1,2,-3),(1,3,6),(2,3,2)]
n_bf = 4
names_bf = ['s','u','v','t']
d_bf, par_bf, neg, table = bellman_ford(edges_bf, 0, n_bf)
# Show how the DP table converges round by round.
print('Bellman-Ford DP table:')
header = ' Round | ' + ' | '.join(f'{n:>4}' for n in names_bf)
print(header)
print(' ' + '-' * len(header))
for i, row in enumerate(table):
    vals = ' | '.join(f'{(v if v != float("inf") else "inf"):>4}' for v in row)
    print(f' {i:5d} | {vals}')
print(f'\nFinal distances: {d_bf}')
print(f'Negative cycle: {neg}')
# Verify: s→u→v→t = 5-3+2=4
assert d_bf[3] == 4, f'Expected d[t]=4, got {d_bf[3]}'
assert not neg
print('PASS — Bellman-Ford verified: d[t] = 4 via s→u→v→t')
Code cell 13
# === 3.4 Floyd-Warshall All-Pairs Shortest Paths ===
def floyd_warshall(W):
    """Floyd-Warshall all-pairs shortest paths.

    Parameters
    ----------
    W : array-like n×n weight matrix — an ndarray or nested lists, with
        inf for "no edge" and 0 on the diagonal. The input is never
        mutated.

    Returns
    -------
    np.ndarray of shortest-path distances D, where D[i][j] is the
    minimum-weight path cost from i to j.
    """
    # Copy into a fresh float ndarray. The previous `W.copy()` was a
    # shallow copy for nested-list input, so relaxations wrote through
    # to the caller's rows; np.array always deep-copies the values.
    D = np.array(W, dtype=float)
    n = len(D)
    for k in range(n):          # allow vertex k as an intermediate hop
        for i in range(n):
            for j in range(n):
                if D[i][k] + D[k][j] < D[i][j]:
                    D[i][j] = D[i][k] + D[k][j]
    return D
# 4-node directed graph
INF = float('inf')
W_fw = np.array([
    [0, 3, INF, 7 ],
    [8, 0, 2, INF],
    [5, INF, 0, 1 ],
    [2, INF, INF, 0 ],
])
D_fw = floyd_warshall(W_fw)
print('Floyd-Warshall distance matrix:')
print(' 0 1 2 3')
for i, row in enumerate(D_fw):
    vals = ' '.join(f'{v:5.0f}' if v != INF else ' inf' for v in row)
    print(f' row {i}: {vals}')
# Verify: no direct 0→2 edge; the best route goes through 1: 3+2=5.
assert D_fw[0][2] == 5, f'Expected D[0][2]=5, got {D_fw[0][2]}'
print('\nPASS — D[0][2]=5: path 0→1→2')
# Negative cycle detection: a negative diagonal entry means some vertex
# can reach itself at negative total cost.
neg_cycle = any(D_fw[i][i] < 0 for i in range(len(D_fw)))
print(f'Negative cycle detected: {neg_cycle}')
3.5 A* Search
A* is Dijkstra guided by an admissible heuristic $h(v)$. The priority key is $f(v) = g(v) + h(v)$, directing search toward the goal.
Code cell 15
# === 3.5 A* Search on a Grid ===
def astar(grid_adj, s, t, n, heuristic):
    """A* search from s to t.

    grid_adj[u] is a list of (v, weight) pairs; heuristic(v) estimates
    the remaining cost from v to t (must not overestimate for
    optimality). Returns (distance to t, parent array, number of nodes
    expanded).
    """
    g_best = [float('inf')] * n
    g_best[s] = 0
    parent = [-1] * n
    open_heap = [(heuristic(s), 0, s)]  # entries: (f = g + h, g, node)
    closed = set()
    while open_heap:
        _, g, u = heapq.heappop(open_heap)
        if u in closed:
            continue  # stale entry
        closed.add(u)
        if u == t:
            break  # goal expanded ⇒ g is optimal for an admissible h
        for v, w in grid_adj.get(u, []):
            tentative = g + w
            if tentative < g_best[v]:
                g_best[v] = tentative
                parent[v] = u
                heapq.heappush(open_heap, (tentative + heuristic(v), tentative, v))
    return g_best[t], parent, len(closed)
# Simple 5-node graph
# Positions for heuristic: nodes 0..4 at x positions [0,1,2,3,4]
positions = {0:0, 1:1, 2:2, 3:1, 4:4}
astar_adj = {0:[(1,2),(3,5)], 1:[(2,3),(4,10)], 2:[(4,1)], 3:[(4,8)], 4:[]}
def h_euclidean(v):
    # Admissible heuristic: straight-line x-distance to the goal (node 4)
    # never overestimates the true remaining cost.
    return abs(positions[v] - positions[4])
dist_astar, par_astar, expanded = astar(astar_adj, 0, 4, 5, h_euclidean)
dist_dijkstra, par_dijk2 = dijkstra(astar_adj, 0, 5)
print(f'A* distance s→t: {dist_astar}, nodes expanded: {expanded}')
print(f'Dijkstra distance s→t: {dist_dijkstra[4]}')
assert dist_astar == dist_dijkstra[4], 'A* and Dijkstra disagree!'
print('PASS — A* gives optimal distance matching Dijkstra')
4. Minimum Spanning Trees
4.2 Kruskal's Algorithm
Kruskal greedily adds the minimum-weight edge that doesn't create a cycle. Union-Find tracks which vertices are in the same component.
Code cell 17
# === 4.2 Kruskal's Algorithm with Union-Find ===
class UnionFind:
    """Disjoint-set forest with path halving and union by rank."""

    def __init__(self, n):
        self.parent = list(range(n))  # parent[i] == i ⇔ i is a root
        self.rank = [0] * n           # upper bound on tree height

    def find(self, x):
        """Return the representative (root) of x's component."""
        root = x
        while self.parent[root] != root:
            # Path halving: point every other node at its grandparent.
            self.parent[root] = self.parent[self.parent[root]]
            root = self.parent[root]
        return root

    def union(self, x, y):
        """Merge x's and y's components; False if already joined."""
        rx, ry = self.find(x), self.find(y)
        if rx == ry:
            return False  # already in same component
        if self.rank[rx] < self.rank[ry]:
            rx, ry = ry, rx  # attach the shallower tree under the deeper
        self.parent[ry] = rx
        if self.rank[rx] == self.rank[ry]:
            self.rank[rx] += 1
        return True
def kruskal(n, edges):
    """Kruskal's MST; edges is a list of (w, u, v) triples.

    Returns (mst_edges, total_weight, trace): MST edges in the order
    added, their weight sum, and a human-readable ADD/SKIP log in
    ascending weight order.
    """
    uf = UnionFind(n)
    mst, trace = [], []
    for w, u, v in sorted(edges):
        if uf.union(u, v):
            mst.append((w, u, v))
            trace.append(f' ADD ({u},{v}) weight={w}')
            if len(mst) == n - 1:
                break  # spanning tree complete
        else:
            trace.append(f' SKIP ({u},{v}) weight={w} — would form cycle')
    return mst, sum(w for w, _, _ in mst), trace
# Graph: A=0,B=1,C=2,D=3,E=4
# Edges: (A,B,4),(A,C,2),(B,C,5),(B,D,10),(C,D,3),(D,E,7),(C,E,8)
n_kruskal = 5
edges_k = [(4,0,1),(2,0,2),(5,1,2),(10,1,3),(3,2,3),(7,3,4),(8,2,4)]
names_k = ['A','B','C','D','E']
mst_edges, mst_weight, trace_k = kruskal(n_kruskal, edges_k)
print('Kruskal MST trace:')
for t in trace_k: print(t)
print(f'\nMST edges: {[(names_k[u],names_k[v],w) for w,u,v in mst_edges]}')
print(f'MST total weight: {mst_weight}')
# Optimal MST: A-C(2) + C-D(3) + A-B(4) + D-E(7) = 16
assert mst_weight == 16, f'Expected 16, got {mst_weight}'
print('PASS — MST weight = 16')
Code cell 18
# === 4.3 Prim's Algorithm ===
def prim(adj_w_undirected, n, root=0):
    """Prim's MST grown outward from `root`.

    adj_w_undirected[u] lists (v, weight) pairs with both directions
    present. Returns (mst_edges, total_weight); each MST edge is a
    (parent, child, weight) triple.
    """
    import heapq
    best = [float('inf')] * n   # cheapest known edge into each vertex
    parent = [-1] * n
    done = [False] * n
    best[root] = 0
    frontier = [(0, root)]      # (edge weight into vertex, vertex)
    mst_edges = []
    total = 0
    while frontier:
        w_in, u = heapq.heappop(frontier)
        if done[u]:
            continue  # stale heap entry
        done[u] = True
        if parent[u] != -1:
            mst_edges.append((parent[u], u, w_in))
            total += w_in
        for v, w in adj_w_undirected.get(u, []):
            if not done[v] and w < best[v]:
                best[v] = w
                parent[v] = u
                heapq.heappush(frontier, (w, v))
    return mst_edges, total
# Same graph, undirected adjacency list
adj_prim = {
    0: [(1,4),(2,2)],
    1: [(0,4),(2,5),(3,10)],
    2: [(0,2),(1,5),(3,3),(4,8)],
    3: [(1,10),(2,3),(4,7)],
    4: [(2,8),(3,7)],
}
mst_prim, w_prim = prim(adj_prim, n_kruskal, root=0)
print('Prim MST edges:', [(names_k[u],names_k[v],w) for u,v,w in mst_prim])
print(f'Prim MST weight: {w_prim}')
# MSTs can differ edge-by-edge under tied weights, but the total is unique here.
assert w_prim == 16, f'Expected 16, got {w_prim}'
print('PASS — Prim gives same total weight as Kruskal')
5. Topological Sort and DAG Algorithms
5.2 Topological Sort
A topological ordering of a DAG lists vertices so every edge goes from earlier to later. Kahn's algorithm (BFS-based) and DFS-based sort are both $O(V+E)$.
Code cell 20
# === 5.2 Topological Sort: Kahn's (BFS) and DFS-based ===
def topological_sort_kahn(adj, n):
    """Kahn's BFS-based topological sort; None if the graph has a cycle."""
    indeg = [0] * n
    for u in range(n):
        for v in adj.get(u, []):
            indeg[v] += 1
    ready = deque(v for v in range(n) if indeg[v] == 0)
    order = []
    while ready:
        u = ready.popleft()
        order.append(u)
        for v in adj.get(u, []):
            indeg[v] -= 1
            if indeg[v] == 0:
                ready.append(v)
    # Fewer than n emitted ⇒ some vertex never reached indegree 0 ⇒ cycle.
    return order if len(order) == n else None
def topological_sort_dfs(adj, n):
    """DFS-based topological sort (valid only for DAGs).

    Each vertex is recorded at its finish time; reversing that
    postorder puts u before v for every edge (u, v), because v always
    finishes before u.
    """
    seen = [False] * n
    postorder = []

    def walk(u):
        seen[u] = True
        for v in adj.get(u, []):
            if not seen[v]:
                walk(v)
        postorder.append(u)  # record at finish time

    for v in range(n):
        if not seen[v]:
            walk(v)
    postorder.reverse()
    return postorder
# DAG representing computation: matmul→relu→matmul2→loss
# Nodes: 0=x, 1=W1, 2=z1(matmul), 3=a1(relu), 4=W2, 5=z2(matmul2), 6=y, 7=L(loss)
adj_dag = {0:[2], 1:[2], 2:[3], 3:[5], 4:[5], 5:[7], 6:[7], 7:[]}
names_dag = ['x','W1','z1','a1','W2','z2','y','L']
n_dag = 8
from collections import deque
topo_kahn = topological_sort_kahn(adj_dag, n_dag)
topo_dfs = topological_sort_dfs(adj_dag, n_dag)
print('Computation DAG topological orders:')
print(f' Kahn\'s: {[names_dag[v] for v in topo_kahn]}')
print(f' DFS: {[names_dag[v] for v in topo_dfs]}')
# Verify: in both, z1 comes after x and W1, a1 after z1, etc.
def verify_topo(order, adj, n):
    # Every edge (u, v) must point forward in the ordering.
    pos = {v: i for i, v in enumerate(order)}
    for u in range(n):
        for v in adj.get(u, []):
            assert pos[u] < pos[v], f'Topo violation: {u} should come before {v}'
    return True
verify_topo(topo_kahn, adj_dag, n_dag)
verify_topo(topo_dfs, adj_dag, n_dag)
print('PASS — both orderings are valid topological sorts')
print()
print('Forward pass order:', [names_dag[v] for v in topo_kahn])
print('Backward pass order (reverse):', [names_dag[v] for v in reversed(topo_kahn)])
6. Strongly Connected Components
6.3 Tarjan's Algorithm
Tarjan finds all SCCs in a single DFS pass using low-link values.
Vertex is an SCC root when low[v] == disc[v].
Code cell 22
# === 6.3 Tarjan's SCC Algorithm ===
def tarjan_scc(adj, n):
    """Tarjan's strongly connected components in a single DFS pass.

    Returns a list of SCCs, each a list of vertices. low[v] is the
    smallest discovery index reachable from v's DFS subtree using at
    most one back/cross edge to the stack; v roots an SCC exactly when
    low[v] == disc[v].
    """
    disc = [-1] * n
    low = [0] * n
    on_stack = [False] * n
    stack = []
    components = []
    counter = [0]

    def strongconnect(v):
        disc[v] = low[v] = counter[0]
        counter[0] += 1
        stack.append(v)
        on_stack[v] = True
        for w in adj.get(v, []):
            if disc[w] < 0:
                strongconnect(w)
                low[v] = min(low[v], low[w])
            elif on_stack[w]:
                low[v] = min(low[v], disc[w])
        if low[v] == disc[v]:  # v is the root of its SCC
            comp = []
            while True:
                w = stack.pop()
                on_stack[w] = False
                comp.append(w)
                if w == v:
                    break
            components.append(comp)

    for v in range(n):
        if disc[v] < 0:
            strongconnect(v)
    return components
# Graph: 0→1, 1→2, 2→0 (SCC {0,1,2}), 1→3, 3→4, 4→5, 5→3 (SCC {3,4,5}), 2→6, 6→7
adj_scc = {0:[1], 1:[2,3], 2:[0,6], 3:[4], 4:[5], 5:[3], 6:[7], 7:[]}
n_scc = 8
sccs = tarjan_scc(adj_scc, n_scc)
print('Tarjan SCCs:')
for i, scc in enumerate(sccs):
    print(f' SCC {i}: {sorted(scc)}')
# Verify: {0,1,2}, {3,4,5}, {6}, {7}
scc_sets = [frozenset(s) for s in sccs]
expected = [frozenset({0,1,2}), frozenset({3,4,5}), frozenset({6}), frozenset({7})]
assert set(scc_sets) == set(expected), f'SCC mismatch: {scc_sets}'
print('PASS — Tarjan correctly identifies all 4 SCCs')
print()
# Contracting each SCC to a single node yields an acyclic condensation DAG.
print('Condensation DAG (SCC → SCC):')
print(' {0,1,2} → {3,4,5} (via edge 1→3)')
print(' {0,1,2} → {6} (via edge 2→6)')
print(' {6} → {7} (via edge 6→7)')
7. Maximum Flow and Minimum Cut
7.2 Ford-Fulkerson / Edmonds-Karp
Edmonds-Karp uses BFS to find the shortest augmenting path in the residual graph, guaranteeing polynomial complexity.
Code cell 24
# === 7.2 Edmonds-Karp Max-Flow ===
def edmonds_karp(n, cap, s=0, t=None):
    """Edmonds-Karp max-flow (Ford-Fulkerson with BFS augmenting paths).

    Parameters
    ----------
    n   : number of vertices.
    cap : n×n capacity matrix (0 = no edge); the caller's matrix is
          never mutated.
    s   : source vertex (default 0, preserving the original behaviour).
    t   : sink vertex (default n-1, preserving the original behaviour).

    Returns
    -------
    (max_flow, residual_cap, S, steps):
        max_flow     — value of the maximum s→t flow.
        residual_cap — residual capacity matrix after termination.
        S            — source side of a minimum cut (vertices reachable
                       from s in the final residual graph).
        steps        — list of (augmenting_path_edges, bottleneck) per
                       iteration, for tracing.
    """
    if t is None:
        t = n - 1
    flow = 0
    cap = [row[:] for row in cap]  # work on a copy
    steps = []
    while True:
        # BFS for the shortest augmenting path in the residual graph.
        parent = [-1] * n
        parent[s] = s
        queue = deque([s])
        while queue and parent[t] == -1:
            u = queue.popleft()
            for v in range(n):
                if parent[v] == -1 and cap[u][v] > 0:
                    parent[v] = u
                    queue.append(v)
        if parent[t] == -1:
            break  # no augmenting path left: current flow is maximum
        # Bottleneck capacity along the path (record edges for tracing).
        delta = float('inf')
        v = t
        path = []
        while v != s:
            u = parent[v]
            delta = min(delta, cap[u][v])
            path.append((u, v))
            v = u
        path.reverse()
        # Augment: push delta forward, add reverse residual capacity.
        v = t
        while v != s:
            u = parent[v]
            cap[u][v] -= delta
            cap[v][u] += delta
            v = u
        flow += delta
        steps.append((path, delta))
    # Min cut: S = vertices still reachable from s in the residual graph;
    # by max-flow/min-cut, edges leaving S are saturated.
    reachable = [False] * n
    queue = deque([s])
    reachable[s] = True
    while queue:
        u = queue.popleft()
        for v in range(n):
            if not reachable[v] and cap[u][v] > 0:
                reachable[v] = True
                queue.append(v)
    S = [v for v in range(n) if reachable[v]]
    return flow, cap, S, steps
# Flow network: s=0, a=1, b=2, c=3, d=4, t=5
# Capacities:
n_flow = 6
cap_flow = [[0]*n_flow for _ in range(n_flow)]
def add_edge(u,v,c): cap_flow[u][v] = c
add_edge(0,1,10); add_edge(0,2,8)   # s→a=10, s→b=8
add_edge(1,3,7); add_edge(1,4,5)    # a→c=7, a→d=5
add_edge(2,3,6); add_edge(2,4,4)    # b→c=6, b→d=4
add_edge(3,5,9); add_edge(4,5,6)    # c→t=9, d→t=6
names_flow = ['s','a','b','c','d','t']
from collections import deque
max_flow, residual, S_cut, steps_flow = edmonds_karp(n_flow, cap_flow)
# Print each augmenting path with its bottleneck capacity.
print('Edmonds-Karp augmenting paths:')
for i, (path, delta) in enumerate(steps_flow):
    pstr = '→'.join(f'{names_flow[u]}' for u,v in path) + f'→{names_flow[path[-1][1]]}'
    print(f' Step {i+1}: {pstr} bottleneck={delta}')
print(f'\nMax flow: {max_flow}')
T_cut = [v for v in range(n_flow) if v not in S_cut]
print(f'Min cut: S={[names_flow[v] for v in S_cut]}, T={[names_flow[v] for v in T_cut]}')
print('Max-flow = Min-cut capacity verified by theorem.')
5.4 AI Connection: Autograd Computation Graphs
Topological sort is the forward-pass ordering algorithm in PyTorch/JAX. The backward pass is the reverse topological order.
Code cell 26
# === 5.4 Autograd DAG Simulation ===
import numpy as np
from collections import deque
# Simulate a simple computation DAG: x → matmul(W) → z → relu → a → matmul(W2) → loss
# We track forward values and backward gradients
np.random.seed(7)
x = np.array([1.0, 2.0, 3.0])     # input
W1 = np.random.randn(4, 3) * 0.1  # first weight matrix
W2 = np.random.randn(1, 4) * 0.1  # second weight matrix
y = np.array([1.0])               # target
# Forward pass (topological order: x,W1 → z → a → W2 → y_hat → L)
z = W1 @ x                        # shape (4,)
a = np.maximum(0, z)              # ReLU
y_hat = W2 @ a                    # shape (1,)
L = 0.5 * np.sum((y_hat - y)**2)  # MSE loss
print('Forward pass (topological order):')
print(f' x = {x}')
print(f' z = W1 @ x = {z.round(4)}')
print(f' a = relu(z) = {a.round(4)}')
print(f' y_hat = W2 @ a = {y_hat.round(4)}')
print(f' L = 0.5*||y_hat-y||^2 = {L:.4f}')
# Backward pass (reverse topological order): each step is one chain-rule
# application, consuming the gradient from the node's successor.
dL_dyhat = y_hat - y                   # dL/dy_hat
dL_da = W2.T @ dL_dyhat                # dL/da (via chain rule)
dL_dW2 = np.outer(dL_dyhat, a)         # dL/dW2
dL_dz = dL_da * (z > 0).astype(float)  # dL/dz (ReLU backward: mask where z<=0)
dL_dW1 = np.outer(dL_dz, x)            # dL/dW1
print('\nBackward pass (reverse topological order):')
print(f' dL/dy_hat = {dL_dyhat.round(4)}')
print(f' dL/da = {dL_da.flatten().round(4)}')
print(f' dL/dW2 shape = {dL_dW2.shape}')
print(f' dL/dz = {dL_dz.flatten().round(4)}')
print(f' dL/dW1 shape = {dL_dW1.shape}')
print('\nKey insight: backward pass is reverse topological sort of the DAG.')
print('Each node applies chain rule and passes gradient to its predecessors.')
2.5 BFS vs DFS Comparison Visualisation
Code cell 28
# === 2.5 BFS vs DFS Exploration Order ===
def bfs_order(adj, s, n):
    """Return the BFS visit order from s (neighbours scanned in sorted order)."""
    seen = [False] * n
    seen[s] = True
    order = [s]
    q = deque([s])
    while q:
        u = q.popleft()
        for v in sorted(adj.get(u, [])):
            if seen[v]:
                continue
            seen[v] = True
            order.append(v)
            q.append(v)
    return order
def dfs_order(adj, s, n):
    """Return the DFS preorder from s (neighbours scanned in sorted order)."""
    seen = [False] * n
    order = []

    def descend(u):
        seen[u] = True
        order.append(u)
        for v in sorted(adj.get(u, [])):
            if not seen[v]:
                descend(v)

    descend(s)
    return order
# Binary tree-like graph
# 0 is root, children: 0→{1,2}, 1→{3,4}, 2→{5,6}
adj_tree = {0:[1,2], 1:[3,4], 2:[5,6], 3:[], 4:[], 5:[], 6:[]}
n_tree = 7
bfs_o = bfs_order(adj_tree, 0, n_tree)
dfs_o = dfs_order(adj_tree, 0, n_tree)
print('Binary tree BFS vs DFS visit order:')
print(f' BFS: {bfs_o} (level by level)')
print(f' DFS: {dfs_o} (depth first, left subtree first)')
print()
print('BFS is level-order (Layer 0: [0], Layer 1: [1,2], Layer 2: [3,4,5,6])')
print('DFS explores left subtree completely before right')
# Re-probe matplotlib locally so this cell is runnable on its own.
try:
    import matplotlib.pyplot as plt
    HAS_MPL = True
except ImportError:
    HAS_MPL = False
if HAS_MPL:
    # Side-by-side plots; node colour intensity encodes visit rank.
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    colors_bfs = plt.cm.Blues([(i+1)/(n_tree+1) for i in range(n_tree)])
    colors_dfs = plt.cm.Oranges([(i+1)/(n_tree+1) for i in range(n_tree)])
    pos = {0:(4,3), 1:(2,2), 2:(6,2), 3:(1,1), 4:(3,1), 5:(5,1), 6:(7,1)}
    for ax, order, colors, title in [
        (axes[0], bfs_o, colors_bfs, 'BFS (layer by layer)'),
        (axes[1], dfs_o, colors_dfs, 'DFS (depth first)'),
    ]:
        # Draw edges
        for u, nbrs in adj_tree.items():
            for v in nbrs:
                ax.plot([pos[u][0], pos[v][0]], [pos[u][1], pos[v][1]], 'k-', lw=1.5, zorder=1)
        # Draw nodes, labelled "node (visit rank)"
        rank = {v: i for i, v in enumerate(order)}
        for v in range(n_tree):
            c = colors[rank[v]]
            ax.scatter(*pos[v], s=800, c=[c], zorder=3)
            ax.text(pos[v][0], pos[v][1], f'{v}\n({rank[v]+1})', ha='center',
                    va='center', fontsize=10, fontweight='bold', zorder=4)
        ax.set_title(title, fontsize=13)
        ax.set_xlim(0,8); ax.set_ylim(0,4)
        ax.axis('off')
    plt.suptitle('BFS vs DFS: Visit Order on Binary Tree (numbers = visit rank)', fontsize=12)
    plt.tight_layout()
    plt.show()
Code cell 29
# === 3.2 Dijkstra Step-by-Step Trace Table ===
def dijkstra_trace(adj_w, s, n, names=None):
    """Dijkstra's SSSP that also records every extraction step.

    Returns (d, steps); steps[i] is (label of extracted vertex, its
    final distance, snapshot of the distance array at that moment).
    Labels come from `names`, defaulting to the vertex indices.
    """
    import heapq
    labels = names or list(range(n))
    d = [float('inf')] * n
    d[s] = 0
    pq = [(0, s)]
    settled = [False] * n
    steps = []
    while pq:
        du, u = heapq.heappop(pq)
        if settled[u]:
            continue  # stale heap entry
        settled[u] = True
        steps.append((labels[u], du, d[:]))
        for v, w in adj_w.get(u, []):
            if d[u] + w < d[v]:
                d[v] = d[u] + w
                heapq.heappush(pq, (d[v], v))
    return d, steps
# Graph: s=0,a=1,b=2,c=3,d=4 as before
adj_dijk2 = {0:[(1,4),(2,2)], 1:[(3,1),(4,7)], 2:[(1,1),(3,5)], 3:[(4,2)], 4:[]}
names_d = ['s','a','b','c','d']
d_final, steps = dijkstra_trace(adj_dijk2, 0, 5, names_d)
print('Dijkstra step-by-step:')
# Outer f-string quotes are doubled so the inner single-quoted literals
# are legal on every Python version: reusing the same quote character
# inside an f-string is a SyntaxError before 3.12 (PEP 701).
header = f" {'Step':>4} | {'Extract':>7} | " + ' | '.join(f'{name:>5}' for name in names_d)
print(header)
print(' ' + '-'*len(header))
for i, (ext, dist, d_state) in enumerate(steps):
    vals = ' | '.join(f"{v if v != float('inf') else '∞':>5}" for v in d_state)
    print(f' {i+1:4d} | {ext:>7}({dist}) | {vals}')
print(f'\nFinal: {dict(zip(names_d, d_final))}')
2.6 AI Connection: GNN Receptive Fields as BFS
A $k$-layer GNN's receptive field for node $v$ is exactly the BFS $k$-hop neighbourhood of $v$. Here we compute and compare BFS hop sets vs GNN layer aggregations.
Code cell 31
# === 2.6 GNN Receptive Field = BFS k-hop Neighbourhood ===
def k_hop_neighbourhood(adj, v, k, n):
    """Set of all nodes whose BFS hop distance from v is at most k (v included)."""
    dist, _ = bfs(adj, v, n)
    return {u for u in range(n) if dist[u] <= k}
def bfs(adj, s, n):
    """BFS distances and parents from s.

    Unlike the earlier cell's version, vertices missing from `adj` are
    treated as having no outgoing edges (via dict.get).
    """
    INF = float('inf')
    d = [INF] * n
    parent = [-1] * n
    d[s] = 0
    q = deque([s])
    while q:
        u = q.popleft()
        for v in adj.get(u, []):
            if d[v] == INF:
                d[v] = d[u] + 1
                parent[v] = u
                q.append(v)
    return d, parent
# Karate club-like small graph
adj_g = {
    0:[1,2,3], 1:[0,4,5], 2:[0,5,6], 3:[0,6,7],
    4:[1,8], 5:[1,2,9], 6:[2,3,10], 7:[3,10],
    8:[4], 9:[5], 10:[6,7],
}
n_g = 11
print('GNN receptive fields for node 0:')
for k in range(4):
    rf = k_hop_neighbourhood(adj_g, 0, k, n_g)
    print(f' k={k}: {sorted(rf)} ({len(rf)} nodes)')
print()
print('Implication for GNN design:')
print(' - A 1-layer GNN sees only immediate neighbours')
print(' - A 2-layer GNN sees up to 2 hops away')
print(' - For diameter-d graphs, d layers needed for full context')
# Simulate GCN aggregation step
import numpy as np
np.random.seed(42)
H0 = np.random.randn(n_g, 4)  # initial features
# Build adjacency matrix (symmetric: the graph is undirected)
A_gcn = np.zeros((n_g, n_g))
for u, nbrs in adj_g.items():
    for v in nbrs:
        A_gcn[u,v] = A_gcn[v,u] = 1.0
# Normalised GCN aggregation: A_hat = D^{-1/2} (A + I) D^{-1/2}
A_tilde = A_gcn + np.eye(n_g)
D_inv_sqrt = np.diag(1.0 / np.sqrt(A_tilde.sum(axis=1)))
A_hat = D_inv_sqrt @ A_tilde @ D_inv_sqrt
# One GCN layer (linear, no activation for clean demo)
H1 = A_hat @ H0
print(f'\n1-layer GCN: H0 shape {H0.shape} → H1 shape {H1.shape}')
print(f'Row 0 of H1 is a weighted avg of H0 rows for node 0 and its neighbours')
print(f' neighbours of 0: {adj_g[0]}')
print(f' A_hat[0, neighbours]: {A_hat[0, adj_g[0]].round(3)}')
9. Algorithm Complexity Benchmark
Empirical timing comparison of BFS, Dijkstra, Bellman-Ford, and Floyd-Warshall on random graphs of increasing size.
Code cell 33
# === 9. Timing Benchmark: Graph Algorithm Complexity ===
import time
import numpy as np
from collections import deque
import heapq
def make_random_graph(n, m, seed=42):
    """Random directed graph with n nodes and up to m weighted edges.

    Each of the m samples draws (u, v) uniformly and a weight in
    [1, 10); self-loops are discarded, so the final edge count can be
    slightly below m. Returns (adjacency dict, edge list, n). The same
    seed always reproduces the same graph.
    """
    rng = np.random.default_rng(seed)
    adj = {i: [] for i in range(n)}
    edges = []
    for _ in range(m):
        src = int(rng.integers(0, n))
        dst = int(rng.integers(0, n))
        wt = float(rng.uniform(1, 10))
        if src == dst:
            continue  # drop self-loops
        adj[src].append((dst, wt))
        edges.append((src, dst, wt))
    return adj, edges, n
# --- Run the timing benchmark over increasing graph sizes ---
sizes = [(100, 500), (300, 1500), (500, 2500)]
results = []
for n, m in sizes:
    adj, edges, _ = make_random_graph(n, m)
    # BFS (unit weights): strip weights to a plain adjacency list
    adj_unw = {u: [v for v,_ in nbrs] for u, nbrs in adj.items()}
    t0 = time.perf_counter()
    bfs(adj_unw, 0, n)
    t_bfs = time.perf_counter() - t0
    # Dijkstra
    t0 = time.perf_counter()
    dijkstra(adj, 0, n)
    t_dijk = time.perf_counter() - t0
    # Bellman-Ford
    t0 = time.perf_counter()
    bellman_ford(edges, 0, n)
    t_bf = time.perf_counter() - t0
    results.append({'n':n,'m':m,'bfs':t_bfs,'dijkstra':t_dijk,'bellman':t_bf})
# Header uses a double-quoted f-string so the single-quoted literals
# inside are legal on every Python version — reusing the same quote
# character inside an f-string needs 3.12+ (PEP 701).
print(f"{'n':>5} {'m':>6} | {'BFS(ms)':>10} {'Dijkstra(ms)':>14} {'B-Ford(ms)':>12}")
print('-' * 55)
for r in results:
    print(f"{r['n']:>5} {r['m']:>6} | {r['bfs']*1e3:>10.2f} {r['dijkstra']*1e3:>14.2f} {r['bellman']*1e3:>12.2f}")
print('\nObserved scaling:')
if len(results) >= 2:
    ratio_n = results[-1]['n'] / results[0]['n']
    for alg in ['bfs','dijkstra','bellman']:
        # Guard denominator against a zero timing on very fast runs.
        ratio_t = results[-1][alg] / max(results[0][alg], 1e-9)
        print(f' {alg:10s}: n×{ratio_n:.0f} → t×{ratio_t:.1f}')
Summary
| Algorithm | Problem | Complexity | Key Constraint |
|---|---|---|---|
| BFS | SSSP (hop) | $O(V+E)$ | Unweighted only |
| DFS | Cycles, SCC structure | $O(V+E)$ | Any graph |
| Dijkstra | SSSP (weighted) | $O((V+E)\log V)$ | Non-negative weights |
| Bellman-Ford | SSSP + neg cycle | $O(VE)$ | Any weights |
| Floyd-Warshall | APSP | $O(V^3)$ | No negative cycles |
| A* | Single-pair SSSP | $O(E)$ avg | Admissible heuristic |
| Kruskal | MST | $O(E \log E)$ | Undirected |
| Prim | MST | $O(E \log V)$ | Undirected |
| Kahn / DFS-topo | Topological sort | $O(V+E)$ | DAG only |
| Tarjan SCC | All SCCs | $O(V+E)$ | Directed |
| Edmonds-Karp | Max flow | $O(VE^2)$ | Non-neg capacities |
Next: §04 Spectral Graph Theory — eigenvalues of the Laplacian, Fiedler vector, and the algebraic view of graph connectivity and clustering.