Theory Notebook — Math for LLMs

RAG Math and Retrieval

Math for LLMs / RAG Math and Retrieval

Run notebook
Private notes
0/8000

Notes stay private to your browser until account sync is configured.

Theory Notebook
1 min read · 18 headings

Theory Notebook

Converted from theory.ipynb for web reading.

RAG Math and Retrieval: Theory Notebook

This notebook makes RAG math concrete: similarity scores, sparse retrieval intuition, contrastive loss, recall metrics, MMR, reranking, and context packing.

Code cell 2

# Shared plotting setup for the whole notebook: prefer seaborn's theme
# when the package is installed, otherwise fall back to matplotlib's
# bundled port of the same style, then apply consistent figure defaults.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

try:
    import seaborn as sns
except ImportError:
    # seaborn not installed: matplotlib ships an equivalent style sheet.
    plt.style.use("seaborn-v0_8-whitegrid")
    HAS_SNS = False
else:
    sns.set_theme(style="whitegrid", palette="colorblind")
    HAS_SNS = True

# One place to tune every figure produced below.
_PLOT_DEFAULTS = {
    "figure.figsize": (10, 6),
    "figure.dpi": 120,
    "font.size": 13,
    "axes.titlesize": 15,
    "axes.labelsize": 13,
    "xtick.labelsize": 11,
    "ytick.labelsize": 11,
    "legend.fontsize": 11,
    "legend.framealpha": 0.85,
    "lines.linewidth": 2.0,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "savefig.bbox": "tight",
    "savefig.dpi": 150,
}
mpl.rcParams.update(_PLOT_DEFAULTS)

# Fixed seed so every random example in the notebook is reproducible.
np.random.seed(42)
print("Plot setup complete.")

1. Cosine retrieval

Code cell 4

def normalize(X):
    """Scale vectors along the last axis to unit Euclidean norm.

    Parameters
    ----------
    X : array_like
        One or more vectors; normalization is applied along the last axis.

    Returns
    -------
    numpy.ndarray
        Float array of the same shape with each last-axis slice rescaled
        to length 1.  All-zero vectors are returned unchanged instead of
        producing NaN/inf from a division by zero.
    """
    X = np.asarray(X, dtype=float)
    norms = np.linalg.norm(X, axis=-1, keepdims=True)
    # Guard zero-length vectors: dividing by 1 leaves them as zeros.
    return X / np.where(norms == 0.0, 1.0, norms)

# Rank four documents against one query by cosine similarity.  Because
# both sides are unit-normalized, a plain dot product IS the cosine.
query = normalize(np.array([[1.0, 1.0, 0.0]]))[0]
docs = normalize(np.array([
    [1.0, 0.9, 0.1],
    [-1.0, 0.0, 0.0],
    [0.2, 0.1, 1.0],
    [0.8, 0.7, 0.0],
]))
scores = docs @ query
# argsort ascending, reversed, then the first two indices = top-2 docs.
top_two = np.argsort(scores)[::-1][:2]
print("cosine scores:", np.round(scores, 3))
print("top-2 docs:", top_two)

2. Norm effects

Code cell 6

# Dot product rewards vector length while cosine ignores it: d2 wins on
# raw dot score purely because of its larger norm, not its direction.
q = np.array([1.0, 0.0])
d1 = np.array([1.0, 0.0])
d2 = np.array([5.0, 1.0])

def _cos(a, b):
    # Cosine similarity of two 1-D vectors.
    return a @ b / (np.linalg.norm(a) * np.linalg.norm(b))

dot_scores = np.array([q @ d1, q @ d2])
cos_scores = np.array([_cos(q, d1), _cos(q, d2)])
print("dot scores:", dot_scores)
print("cosine scores:", np.round(cos_scores, 3))

3. BM25-style lexical intuition

Code cell 8

# BM25-style lexical scoring (no document-length normalization here):
# term frequency saturates toward k1 + 1 and is weighted by IDF.
query_terms = ["vector", "search"]
docs_terms = [
    ["vector", "search", "index", "vector"],
    ["language", "model", "generation"],
    ["search", "engine", "retrieval", "search"],
]
idf = {"vector": 1.3, "search": 0.9}
k1 = 1.2

def _term_contribution(term, doc):
    # Saturating tf component: more occurrences help, with diminishing returns.
    tf = doc.count(term)
    if not tf:
        return 0.0
    return idf[term] * (tf * (k1 + 1)) / (tf + k1)

scores = [sum(_term_contribution(t, doc) for t in query_terms) for doc in docs_terms]
print("BM25-style scores:", np.round(scores, 3))

4. Dense contrastive loss

Code cell 10

# InfoNCE-style contrastive loss: softmax over query-document scores,
# then the negative log-probability of the positive document (index 0).
query = normalize(np.array([[1.0, 0.5, 0.0]]))[0]
doc_mat = normalize(np.array([
    [1.0, 0.4, 0.0],
    [0.0, 1.0, 0.2],
    [-1.0, 0.0, 0.1],
]))
scores = doc_mat @ query
# Log-sum-exp trick: subtract the max before exponentiating so np.exp
# cannot overflow for large score magnitudes.  The softmax — and hence
# the loss — is mathematically unchanged by the shift.
shifted = scores - scores.max()
loss = -(shifted[0] - np.log(np.exp(shifted).sum()))
print("scores:", np.round(scores, 3))
print("contrastive loss:", loss)

5. Recall@k and MRR

Code cell 12

# Recall@k = fraction of all relevant docs that appear in the top k.
# MRR (single query) = reciprocal rank of the first relevant hit.
ranked = ["d3", "d7", "d2", "d4", "d1"]
relevant = {"d2", "d5"}
for k in [1, 3, 5]:
    hits = sum(1 for doc in ranked[:k] if doc in relevant)
    recall = hits / len(relevant)
    print(f"Recall@{k}:", recall)
# First relevant position wins; 0.0 if nothing relevant was ranked.
rr = next((1 / rank for rank, doc in enumerate(ranked, 1) if doc in relevant), 0.0)
print("MRR for this query:", rr)

6. MMR diversity selection

Code cell 14

# Maximal Marginal Relevance: greedily pick documents that balance
# relevance against redundancy with what is already selected.
rel = np.array([0.95, 0.90, 0.88, 0.70])
sim = np.array([
    [1.0, 0.9, 0.2, 0.1],
    [0.9, 1.0, 0.3, 0.2],
    [0.2, 0.3, 1.0, 0.8],
    [0.1, 0.2, 0.8, 1.0],
])
lam = 0.7

def _mmr_score(candidate, chosen):
    # Redundancy penalty = max similarity to any already-chosen doc
    # (0 when nothing has been chosen yet).
    penalty = max((sim[candidate, s] for s in chosen), default=0)
    return lam * rel[candidate] - (1 - lam) * penalty

selected = []
candidates = set(range(len(rel)))
for _ in range(3):
    pick = max(candidates, key=lambda c: _mmr_score(c, selected))
    selected.append(pick)
    candidates.discard(pick)
print("MMR selected docs:", selected)

7. Context packing

Code cell 16

# Greedy context packing: visit chunks in order of score-per-token
# density and keep any chunk that still fits inside the token budget.
chunks = [
    {"id": "a", "score": 0.95, "tokens": 120},
    {"id": "b", "score": 0.85, "tokens": 300},
    {"id": "c", "score": 0.80, "tokens": 180},
    {"id": "d", "score": 0.70, "tokens": 90},
]
budget = 400
by_density = sorted(chunks, key=lambda ch: ch["score"] / ch["tokens"], reverse=True)
packed = []
used = 0
for ch in by_density:
    if used + ch["tokens"] > budget:
        # Too big for the remaining budget — a later, smaller chunk may still fit.
        continue
    packed.append(ch["id"])
    used += ch["tokens"]
print("packed:", packed, "tokens used:", used)

8. Reciprocal rank fusion

Code cell 18

# Reciprocal Rank Fusion: each list contributes 1 / (k + rank) per doc;
# the constant k damps how much any single list's top spot dominates.
rank_dense = ["a", "b", "c", "d"]
rank_sparse = ["c", "a", "e", "b"]
k = 60
scores = {}
for ranking in (rank_dense, rank_sparse):
    for position, doc in enumerate(ranking, start=1):
        scores[doc] = scores.get(doc, 0) + 1 / (k + position)
print("RRF scores:", {d: round(s, 4) for d, s in sorted(scores.items(), key=lambda x: -x[1])})

9. ANN recall toy

Code cell 20

# ANN quality = overlap between the approximate top-k and the exact top-k.
exact_top = {"d1", "d2", "d3", "d4", "d5"}
ann_top = {"d1", "d2", "d4", "d8", "d9"}
overlap = exact_top.intersection(ann_top)
recall = len(overlap) / len(exact_top)
print("ANN recall against exact top-5:", recall)

10. Failure decomposition

Code cell 22

# Attribute each RAG failure to the FIRST broken pipeline stage:
# retrieval -> evidence use -> answer correctness.
cases = [
    {"retrieved": False, "used": False, "correct": False},
    {"retrieved": True, "used": False, "correct": False},
    {"retrieved": True, "used": True, "correct": True},
]
# Ordered stage checks paired with the diagnosis when a stage fails.
_STAGES = [
    ("retrieved", "retrieval miss"),
    ("used", "generation ignored evidence"),
    ("correct", "generation error"),
]
for case_num, case in enumerate(cases, 1):
    reason = "success"
    for key, label in _STAGES:
        if not case[key]:
            reason = label
            break
    print(case_num, reason)

11. RAG trace checklist

Code cell 24

# Minimal checklist of what to log to make a RAG trace debuggable
# end to end, printed as a numbered list.
checks = [
    "query text and normalized query embedding norm",
    "top-k ids, scores, and chunk text",
    "reranker scores and final selected chunks",
    "full prompt after context packing",
    "answer claims mapped to supporting chunks",
    "retrieval metrics and answer metrics logged separately",
]
for number, item in enumerate(checks, start=1):
    print(f"{number}. {item}")

Skill Check

Test this lesson

Answer 4 quick questions to lock in the lesson and feed your adaptive practice queue.

--
Score
0/4
Answered
Not attempted
Status
1

Which module does this lesson belong to?

2

Which section is covered in this lesson content?

3

Which term is most central to this lesson?

4

What is the best way to use this lesson for real learning?

Your answers save locally first, then sync when account storage is available.
Practice queue