ml exercisesPython

ml exercises

data science ml

Run notebook
Practice
Advanced
13 min

Learning Objective

Understand these ML exercises well enough to explain them, recognize the underlying techniques in Python, and apply them in a small task.

Why It Matters

Practice exposes weak spots quickly, so you know whether you can actually use the concept.

Science · Exercise 1: NumPy Array Operations · Exercise 2: Matrix Operations · Exercise 3: Data Cleaning · Exercise 6: Confusion Matrix and Metrics
Private notes
0/8000

Notes stay private to your browser until account sync is configured.

ml exercises
1 min read · 18 headings

Converted from 05_ml_exercises.ipynb for web reading.

Code cell 1

import numpy as np
from typing import List, Tuple, Dict, Any, Optional

Exercise 1: NumPy Array Operations

Create functions for common array operations.

Code cell 3

def create_identity_matrix(n: int) -> np.ndarray:
    """
    Create an n x n identity matrix.

    Args:
        n: Number of rows and columns. Zero yields an empty (0, 0) array.

    Returns:
        A float array with ones on the main diagonal and zeros elsewhere.

    Raises:
        ValueError: If ``n`` is negative.

    Example:
        >>> create_identity_matrix(3)
        array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]])
    """
    # Validate up front so the caller gets a message phrased in terms of
    # this function's argument rather than NumPy's internal dimensions.
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    return np.eye(n)

# Test
create_identity_matrix(3)

Code cell 4

def normalize_array(arr: np.ndarray) -> np.ndarray:
    """
    Normalize array to have mean=0 and std=1 (z-score normalization).
    Formula: (x - mean) / std

    The mean and the population standard deviation (ddof=0) are computed
    over all elements of the array.

    Args:
        arr: Numeric array of any shape.

    Returns:
        A new float array with the same shape as ``arr``. An empty input is
        returned as an empty float array, and an input whose elements are
        all equal (std == 0) is mapped to all zeros instead of dividing by
        zero.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        return values.copy()

    spread = values.std()
    if spread == 0:
        # Every element equals the mean; the z-score is undefined, so report
        # "no deviation" rather than producing NaN/inf.
        return np.zeros_like(values)
    return (values - values.mean()) / spread

# Test
arr = np.array([10, 20, 30, 40, 50])
normalize_array(arr)

Code cell 5

def min_max_scale(arr: np.ndarray) -> np.ndarray:
    """
    Scale array to range [0, 1].
    Formula: (x - min) / (max - min)

    The minimum and maximum are taken over all elements of the array.

    Args:
        arr: Numeric array of any shape.

    Returns:
        A new float array with the same shape as ``arr``. An empty input is
        returned as an empty float array, and an input whose elements are
        all equal (max == min) is mapped to all zeros instead of dividing
        by zero.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        return values.copy()

    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant input: there is no range to scale across.
        return np.zeros_like(values)
    return (values - lowest) / span

# Test
arr = np.array([10, 20, 30, 40, 50])
min_max_scale(arr)

Exercise 2: Matrix Operations

Code cell 7

def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
    """
    Compute common statistics for an array.

    Args:
        arr: Non-empty numeric array; statistics are taken over all elements.

    Returns:
        Dictionary with the keys ``mean``, ``median``, ``std``, ``var``,
        ``min``, ``max`` and ``range`` (max - min). ``std`` and ``var`` are
        the population versions (ddof=0). All values are plain Python floats.

    Raises:
        ValueError: If ``arr`` is empty, since none of the statistics are
            defined for it.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        raise ValueError("cannot compute statistics of an empty array")

    lowest = float(values.min())
    highest = float(values.max())
    return {
        "mean": float(values.mean()),
        "median": float(np.median(values)),
        "std": float(values.std()),
        "var": float(values.var()),
        "min": lowest,
        "max": highest,
        "range": highest - lowest,
    }

# Test
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
compute_statistics(arr)

Exercise 3: Data Cleaning

Code cell 9

def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
    """
    Find missing values (NaN) in array.

    Args:
        data: Numeric array. Integer input is accepted and simply contains
            no missing values.

    Returns:
        (count of missing, indices of missing values). For a 1-D array the
        indices are ordinary positions; for higher-dimensional input they
        index into the flattened array.
    """
    missing = np.isnan(np.asarray(data, dtype=float))
    return int(missing.sum()), np.flatnonzero(missing)

# Test
data = np.array([1, np.nan, 3, np.nan, 5])
find_missing_values(data)

Code cell 10

def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
    """
    Replace NaN values with the mean of non-NaN values.

    Args:
        data: Numeric array that may contain NaN. It is not modified.

    Returns:
        A new float array with every NaN replaced by the mean of the
        remaining values. If there are no NaNs, or if every value is NaN
        (so no mean exists), an unchanged float copy is returned.
    """
    # np.array(...) always copies, so the caller's array is never mutated.
    filled = np.array(data, dtype=float)
    missing = np.isnan(filled)
    if not missing.any() or missing.all():
        return filled

    filled[missing] = filled[~missing].mean()
    return filled

# Test
data = np.array([1, np.nan, 3, np.nan, 5])
fill_missing_with_mean(data)

Exercise 4: Simple Linear Regression from Scratch

Code cell 12

class SimpleLinearRegression:
    """
    Simple linear regression using ordinary least squares.
    Formula: y = mx + b

    m = sum((x - x_mean)(y - y_mean)) / sum((x - x_mean)^2)
    b = y_mean - m * x_mean

    Attributes ``slope`` (m) and ``intercept`` (b) are ``None`` until
    ``fit`` has been called.
    """

    def __init__(self):
        self.slope = None
        self.intercept = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> "SimpleLinearRegression":
        """
        Fit the model to training data.

        Args:
            X: Values of the single input feature; flattened to 1-D.
            y: Target values, one per element of ``X``; flattened to 1-D.

        Returns:
            The fitted model itself, so calls can be chained.

        Raises:
            ValueError: If the inputs are empty, differ in length, or all
                ``X`` values are identical (the slope is undefined).
        """
        x_values = np.asarray(X, dtype=float).ravel()
        y_values = np.asarray(y, dtype=float).ravel()
        if x_values.size == 0:
            raise ValueError("cannot fit on empty data")
        if x_values.size != y_values.size:
            raise ValueError(
                f"X and y must have the same length, "
                f"got {x_values.size} and {y_values.size}"
            )

        x_mean = x_values.mean()
        y_mean = y_values.mean()
        x_dev = x_values - x_mean
        x_variation = np.sum(x_dev ** 2)
        if x_variation == 0:
            # A vertical cloud of points has no finite least-squares slope.
            raise ValueError("X must contain at least two distinct values")

        self.slope = float(np.sum(x_dev * (y_values - y_mean)) / x_variation)
        self.intercept = float(y_mean - self.slope * x_mean)
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Make predictions.

        Args:
            X: Input values of any shape.

        Returns:
            Float array of ``slope * X + intercept`` with the shape of ``X``.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.slope is None or self.intercept is None:
            raise RuntimeError("model is not fitted; call fit() first")
        return self.slope * np.asarray(X, dtype=float) + self.intercept

    def r_squared(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Calculate R² score.

        R² = 1 - SS_res / SS_tot, where SS_res is the sum of squared
        residuals and SS_tot the sum of squared deviations of ``y`` from
        its mean.

        Args:
            X: Input values.
            y: True target values, one per element of ``X``.

        Returns:
            The coefficient of determination. When ``y`` is constant
            (SS_tot == 0) the ratio is undefined; 1.0 is returned for a
            perfect fit and 0.0 otherwise.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        actual = np.asarray(y, dtype=float).ravel()
        predicted = self.predict(X).ravel()
        ss_res = float(np.sum((actual - predicted) ** 2))
        ss_tot = float(np.sum((actual - actual.mean()) ** 2))
        if ss_tot == 0:
            return 1.0 if ss_res == 0 else 0.0
        return 1.0 - ss_res / ss_tot

Code cell 13

# Test Linear Regression
# The training points lie exactly on the line y = 2x.
X = np.arange(1, 6)
y = 2 * X

model = SimpleLinearRegression()
model.fit(X, y)

# Report the fitted parameters, then predictions for two unseen inputs.
print(f"Slope: {model.slope}")
print(f"Intercept: {model.intercept}")
print(f"Predictions: {model.predict(np.array([6, 7]))}")

Exercise 5: K-Nearest Neighbors from Scratch

Code cell 15

class SimpleKNN:
    """
    K-Nearest Neighbors classifier.

    Each query point is assigned the most common label among the ``k``
    training points closest to it in Euclidean distance. Ties are broken
    deterministically: equidistant neighbors are taken in training order,
    and among equally frequent labels the smallest one wins.
    """

    def __init__(self, k: int = 3):
        """
        Args:
            k: Number of neighbors that vote; must be at least 1. If it
                exceeds the number of training samples, all samples vote.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> "SimpleKNN":
        """
        Store training data.

        Args:
            X: Training samples with shape (n_samples, n_features).
            y: Class labels, one per sample.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            ValueError: If ``X`` is not 2-D, is empty, or its number of rows
                differs from the number of labels.
        """
        samples = np.asarray(X, dtype=float)
        labels = np.asarray(y).ravel()
        if samples.ndim != 2:
            raise ValueError("X must be 2-D with shape (n_samples, n_features)")
        if samples.shape[0] == 0:
            raise ValueError("cannot fit on empty data")
        if samples.shape[0] != labels.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {samples.shape[0]} and {labels.shape[0]}"
            )
        self.X_train = samples
        self.y_train = labels
        return self

    def _check_is_fitted(self) -> None:
        """Raise RuntimeError if no training data has been stored yet."""
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("classifier is not fitted; call fit() first")

    def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """Calculate Euclidean distance between two points."""
        difference = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        return float(np.sqrt(np.sum(difference ** 2)))

    def _predict_single(self, x: np.ndarray) -> int:
        """
        Predict class for a single sample.

        Args:
            x: One sample with n_features values.

        Returns:
            The majority label among the k nearest training samples.

        Raises:
            RuntimeError: If the classifier has not been fitted yet.
        """
        self._check_is_fitted()
        distances = np.array(
            [self._euclidean_distance(x, row) for row in self.X_train]
        )
        # A stable sort keeps equidistant neighbors in training order, which
        # makes the choice of the k nearest reproducible.
        nearest = np.argsort(distances, kind="stable")[: self.k]
        candidates, votes = np.unique(self.y_train[nearest], return_counts=True)
        # np.unique returns sorted labels and argmax picks the first maximum,
        # so a tied vote resolves to the smallest label.
        return candidates[np.argmax(votes)].item()

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict classes for multiple samples.

        Args:
            X: Samples with shape (n_samples, n_features). A 1-D input is
                treated as a single sample.

        Returns:
            Array with one predicted label per sample.

        Raises:
            RuntimeError: If the classifier has not been fitted yet.
        """
        self._check_is_fitted()
        samples = np.atleast_2d(np.asarray(X, dtype=float))
        return np.array([self._predict_single(sample) for sample in samples])

Code cell 16

# Test KNN
# Four points on the diagonal: [[1, 1], [2, 2], [3, 3], [4, 4]].
X_train = np.repeat(np.arange(1, 5), 2).reshape(-1, 2)
# The first two points belong to class 0, the last two to class 1.
y_train = np.repeat([0, 1], 2)

knn = SimpleKNN(k=3)
knn.fit(X_train, y_train)
print(f"Prediction for [2.5, 2.5]: {knn.predict(np.array([[2.5, 2.5]]))}")

Exercise 6: Confusion Matrix and Metrics

Code cell 18

def confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute confusion matrix for binary classification.

    Label 1 is the positive class; every other value counts as negative.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same shape as ``y_true``.

    Returns:
        2x2 integer array: [[TN, FP], [FN, TP]]

    Raises:
        ValueError: If the two arrays have different shapes.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )

    actual_pos = actual == 1
    predicted_pos = predicted == 1
    tn = np.count_nonzero(~actual_pos & ~predicted_pos)
    fp = np.count_nonzero(~actual_pos & predicted_pos)
    fn = np.count_nonzero(actual_pos & ~predicted_pos)
    tp = np.count_nonzero(actual_pos & predicted_pos)
    return np.array([[tn, fp], [fn, tp]])

# Test
y_true = np.array([0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1])
confusion_matrix(y_true, y_pred)

Code cell 19

def precision_recall_f1(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """
    Calculate precision, recall, and F1 score.

    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)

    Label 1 is the positive class; every other value counts as negative.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, same shape as ``y_true``.

    Returns:
        Dictionary with the keys ``precision``, ``recall`` and ``f1``.
        A metric whose denominator is zero (no predicted positives, no
        actual positives, or precision + recall == 0) is reported as 0.0.

    Raises:
        ValueError: If the two arrays have different shapes.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )

    actual_pos = actual == 1
    predicted_pos = predicted == 1
    tp = int(np.count_nonzero(actual_pos & predicted_pos))
    fp = int(np.count_nonzero(~actual_pos & predicted_pos))
    fn = int(np.count_nonzero(actual_pos & ~predicted_pos))

    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0.0
    return {"precision": precision, "recall": recall, "f1": f1}

# Test
# Labels are defined here so this cell runs without the previous one.
y_true = np.array([0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1])
precision_recall_f1(y_true, y_pred)

Solutions

Uncomment the code below to check your work (reference solutions are provided for Exercises 1–3).

Code cell 21

# # Solution 1: Array Operations
# def create_identity_matrix(n: int) -> np.ndarray:
#     return np.eye(n)

# def normalize_array(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.mean()) / arr.std()

# def min_max_scale(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.min()) / (arr.max() - arr.min())

# # Solution 2: Statistics
# def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
#     return {
#         'mean': float(np.mean(arr)),
#         'median': float(np.median(arr)),
#         'std': float(np.std(arr)),
#         'var': float(np.var(arr)),
#         'min': float(np.min(arr)),
#         'max': float(np.max(arr)),
#         'range': float(np.max(arr) - np.min(arr))
#     }

# # Solution 3: Data Cleaning
# def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
#     mask = np.isnan(data)
#     return int(mask.sum()), np.where(mask)[0]

# def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
#     result = data.copy()
#     mean_val = np.nanmean(data)
#     result[np.isnan(result)] = mean_val
#     return result

Skill Check

Test this lesson

Answer 4 quick questions to lock in the lesson and feed your adaptive practice queue.

--
Score
0/4
Answered
Not attempted
Status
1

Which module does this lesson belong to?

2

Which section is covered in this lesson content?

3

Which term is most central to this lesson?

4

What is the best way to use this lesson for real learning?

Your answers save locally first, then sync when account storage is available.
Practice queue