ML Exercises

ML Exercises

1 min read · 18 headings

Converted from 05_ml_exercises.ipynb for web reading.

Code cell 1

import numpy as np
from typing import List, Tuple, Dict, Any, Optional

Exercise 1: NumPy Array Operations

Create functions for common array operations.

Code cell 3

def create_identity_matrix(n: int) -> np.ndarray:
    """
    Create an n x n identity matrix.

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        n: Number of rows and columns of the square matrix.

    Returns:
        An n x n array with ones on the main diagonal and zeros
        everywhere else (float entries, as in the example below).

    Example:
        >>> create_identity_matrix(3)
        array([[1., 0., 0.],
               [0., 1., 0.],
               [0., 0., 1.]])
    """
    # YOUR CODE HERE
    pass

# Test: should display the 3 x 3 identity matrix once implemented.
create_identity_matrix(3)

Code cell 4

def normalize_array(arr: np.ndarray) -> np.ndarray:
    """
    Normalize array to have mean=0 and std=1 (z-score normalization).
    Formula: (x - mean) / std

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        arr: Numeric array to normalize.

    Returns:
        Array of the same shape holding the z-score of every element.

    Notes:
        The reference solution in the Solutions section divides by
        ``arr.std()``, i.e. NumPy's default population standard deviation.
        A constant array has std == 0, which makes the formula a division
        by zero; decide how your implementation should treat that input.
    """
    # YOUR CODE HERE
    pass

# Test: the mean of this input is 30, so the middle element should map to 0
# and the result should be symmetric around it.
arr = np.array([10, 20, 30, 40, 50])
normalize_array(arr)

Code cell 5

def min_max_scale(arr: np.ndarray) -> np.ndarray:
    """
    Scale array to range [0, 1].
    Formula: (x - min) / (max - min)

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        arr: Numeric array to rescale.

    Returns:
        Array of the same shape in which the smallest input maps to 0 and
        the largest maps to 1.

    Notes:
        If every element is equal then max - min is 0 and the formula
        divides by zero; decide how your implementation should treat that
        input.
    """
    # YOUR CODE HERE
    pass

# Test: expected result is [0.0, 0.25, 0.5, 0.75, 1.0].
arr = np.array([10, 20, 30, 40, 50])
min_max_scale(arr)

Exercise 2: Matrix Operations

Code cell 7

def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
    """
    Compute common statistics for an array.

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        arr: Numeric array to summarize.

    Returns:
        Dictionary with mean, median, std, var, min, max, range.
        The reference solution in the Solutions section uses exactly the
        keys 'mean', 'median', 'std', 'var', 'min', 'max' and 'range',
        converts every value to a plain float, and defines range as
        max - min.
    """
    # YOUR CODE HERE
    pass

# Test: for 1..10 the mean and median are both 5.5, min is 1, max is 10
# and range is 9.
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
compute_statistics(arr)

Exercise 3: Data Cleaning

Code cell 9

def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
    """
    Find missing values (NaN) in array.

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        data: Float array that may contain NaN entries (1-D in the test
            below).

    Returns:
        (count of missing, indices of missing values)
    """
    # YOUR CODE HERE
    pass

# Test: NaN sits at positions 1 and 3, so the expected result is a count
# of 2 together with the indices [1, 3].
data = np.array([1, np.nan, 3, np.nan, 5])
find_missing_values(data)

Code cell 10

def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
    """
    Replace NaN values with the mean of non-NaN values.

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        data: Float array that may contain NaN entries.

    Returns:
        Array of the same shape with every NaN replaced by the mean of
        the remaining values. The reference solution in the Solutions
        section works on a copy, leaving the input array unmodified.
    """
    # YOUR CODE HERE
    pass

# Test: the non-NaN values 1, 3 and 5 average to 3, so the expected result
# is [1., 3., 3., 3., 5.].
data = np.array([1, np.nan, 3, np.nan, 5])
fill_missing_with_mean(data)

Exercise 4: Simple Linear Regression from Scratch

Code cell 12

class SimpleLinearRegression:
    """
    Simple linear regression using ordinary least squares.
    Formula: y = mx + b

    m = sum((x - x_mean)(y - y_mean)) / sum((x - x_mean)^2)
    b = y_mean - m * x_mean

    Exercise stub: fit, predict and r_squared are intentionally left as
    ``pass`` and return None until you implement them.

    Attributes:
        slope: m in the formula above; None until fit stores a value.
        intercept: b in the formula above; None until fit stores a value.
    """

    def __init__(self) -> None:
        # The parameters are unknown until fit() has been implemented and
        # called.
        self.slope = None
        self.intercept = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Fit the model to training data.

        Should compute m and b with the formulas in the class docstring
        and store them in self.slope and self.intercept.

        Args:
            X: Input values (a 1-D array in the accompanying test cell).
            y: Target values, one per element of X.
        """
        # YOUR CODE HERE
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Make predictions.

        Should apply y = mx + b element-wise using the fitted slope and
        intercept.

        Args:
            X: Input values to predict for.

        Returns:
            Array of predicted values, one per element of X.
        """
        # YOUR CODE HERE
        pass

    def r_squared(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Calculate R² score.

        R² (the coefficient of determination) is conventionally
        1 - SS_res / SS_tot, where SS_res is the sum of squared
        differences between y and the predictions for X, and SS_tot is
        the sum of squared differences between y and its mean.

        Args:
            X: Input values.
            y: True target values for X.

        Returns:
            The R² score as a float; 1.0 means a perfect fit.
        """
        # YOUR CODE HERE
        pass

Code cell 13

# Test Linear Regression
# The data lies exactly on y = 2x, so a correct implementation should give
# slope 2, intercept 0 and predictions [12, 14] for the inputs [6, 7].
# While the methods are still stubs, all three lines print None.
X = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 6, 8, 10])

model = SimpleLinearRegression()
model.fit(X, y)

print(f"Slope: {model.slope}")
print(f"Intercept: {model.intercept}")
print(f"Predictions: {model.predict(np.array([6, 7]))}")

Exercise 5: K-Nearest Neighbors from Scratch

Code cell 15

class SimpleKNN:
    """
    K-Nearest Neighbors classifier.

    Exercise stub: every method except __init__ is intentionally left as
    ``pass`` and returns None until you implement it.

    Attributes:
        k: Number of nearest neighbours used for a prediction (default 3).
        X_train: Training samples; None until fit stores them.
        y_train: Training labels; None until fit stores them.
    """

    def __init__(self, k: int = 3) -> None:
        self.k = k
        # No training data is held until fit() has been implemented and
        # called.
        self.X_train = None
        self.y_train = None

    def fit(self, X: np.ndarray, y: np.ndarray):
        """
        Store training data.

        Should keep X and y in self.X_train and self.y_train so the
        prediction methods can use them later.

        Args:
            X: Training samples (2-D, one row per sample, in the
                accompanying test cell).
            y: Class label for each row of X.
        """
        # YOUR CODE HERE
        pass

    def _euclidean_distance(self, x1: np.ndarray, x2: np.ndarray) -> float:
        """
        Calculate Euclidean distance between two points.

        That is, the square root of the sum of squared coordinate
        differences between x1 and x2.

        Args:
            x1: First point.
            x2: Second point with the same number of coordinates.

        Returns:
            The distance as a float.
        """
        # YOUR CODE HERE
        pass

    def _predict_single(self, x: np.ndarray) -> int:
        """
        Predict class for a single sample.

        The usual approach: measure the distance from x to every training
        sample, take the self.k closest ones and return the most common
        label among them.

        Args:
            x: One sample to classify.

        Returns:
            The predicted class label.
        """
        # YOUR CODE HERE
        pass

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict classes for multiple samples.

        Args:
            X: Samples to classify, one row per sample.

        Returns:
            Array with one predicted label per row of X.
        """
        # YOUR CODE HERE
        pass

Code cell 16

# Test KNN
# Distances from [2.5, 2.5]: about 0.71 to [2, 2] (label 0) and to [3, 3]
# (label 1), and about 2.12 to both [1, 1] (label 0) and [4, 4] (label 1).
# NOTE: with k=3 the third neighbour is therefore an exact tie between a
# label-0 and a label-1 point, so the predicted class depends on how your
# implementation breaks distance ties.
# While the methods are still stubs, this prints None.
X_train = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
y_train = np.array([0, 0, 1, 1])

knn = SimpleKNN(k=3)
knn.fit(X_train, y_train)
print(f"Prediction for [2.5, 2.5]: {knn.predict(np.array([[2.5, 2.5]]))}")

Exercise 6: Confusion Matrix and Metrics

Code cell 18

def confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute confusion matrix for binary classification.

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        y_true: Ground-truth labels (0 or 1 in the test below).
        y_pred: Predicted labels, aligned element-wise with y_true.

    Returns:
        2x2 array: [[TN, FP], [FN, TP]], i.e. rows are indexed by the
        true label and columns by the predicted label.
    """
    # YOUR CODE HERE
    pass

# Test: these labels give TN=1, FP=1, FN=1, TP=2, so the expected result
# is [[1, 1], [1, 2]].
y_true = np.array([0, 0, 1, 1, 1])
y_pred = np.array([0, 1, 0, 1, 1])
confusion_matrix(y_true, y_pred)

Code cell 19

def precision_recall_f1(y_true: np.ndarray, y_pred: np.ndarray) -> Dict[str, float]:
    """
    Calculate precision, recall, and F1 score.

    Precision = TP / (TP + FP)
    Recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)

    Exercise stub: the body is intentionally left as ``pass``, so the
    function returns None until you implement it.

    Args:
        y_true: Ground-truth binary labels.
        y_pred: Predicted binary labels, aligned element-wise with y_true.

    Returns:
        Dictionary holding the three metrics as floats. The exact key
        names are not fixed by this exercise; 'precision', 'recall' and
        'f1' are the natural choice.

    Notes:
        Each formula has a zero denominator in some cases (for example
        precision when nothing is predicted positive); decide how your
        implementation should treat them.
    """
    # YOUR CODE HERE
    pass

# Test: reuses y_true and y_pred from the confusion-matrix test cell
# (TP=2, FP=1, FN=1), so precision, recall and F1 should all equal 2/3.
precision_recall_f1(y_true, y_pred)

Solutions

Uncomment to check your work

Code cell 21

# # Solution 1: Array Operations
# def create_identity_matrix(n: int) -> np.ndarray:
#     return np.eye(n)

# def normalize_array(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.mean()) / arr.std()

# def min_max_scale(arr: np.ndarray) -> np.ndarray:
#     return (arr - arr.min()) / (arr.max() - arr.min())

# # Solution 2: Statistics
# def compute_statistics(arr: np.ndarray) -> Dict[str, float]:
#     return {
#         'mean': float(np.mean(arr)),
#         'median': float(np.median(arr)),
#         'std': float(np.std(arr)),
#         'var': float(np.var(arr)),
#         'min': float(np.min(arr)),
#         'max': float(np.max(arr)),
#         'range': float(np.max(arr) - np.min(arr))
#     }

# # Solution 3: Data Cleaning
# def find_missing_values(data: np.ndarray) -> Tuple[int, np.ndarray]:
#     mask = np.isnan(data)
#     return int(mask.sum()), np.where(mask)[0]

# def fill_missing_with_mean(data: np.ndarray) -> np.ndarray:
#     result = data.copy()
#     mean_val = np.nanmean(data)
#     result[np.isnan(result)] = mean_val
#     return result

Code cell 1

Exercise 1: NumPy Array Operations

Code cell 3

Code cell 4

Code cell 5

Exercise 2: Matrix Operations

Code cell 7

Exercise 3: Data Cleaning

Code cell 9

Code cell 10

Exercise 4: Simple Linear Regression from Scratch

Code cell 12

Code cell 13

Exercise 5: K-Nearest Neighbors from Scratch

Code cell 15

Code cell 16

Exercise 6: Confusion Matrix and Metrics

Code cell 18

Code cell 19

Solutions

Code cell 21

Test this lesson

Which module does this lesson belong to?

Which section is covered in this lesson content?

Which term is most central to this lesson?

What is the best way to use this lesson for real learning?