# Debugging and Profiling

## What You'll Learn

- Debugging techniques and tools
- Using pdb and the VS Code debugger
- Logging best practices
- CPU and memory profiling
- Finding and fixing performance issues
## Debugging Strategies

### 1. Print Debugging (Quick & Simple)
```python
def calculate(x, y):
    print(f"DEBUG: x={x}, y={y}")  # Simple debug print
    result = x * y
    print(f"DEBUG: result={result}")
    return result

# Better: f-string debugging syntax (Python 3.8+)
name = "Alice"
age = 30
print(f"{name=}, {age=}")  # Output: name='Alice', age=30
```
### 2. Built-in Debugger (pdb)
```python
import pdb

def problematic_function(data):
    result = []
    for item in data:
        # Set a breakpoint:
        # pdb.set_trace()  # Old way
        breakpoint()  # Built-in since Python 3.7 (preferred)
        processed = item * 2
        result.append(processed)
    return result

# PDB commands:
# n (next)     - Execute the next line
# s (step)     - Step into a function
# c (continue) - Continue until the next breakpoint
# p expr       - Print an expression
# pp expr      - Pretty-print an expression
# l (list)     - Show source code
# w (where)    - Show the call stack
# q (quit)     - Quit the debugger
# h (help)     - Show help
```
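A useful detail about the built-in hook: `breakpoint()` respects the `PYTHONBREAKPOINT` environment variable, so you can silence stray breakpoints or swap in another debugger without editing code:

```bash
# Disable every breakpoint() call in the program
PYTHONBREAKPOINT=0 python script.py

# Route breakpoint() to a different debugger (here ipdb, if installed)
PYTHONBREAKPOINT=ipdb.set_trace python script.py
```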
### 3. Post-Mortem Debugging
```python
import pdb
import traceback

def risky_operation():
    # Placeholder for code that might fail
    raise ValueError("something went wrong")

def main():
    try:
        result = risky_operation()
    except Exception as e:
        print(f"Error: {e}")
        traceback.print_exc()
        pdb.post_mortem()  # Debug at the point of failure

main()

# Or run the whole script under pdb:
# python -m pdb script.py
```
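Note that `python -m pdb script.py` stops at the first line by default. Passing `-c continue` runs the program normally and only drops into the post-mortem debugger when an unhandled exception occurs:

```bash
# Run until an unhandled exception, then enter pdb at the crash site
python -m pdb -c continue script.py
```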
## VS Code Debugger

### Launch Configuration

Create `.vscode/launch.json`:
```json
{
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Python: Current File",
      "type": "debugpy",
      "request": "launch",
      "program": "${file}",
      "console": "integratedTerminal"
    },
    {
      "name": "Python: FastAPI",
      "type": "debugpy",
      "request": "launch",
      "module": "uvicorn",
      "args": ["main:app", "--reload"],
      "jinja": true
    },
    {
      "name": "Python: Flask",
      "type": "debugpy",
      "request": "launch",
      "module": "flask",
      "env": {
        "FLASK_APP": "app.py",
        "FLASK_DEBUG": "1"
      },
      "args": ["run"]
    },
    {
      "name": "Python: pytest",
      "type": "debugpy",
      "request": "launch",
      "module": "pytest",
      "args": ["-v", "tests/"]
    }
  ]
}
```
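debugpy can also attach to a process that is already running, which is useful for containers and remote servers. A minimal sketch of an extra entry for the `configurations` array, assuming the target was started with `python -m debugpy --listen 5678 --wait-for-client script.py`:

```json
{
  "name": "Python: Attach",
  "type": "debugpy",
  "request": "attach",
  "connect": {
    "host": "localhost",
    "port": 5678
  }
}
```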
### Debugging Features
| Feature | Description |
|---|---|
| Breakpoints | Click left of line number |
| Conditional Breakpoints | Right-click → Conditional |
| Logpoints | Print without stopping |
| Watch | Monitor variable values |
| Call Stack | See function call history |
| Variables | Inspect all variables |
## Logging Best Practices

### Basic Logging Setup
```python
import logging

# Configure logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('app.log'),
        logging.StreamHandler()  # Also print to console
    ]
)

logger = logging.getLogger(__name__)

# Log levels (from lowest to highest)
logger.debug("Detailed info for debugging")
logger.info("General information")
logger.warning("Something unexpected happened")
logger.error("An error occurred")
logger.critical("Critical error - app may crash")

# Log with exception info
try:
    result = 1 / 0
except Exception:
    logger.exception("Division failed")  # Includes the traceback
```
### Advanced Logging Configuration
```python
import logging
import logging.config

# Dictionary-based configuration
LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {
            "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        },
        "json": {  # Requires: pip install python-json-logger
            "class": "pythonjsonlogger.jsonlogger.JsonFormatter",
            "format": "%(asctime)s %(name)s %(levelname)s %(message)s"
        }
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "level": "DEBUG",
            "formatter": "standard",
            "stream": "ext://sys.stdout"
        },
        "file": {
            "class": "logging.handlers.RotatingFileHandler",
            "level": "INFO",
            "formatter": "standard",
            "filename": "app.log",
            "maxBytes": 10485760,  # 10 MB
            "backupCount": 5
        }
    },
    "loggers": {
        "": {  # Root logger
            "handlers": ["console", "file"],
            "level": "DEBUG"
        },
        "urllib3": {  # Quiet noisy libraries
            "level": "WARNING"
        }
    }
}

logging.config.dictConfig(LOGGING_CONFIG)
logger = logging.getLogger(__name__)
```
### Structured Logging with Context
```python
import json
import logging
from contextvars import ContextVar

# Context variable for the current request ID
request_id: ContextVar[str] = ContextVar('request_id', default='')

class ContextFilter(logging.Filter):
    def filter(self, record):
        record.request_id = request_id.get()
        return True

# Custom JSON formatter
class JSONFormatter(logging.Formatter):
    def format(self, record):
        log_obj = {
            "timestamp": self.formatTime(record),
            "level": record.levelname,
            "message": record.getMessage(),
            "module": record.module,
            "request_id": getattr(record, 'request_id', '')
        }
        if record.exc_info:
            log_obj["exception"] = self.formatException(record.exc_info)
        return json.dumps(log_obj)

# Usage: wire the filter and formatter into a logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addFilter(ContextFilter())
handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger.addHandler(handler)

# In a request handler
request_id.set("req-12345")
logger.info("Processing request")  # Includes request_id
```
## CPU Profiling

### cProfile (Built-in)
```python
import cProfile
import pstats
from pstats import SortKey

def slow_function():
    total = 0
    for i in range(1000000):
        total += i ** 2
    return total

# Profile a function and save the results
cProfile.run('slow_function()', 'output.prof')

# Analyze the results
stats = pstats.Stats('output.prof')
stats.strip_dirs()
stats.sort_stats(SortKey.CUMULATIVE)
stats.print_stats(10)  # Top 10 functions

# Or from the command line:
# python -m cProfile -s cumulative script.py
```
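Since Python 3.8, `cProfile.Profile` also works as a context manager, which is convenient when you only want to profile one block of code rather than a whole script. A minimal sketch:

```python
import cProfile
import pstats

with cProfile.Profile() as profiler:
    total = sum(i ** 2 for i in range(1_000_000))  # Code under measurement

# Print stats for the profiled block, sorted by cumulative time
stats = pstats.Stats(profiler)
stats.sort_stats(pstats.SortKey.CUMULATIVE).print_stats(10)
```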
### line_profiler (Line-by-Line)

```bash
pip install line_profiler
```

```python
# The @profile decorator is injected by kernprof at runtime,
# so this script raises NameError if run directly with python.
@profile
def slow_function():
    result = []
    for i in range(10000):
        result.append(i ** 2)
    return sum(result)

slow_function()  # Must be called for kernprof to record anything

# Run with:
# kernprof -l -v script.py
```
### Using timeit
```python
import timeit

# Time a small code snippet
elapsed = timeit.timeit(
    'sum(range(1000))',
    number=10000
)
print(f"Average: {elapsed / 10000:.6f} seconds")

# Compare two approaches
setup = "data = list(range(1000))"
time1 = timeit.timeit('sum(data)', setup=setup, number=10000)
time2 = timeit.timeit(
    'total = 0\nfor x in data: total += x',
    setup=setup,
    number=10000
)
print(f"sum(): {time1:.4f}s")
print(f"loop:  {time2:.4f}s")
```
## Memory Profiling

### memory_profiler

```bash
pip install memory_profiler
```
```python
from memory_profiler import profile

@profile
def memory_hungry():
    # Create a large list
    data = [i ** 2 for i in range(1000000)]
    # Process the data
    filtered = [x for x in data if x % 2 == 0]
    return sum(filtered)

memory_hungry()

# Run: python -m memory_profiler script.py
```
### tracemalloc (Built-in)
```python
import tracemalloc

# Start tracing memory allocations
tracemalloc.start()

# Your code here
data = [i ** 2 for i in range(100000)]
processed = [x for x in data if x > 1000]

# Get current and peak memory usage
current, peak = tracemalloc.get_traced_memory()
print(f"Current memory: {current / 1024 / 1024:.2f} MB")
print(f"Peak memory: {peak / 1024 / 1024:.2f} MB")

# Get the top memory consumers
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
print("\nTop 5 memory consumers:")
for stat in top_stats[:5]:
    print(stat)

tracemalloc.stop()
```
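To localize a leak, it often helps to diff two snapshots rather than stare at absolute numbers. A minimal sketch, using a hypothetical `suspected_leak()` workload:

```python
import tracemalloc

def suspected_leak(store):
    # Hypothetical workload that keeps growing a list
    store.extend(bytes(1000) for _ in range(1000))

tracemalloc.start()
store = []

before = tracemalloc.take_snapshot()
suspected_leak(store)
after = tracemalloc.take_snapshot()

# Show which lines allocated the most new memory between the snapshots
for stat in after.compare_to(before, 'lineno')[:5]:
    print(stat)
```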
### objgraph (Object Analysis)

```bash
pip install objgraph
```
```python
import objgraph

# Show the most common object types
objgraph.show_most_common_types(limit=10)

# Find objects by type
lists = objgraph.by_type('list')
print(f"Number of lists: {len(lists)}")

# Find reference chains (for debugging memory leaks)
# objgraph.show_backrefs(obj, max_depth=3)
```
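objgraph's `show_growth()` is particularly handy for leak hunting: called repeatedly, it prints only the object types whose counts grew since the previous call. A minimal sketch:

```python
import objgraph

objgraph.show_growth()  # First call establishes a baseline

leaky_cache = []
leaky_cache.append([0] * 1000)  # Simulate a leak

objgraph.show_growth()  # Prints only the types whose counts grew
```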
## Common Performance Issues

### 1. N+1 Query Problem
```python
# BAD: N+1 queries
for user in users:
    print(user.posts)  # Each lazy-loaded access = 1 extra query!

# GOOD: Eager loading (SQLAlchemy / Flask-SQLAlchemy)
from sqlalchemy.orm import joinedload

users = User.query.options(joinedload(User.posts)).all()
for user in users:
    print(user.posts)  # No additional queries
```
### 2. Inefficient Loops
```python
# BAD: Repeated string concatenation
result = ""
for item in items:
    result += str(item)  # O(n^2) - creates a new string each time

# GOOD: Use join
result = "".join(str(item) for item in items)  # O(n)

# BAD: Repeated membership check on a list
result_list = []
for item in items:
    if item not in result_list:  # O(n) check each time!
        result_list.append(item)

# GOOD: Use a set
result_set = set(items)  # O(n) total
```
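One caveat: a set does not preserve order. If the original order matters, `dict.fromkeys` gives O(n) deduplication while keeping insertion order (dicts preserve it since Python 3.7):

```python
items = [3, 1, 3, 2, 1]
deduped = list(dict.fromkeys(items))  # [3, 1, 2] - order preserved, O(n)
print(deduped)
```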
### 3. Memory-Inefficient Data Loading
```python
# BAD: Load the entire file into memory
with open("huge_file.txt") as f:
    data = f.read()  # Entire file in memory!
    for line in data.split("\n"):
        process(line)

# GOOD: Process line by line
with open("huge_file.txt") as f:
    for line in f:  # Reads one line at a time
        process(line)

# GOOD: Use generators for large data
def process_large_file(filename):
    with open(filename) as f:
        for line in f:
            yield process(line)
```
### 4. Blocking I/O
```python
import asyncio

import aiohttp
import requests

# BAD: Sequential requests
def fetch_all_sync(urls):
    results = []
    for url in urls:
        response = requests.get(url)  # Blocks until the response arrives!
        results.append(response.text)
    return results

# GOOD: Concurrent requests
async def fetch_all_async(urls):
    async with aiohttp.ClientSession() as session:
        tasks = [session.get(url) for url in urls]
        responses = await asyncio.gather(*tasks)
        return [await r.text() for r in responses]
```
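A quick usage sketch for the async version (the URLs are placeholders): `asyncio.run()` drives the coroutine from ordinary synchronous code.

```python
urls = ["https://example.com"] * 10  # Placeholder URLs
results = asyncio.run(fetch_all_async(urls))
print(len(results))
```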
## Debugging Checklist

### When Something Doesn't Work

- Read the error message carefully
- Check the line number in the traceback
- Add print statements or breakpoints
- Check input values
- Verify assumptions about data types
- Check edge cases (empty, None, negative)
- Look at recent changes
- Search for the error online

### When Code is Slow

- Profile first, optimize later
- Check for N+1 queries
- Look for unnecessary loops
- Consider caching (see the sketch after this list)
- Check I/O operations
- Consider async for I/O-bound tasks
- Use appropriate data structures
- Batch operations when possible
## Visualization Tools

### snakeviz (Profile Visualization)

```bash
pip install snakeviz

# Generate a profile
python -m cProfile -o output.prof script.py

# View it in the browser
snakeviz output.prof
```
### py-spy (Sampling Profiler)

```bash
pip install py-spy

# Profile a running process (no code changes needed)
py-spy top --pid 12345

# Generate a flame graph
py-spy record -o profile.svg -- python script.py
```
## Next Steps

After learning debugging and profiling, proceed to `30_real_world_projects` to apply everything you've learned!