pandas basics
1 min read · 18 headings
Converted from
02_pandas_basics.ipynb for web reading.
Code cell 1
import numpy as np
import pandas as pd
# Show which pandas release is installed, so output can be matched to a version.
print(f"Pandas version: {pd.__version__}")
Series
Code cell 3
# A Series is a one-dimensional labelled array; the dict keys become the
# index labels and the dict values become the data, in insertion order.
s = pd.Series({'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50})
print(
    f"Series:\n{s}",            # the whole Series, index alongside values
    f"\ns['c'] = {s['c']}",     # look up a single value by its label
    f"\ns > 25:\n{s[s > 25]}",  # boolean mask keeps only the matching entries
    sep="\n",
)
DataFrame Creation
Code cell 5
# Build the frame column by column: every key added to `data` becomes a
# column name, and its list supplies that column's values (one per row).
data = {}
data['name'] = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve']
data['age'] = [25, 30, 35, 28, 32]
data['city'] = ['NYC', 'LA', 'Chicago', 'NYC', 'LA']
data['salary'] = [50000, 60000, 75000, 55000, 65000]

df = pd.DataFrame(data)
df  # bare expression: in a notebook this renders the frame as the cell output
Code cell 6
# Structural summary of df: its (rows, columns) shape, the column labels,
# and the dtype pandas assigned to each column.
print(
    f"Shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    f"\nDtypes:\n{df.dtypes}",
    sep="\n",
)
Data Selection
Code cell 8
# Column selection: a single label yields a Series, while a list of labels
# yields a DataFrame containing just those columns.
print(
    f"df['name']:\n{df['name']}",
    f"\ndf[['name', 'salary']]:\n{df[['name', 'salary']]}",
    sep="\n",
)
Code cell 9
# Row selection. `.loc` is label-based: label 0 is the first row here only
# because df carries the default 0..n-1 index. `.iloc` is position-based and
# its slice end is exclusive, so 0:2 covers positions 0 and 1.
print(
    f"df.loc[0] (first row):\n{df.loc[0]}",
    f"\ndf.iloc[0:2] (first 2 rows):\n{df.iloc[0:2]}",
    f"\ndf.loc[1, 'salary'] = {df.loc[1, 'salary']}",  # (row label, column label) -> scalar
    sep="\n",
)
Filtering
Code cell 11
# Boolean-mask filtering: `.gt(55000)` yields one True/False per row
# (strictly greater than), and `.loc` keeps the rows where the mask is True.
high_salary = df.loc[df['salary'].gt(55000)]
print(f"Salary > 55000:\n{high_salary}")
Code cell 12
# Combine two row masks with `&` (element-wise AND): a row is kept only when
# its city is 'NYC' and its age is strictly below 30.
nyc_young = df.loc[df['city'].eq('NYC') & df['age'].lt(30)]
print(f"NYC and age < 30:\n{nyc_young}")
Adding and Modifying Columns
Code cell 14
# Derived columns: assigning to a new label adds that column to df in place.
df['bonus'] = df['salary'] * 0.1               # 10% of salary
df['total_comp'] = df['salary'] + df['bonus']  # salary plus bonus
# Two-way label computed for the whole column at once: np.where picks 'Young'
# where age < 30 and 'Senior' everywhere else, instead of calling a Python
# lambda once per row through .apply().
df['age_group'] = np.where(df['age'] < 30, 'Young', 'Senior')
df  # bare expression: in a notebook this renders the updated frame
Grouping and Aggregation
Code cell 16
# Split the rows by city, take the salary column of each group, and compute
# three statistics per group; each statistic becomes one result column.
by_city = (
    df.groupby('city')['salary']
    .agg(['mean', 'sum', 'count'])
)
print(f"Salary by city:\n{by_city}")
Code cell 17
# Per-column aggregation spec: the dict maps a column name to the
# statistic(s) to compute for it within each city group.
agg_result = (
    df.groupby('city')
    .agg({'salary': ['mean', 'max'], 'age': 'mean'})
)
agg_result  # bare expression: in a notebook this renders the aggregated table
Missing Data
Code cell 19
# Small frame with gaps: np.nan marks a missing value. Column A has one gap,
# column B has two, and column C is complete.
df_missing = pd.DataFrame(
    {
        'A': [1, 2, np.nan, 4],
        'B': [5, np.nan, np.nan, 8],
        'C': [9, 10, 11, 12],
    }
)
print(
    f"With missing data:\n{df_missing}",
    # isna() flags each missing cell; summing the flags counts them per column
    f"\nMissing count:\n{df_missing.isna().sum()}",
    sep="\n",
)
Code cell 20
# Impute each gap with the mean of its own column: df_missing.mean() gives
# one mean per column, and fillna matches those means to columns by label.
# The result is a new frame; df_missing itself is left untouched.
filled = df_missing.fillna(
    value=df_missing.mean(),
)
print(f"Filled with mean:\n{filled}")
Merge Example
Code cell 22
# Two small frames sharing a 'key' column. Keys A and B appear in both,
# C appears only in df1 and D only in df2.
df1 = pd.DataFrame(
    {
        'key': ['A', 'B', 'C'],
        'value1': [1, 2, 3],
    }
)
df2 = pd.DataFrame(
    {
        'key': ['A', 'B', 'D'],
        'value2': [4, 5, 6],
    }
)
print(
    f"df1:\n{df1}",
    f"\ndf2:\n{df2}",
    sep="\n",
)
Code cell 23
# Outer join on 'key': keys found in either frame are kept, and a value
# column is left missing (NaN) for keys that its source frame lacks.
merged = df1.merge(df2, on='key', how='outer')
print(f"Outer merge:\n{merged}")