Pandas Cheatsheet¶

Author: Thomas Uhuru¶

Quick reference for essential Pandas operations in data analysis and manipulation.

In [59]:
import pandas as pd

1️⃣ Creating DataFrames & Series¶

In [60]:
s = pd.Series([10, 20, 30, 40])  # Create Series
In [61]:
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data)  # Create DataFrame
In [62]:
pd.read_csv('output.csv')  # Load CSV file
Out[62]:
Name Age Salary Bonus
0 Alice 25 50000 5000.0
1 Bob 30 60000 6000.0
2 Charlie 35 70000 7000.0

2️⃣ Inspecting Data¶

In [63]:
df.head()
Out[63]:
Name Age
0 Alice 25
1 Bob 30
2 Charlie 35
In [64]:
df.tail()
Out[64]:
Name Age
0 Alice 25
1 Bob 30
2 Charlie 35
In [65]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 180.0+ bytes
In [66]:
df.describe()
Out[66]:
Age
count 3.0
mean 30.0
std 5.0
min 25.0
25% 27.5
50% 30.0
75% 32.5
max 35.0

3️⃣ Selecting Data¶

In [67]:
df['Name']  # Select single column
Out[67]:
0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
In [68]:
df[['Name', 'Age']]  # Select multiple columns
Out[68]:
Name Age
0 Alice 25
1 Bob 30
2 Charlie 35
In [69]:
df.loc[0]  # Select row by label
Out[69]:
Name    Alice
Age        25
Name: 0, dtype: object
In [70]:
df.iloc[0]  # Select row by index
Out[70]:
Name    Alice
Age        25
Name: 0, dtype: object

4️⃣ Filtering & Conditional Selection¶

In [71]:
df[df['Age'] > 25]  # Filter rows
Out[71]:
Name Age
1 Bob 30
2 Charlie 35
In [72]:
df[(df['Age'] > 25) & (df['Name'] == 'Bob')]
Out[72]:
Name Age
1 Bob 30

5️⃣ Adding & Modifying Columns¶

In [73]:
df['Salary'] = [50000, 60000, 70000]
In [74]:
df['Bonus'] = df['Salary'] * 0.1

6️⃣ Handling Missing Data¶

In [75]:
df.isnull().sum()
Out[75]:
Name      0
Age       0
Salary    0
Bonus     0
dtype: int64
In [76]:
df.fillna(0)  # Replace NaN with 0
Out[76]:
Name Age Salary Bonus
0 Alice 25 50000 5000.0
1 Bob 30 60000 6000.0
2 Charlie 35 70000 7000.0
In [77]:
df.dropna()  # Drop rows with NaN values
Out[77]:
Name Age Salary Bonus
0 Alice 25 50000 5000.0
1 Bob 30 60000 6000.0
2 Charlie 35 70000 7000.0

7️⃣ Grouping & Aggregation¶

In [78]:
df.groupby('Name')['Salary'].mean()
Out[78]:
Name
Alice      50000.0
Bob        60000.0
Charlie    70000.0
Name: Salary, dtype: float64
In [79]:
df.groupby('Name').agg({'Salary': ['mean', 'max']})
Out[79]:
Salary
mean max
Name
Alice 50000.0 50000
Bob 60000.0 60000
Charlie 70000.0 70000

8️⃣ Sorting & Ranking¶

In [80]:
df.sort_values('Age', ascending=False)
Out[80]:
Name Age Salary Bonus
2 Charlie 35 70000 7000.0
1 Bob 30 60000 6000.0
0 Alice 25 50000 5000.0
In [81]:
df.sort_index()
Out[81]:
Name Age Salary Bonus
0 Alice 25 50000 5000.0
1 Bob 30 60000 6000.0
2 Charlie 35 70000 7000.0

9️⃣ Merging, Joining & Concatenation¶

In [82]:
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
df2 = pd.DataFrame({'ID': [1, 2], 'Salary': [50000, 60000]})
pd.merge(df1, df2, on='ID')
Out[82]:
ID Name Salary
0 1 Alice 50000
1 2 Bob 60000
In [83]:
pd.concat([df1, df2], axis=1)  # Concatenate horizontally
Out[83]:
ID Name ID Salary
0 1 Alice 1 50000
1 2 Bob 2 60000

🔟 Exporting Data¶

In [84]:
df.to_csv('output.csv', index=False)
In [85]:
pd.read_csv('output.csv')
Out[85]:
Name Age Salary Bonus
0 Alice 25 50000 5000.0
1 Bob 30 60000 6000.0
2 Charlie 35 70000 7000.0