In [59]:
import pandas as pd
1️⃣ Creating DataFrames & Series¶
In [60]:
s = pd.Series([10, 20, 30, 40]) # Create Series
In [61]:
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data) # Create DataFrame
In [62]:
pd.read_csv('output.csv') # Load CSV file
Out[62]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 0 | Alice | 25 | 50000 | 5000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 2 | Charlie | 35 | 70000 | 7000.0 |
2️⃣ Inspecting Data¶
In [63]:
df.head()
Out[63]:
| Name | Age | |
|---|---|---|
| 0 | Alice | 25 |
| 1 | Bob | 30 |
| 2 | Charlie | 35 |
In [64]:
df.tail()
Out[64]:
| Name | Age | |
|---|---|---|
| 0 | Alice | 25 |
| 1 | Bob | 30 |
| 2 | Charlie | 35 |
In [65]:
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3 entries, 0 to 2 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Name 3 non-null object 1 Age 3 non-null int64 dtypes: int64(1), object(1) memory usage: 180.0+ bytes
In [66]:
df.describe()
Out[66]:
| Age | |
|---|---|
| count | 3.0 |
| mean | 30.0 |
| std | 5.0 |
| min | 25.0 |
| 25% | 27.5 |
| 50% | 30.0 |
| 75% | 32.5 |
| max | 35.0 |
3️⃣ Selecting Data¶
In [67]:
df['Name'] # Select single column
Out[67]:
0 Alice 1 Bob 2 Charlie Name: Name, dtype: object
In [68]:
df[['Name', 'Age']] # Select multiple columns
Out[68]:
| Name | Age | |
|---|---|---|
| 0 | Alice | 25 |
| 1 | Bob | 30 |
| 2 | Charlie | 35 |
In [69]:
df.loc[0] # Select row by label
Out[69]:
Name Alice Age 25 Name: 0, dtype: object
In [70]:
df.iloc[0] # Select row by index
Out[70]:
Name Alice Age 25 Name: 0, dtype: object
4️⃣ Filtering & Conditional Selection¶
In [71]:
df[df['Age'] > 25] # Filter rows
Out[71]:
| Name | Age | |
|---|---|---|
| 1 | Bob | 30 |
| 2 | Charlie | 35 |
In [72]:
df[(df['Age'] > 25) & (df['Name'] == 'Bob')]
Out[72]:
| Name | Age | |
|---|---|---|
| 1 | Bob | 30 |
5️⃣ Adding & Modifying Columns¶
In [73]:
df['Salary'] = [50000, 60000, 70000]
In [74]:
df['Bonus'] = df['Salary'] * 0.1
6️⃣ Handling Missing Data¶
In [75]:
df.isnull().sum()
Out[75]:
Name 0 Age 0 Salary 0 Bonus 0 dtype: int64
In [76]:
df.fillna(0) # Replace NaN with 0
Out[76]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 0 | Alice | 25 | 50000 | 5000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 2 | Charlie | 35 | 70000 | 7000.0 |
In [77]:
df.dropna() # Drop rows with NaN values
Out[77]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 0 | Alice | 25 | 50000 | 5000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 2 | Charlie | 35 | 70000 | 7000.0 |
7️⃣ Grouping & Aggregation¶
In [78]:
df.groupby('Name')['Salary'].mean()
Out[78]:
Name Alice 50000.0 Bob 60000.0 Charlie 70000.0 Name: Salary, dtype: float64
In [79]:
df.groupby('Name').agg({'Salary': ['mean', 'max']})
Out[79]:
| Salary | ||
|---|---|---|
| mean | max | |
| Name | ||
| Alice | 50000.0 | 50000 |
| Bob | 60000.0 | 60000 |
| Charlie | 70000.0 | 70000 |
8️⃣ Sorting & Ranking¶
In [80]:
df.sort_values('Age', ascending=False)
Out[80]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 2 | Charlie | 35 | 70000 | 7000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 0 | Alice | 25 | 50000 | 5000.0 |
In [81]:
df.sort_index()
Out[81]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 0 | Alice | 25 | 50000 | 5000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 2 | Charlie | 35 | 70000 | 7000.0 |
9️⃣ Merging, Joining & Concatenation¶
In [82]:
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Alice', 'Bob']})
df2 = pd.DataFrame({'ID': [1, 2], 'Salary': [50000, 60000]})
pd.merge(df1, df2, on='ID')
Out[82]:
| ID | Name | Salary | |
|---|---|---|---|
| 0 | 1 | Alice | 50000 |
| 1 | 2 | Bob | 60000 |
In [83]:
pd.concat([df1, df2], axis=1) # Concatenate horizontally
Out[83]:
| ID | Name | ID | Salary | |
|---|---|---|---|---|
| 0 | 1 | Alice | 1 | 50000 |
| 1 | 2 | Bob | 2 | 60000 |
🔟 Exporting Data¶
In [84]:
df.to_csv('output.csv', index=False)
In [85]:
pd.read_csv('output.csv')
Out[85]:
| Name | Age | Salary | Bonus | |
|---|---|---|---|---|
| 0 | Alice | 25 | 50000 | 5000.0 |
| 1 | Bob | 30 | 60000 | 6000.0 |
| 2 | Charlie | 35 | 70000 | 7000.0 |