You’re the Expert!

pandas

Cheatsheets

Create a Pandas DataFrame and Display it
import pandas as pd

# Column-oriented input: each key becomes a column label and each list
# supplies that column's values (all lists must have the same length).
names = ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity']
ages = [25, 30, 22, 24]
channels = ['TVS', 'Educational', 'Channel', 'Pynfinity']
data = {'Name': names, 'Age': ages, 'Channel': channels}

# Build the table and show it with the default integer row index.
df = pd.DataFrame(data)
print(df)
copy to clipboard
execute code
Code console output
Select a Column from DataFrame
import pandas as pd

# Two-column sample table: a text column and a numeric column.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}
df = pd.DataFrame(data)

# Indexing a DataFrame with a single column label yields that column as a
# Series; keep it in a variable so the selection and the display are
# separate steps.
name_column = df['Name']
print(name_column)
copy to clipboard
execute code
Code console output
Filter Rows Based on Condition
import pandas as pd

# Sample table: one row per person.
names = ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity']
ages = [25, 30, 22, 24]
data = {'Name': names, 'Age': ages}

df = pd.DataFrame(data)

# Step 1: build a boolean mask that is True where Age is strictly above 24.
is_over_24 = df['Age'] > 24
# Step 2: keep only the rows the mask selects; original row labels are kept.
filtered_df = df.loc[is_over_24]
print(filtered_df)
copy to clipboard
execute code
Code console output
Add a New Column to a DataFrame
import pandas as pd

# Start from a table that has only the Name and Age columns.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}
df = pd.DataFrame(data)

# Values for the new column, one per existing row (lengths must match).
channels = ['TVS', 'Educational', 'Channel', 'Pynfinity']

# Assigning to a label that does not exist yet appends it as the last column.
df['Channel'] = channels
print(df)
copy to clipboard
execute code
Code console output
Handle Missing Data in Pandas
import pandas as pd
import numpy as np

# Sample table with one missing value in each column (np.nan marks "missing").
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', np.nan],
    'Age': [25, 30, np.nan, 24]
}

df = pd.DataFrame(data)

# Fill missing data.
# Assign the filled column back instead of calling
# `df[col].fillna(..., inplace=True)`: the in-place form operates on an
# intermediate Series, which triggers a FutureWarning on pandas 2.x and leaves
# `df` unchanged once Copy-on-Write is active.
df['Name'] = df['Name'].fillna('Unknown')
# Series.mean() skips NaN by default, so the mean is taken over the known ages.
df['Age'] = df['Age'].fillna(df['Age'].mean())

print(df)
copy to clipboard
execute code
Code console output

Group Data by Column
import pandas as pd

# Sample table: each person belongs to exactly one channel.
names = ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity']
ages = [25, 30, 22, 24]
channels = ['TVS', 'Educational', 'Channel', 'Pynfinity']
data = {'Name': names, 'Age': ages, 'Channel': channels}

df = pd.DataFrame(data)

# Split the rows by Channel and look only at the Age values of each group...
ages_by_channel = df.groupby('Channel')['Age']
# ...then reduce every group to its average age (result is indexed by Channel).
grouped = ages_by_channel.mean()
print(grouped)
copy to clipboard
execute code
Code console output
Sort Data in a DataFrame
import pandas as pd

# Sample table to be ordered by age.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}
df = pd.DataFrame(data)

# Order the rows from the largest Age to the smallest. sort_values returns a
# new DataFrame (df itself is left in its original order) and each row keeps
# its original index label.
sort_column = 'Age'
sorted_df = df.sort_values(sort_column, ascending=False)
print(sorted_df)
copy to clipboard
execute code
Code console output
Merge Two DataFrames
import pandas as pd

# Two tables that share the 'Name' column: one carries ages, the other channels.
shared_names = ['Santosh', 'Kumar', 'Dhruv']
data1 = {'Name': list(shared_names), 'Age': [25, 30, 22]}
data2 = {'Name': list(shared_names), 'Channel': ['TVS', 'Educational', 'Channel']}

df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

# Join the two tables on 'Name'. With the default join type only names present
# in both tables are kept, and columns from df1 come before those from df2.
merged_df = df1.merge(df2, on='Name')
print(merged_df)
copy to clipboard
execute code
Code console output
Apply Functions to Columns
import pandas as pd

def _add_one(age):
    """Return *age* increased by one."""
    return age + 1


# Sample table whose Age column will be transformed.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Age': [25, 30, 22],
}
df = pd.DataFrame(data)

# apply() calls the function once per element of the column; the resulting
# Series replaces the original Age column.
incremented_ages = df['Age'].apply(_add_one)
df['Age'] = incremented_ages
print(df)
copy to clipboard
execute code
Code console output

Pivot Table in Pandas
import pandas as pd

# Sample table: Age is the value to summarise, Channel the grouping key.
names = ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity']
ages = [25, 30, 22, 24]
channels = ['TVS', 'Educational', 'Channel', 'Pynfinity']
data = {'Name': names, 'Age': ages, 'Channel': channels}

df = pd.DataFrame(data)

# Build a pivot table with one row per distinct Channel holding the mean Age
# of that channel. The top-level function is the same operation as the
# DataFrame method of the same name.
pivot_table = pd.pivot_table(df, values='Age', index='Channel', aggfunc='mean')
print(pivot_table)
copy to clipboard
execute code
Code console output
Handle Duplicates in a DataFrame
import pandas as pd

# Sample table in which the 'Dhruv' row appears twice.
names = ['Santosh', 'Kumar', 'Dhruv', 'Dhruv', 'Pynfinity']
ages = [25, 30, 22, 22, 24]
data = {'Name': names, 'Age': ages}

df = pd.DataFrame(data)

# Treat rows as duplicates when their 'Name' matches and keep only the first
# occurrence of each name. A new DataFrame is returned; surviving rows keep
# their original index labels, so the index has a gap where a row was dropped.
df_no_duplicates = df.drop_duplicates(subset=['Name'], keep='first')
print(df_no_duplicates)
copy to clipboard
execute code
Code console output
Save DataFrame to CSV
import pandas as pd

# Sample table to be written to disk.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Age': [25, 30, 22],
}
df = pd.DataFrame(data)

# Write the table as CSV into the current working directory. index=False
# leaves the row labels out, so the file contains only the Name and Age columns.
csv_path = 'output.csv'
df.to_csv(csv_path, index=False)
print("DataFrame saved to CSV.")
copy to clipboard
execute code
Code console output
Load DataFrame from CSV
import pandas as pd

# Read a CSV file from the current working directory into a DataFrame.
# The file must already exist; the first line of the file is used as the
# column header.
csv_path = 'output.csv'
df = pd.read_csv(csv_path)
print(df)
copy to clipboard
execute code
Code console output