pandas

Create a pandas DataFrame and Display It

import pandas as pd

# Column-oriented input: each key is a column label, each list holds that
# column's values (all lists have the same length).
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
    'Channel': ['TVS', 'Educational', 'Channel', 'Pynfinity'],
}

# Build the table from the dict and print it to stdout.
df = pd.DataFrame.from_dict(data)
print(df)

Code console output

Select a Column from DataFrame

import pandas as pd

# Two-column sample table.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}

df = pd.DataFrame(data=data)

# Pull out the 'Name' column as a Series (all rows, one label) and print it.
print(df.loc[:, 'Name'])

Code console output

Filter Rows Based on Condition

import pandas as pd

# Sample table to filter.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}

df = pd.DataFrame.from_dict(data)

# Keep only the rows whose Age is strictly greater than 24; the original
# row labels are preserved in the result.
filtered_df = df.loc[df['Age'].gt(24)]
print(filtered_df)

Code console output

Add a New Column to a DataFrame

import pandas as pd

# Start with a two-column table.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}

df = pd.DataFrame.from_dict(data)

# Append a 'Channel' column after the existing columns; the list supplies
# one value per row, in row order.
df.insert(
    loc=len(df.columns),
    column='Channel',
    value=['TVS', 'Educational', 'Channel', 'Pynfinity'],
)
print(df)

Code console output

Handle Missing Data in Pandas

import pandas as pd
import numpy as np

# Sample table with one missing name and one missing age (np.nan).
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', np.nan],
    'Age': [25, 30, np.nan, 24]
}

df = pd.DataFrame(data)

# Fill missing data.
# Assign the filled Series back to the column rather than calling
# fillna(..., inplace=True) on df['col']: the in-place call acts on an
# intermediate object, which recent pandas versions warn about and which,
# under Copy-on-Write, leaves df itself unchanged.
df['Name'] = df['Name'].fillna('Unknown')
# Missing ages are replaced by the mean of the ages that are present
# (Series.mean skips NaN by default).
df['Age'] = df['Age'].fillna(df['Age'].mean())

print(df)

Code console output

Group Data by Column

import pandas as pd

# Sample table with a categorical 'Channel' column to group on.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
    'Channel': ['TVS', 'Educational', 'Channel', 'Pynfinity'],
}

df = pd.DataFrame.from_dict(data)

# Mean Age per distinct Channel value; the result is a Series named 'Age'
# indexed by Channel.
grouped = df['Age'].groupby(df['Channel']).mean()
print(grouped)

Code console output

Sort Data in a DataFrame

import pandas as pd

# Sample table to sort.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
}

df = pd.DataFrame.from_dict(data)

# Order rows from oldest to youngest; a new frame is returned and df keeps
# its original order.
sorted_df = df.sort_values(by=['Age'], ascending=[False])
print(sorted_df)

Code console output

Merge Two DataFrames

import pandas as pd

# Two tables that share the 'Name' column: one carries ages, the other
# carries channels.
data1 = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Age': [25, 30, 22],
}
data2 = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Channel': ['TVS', 'Educational', 'Channel'],
}

df1 = pd.DataFrame.from_dict(data1)
df2 = pd.DataFrame.from_dict(data2)

# Join the two tables on matching 'Name' values.
merged_df = df1.merge(df2, on='Name')
print(merged_df)

Code console output

Apply Functions to Columns

import pandas as pd

# Sample table whose 'Age' column will be transformed.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Age': [25, 30, 22],
}

df = pd.DataFrame.from_dict(data)


def _add_one(value):
    """Return ``value`` increased by one."""
    return value + 1


# Run the helper on every Age value and store the result back in the column.
df['Age'] = df['Age'].apply(_add_one)
print(df)

Code console output

Pivot Table in Pandas

import pandas as pd

# Sample table with a 'Channel' column to pivot on.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 24],
    'Channel': ['TVS', 'Educational', 'Channel', 'Pynfinity'],
}

df = pd.DataFrame.from_dict(data)

# One row per Channel, holding the mean of the Age values in that channel.
pivot_table = pd.pivot_table(
    df,
    values='Age',
    index='Channel',
    aggfunc='mean',
)
print(pivot_table)

Code console output

Handle Duplicates in DataFrame

import pandas as pd

# Sample table in which 'Dhruv' appears twice.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv', 'Dhruv', 'Pynfinity'],
    'Age': [25, 30, 22, 22, 24],
}

df = pd.DataFrame.from_dict(data)

# Keep the first row for each distinct 'Name' and drop later repeats;
# surviving rows keep their original labels.
df_no_duplicates = df.loc[~df.duplicated(subset='Name')]
print(df_no_duplicates)

Code console output

Save DataFrame to CSV

import pandas as pd

# Sample table to write out.
data = {
    'Name': ['Santosh', 'Kumar', 'Dhruv'],
    'Age': [25, 30, 22],
}

df = pd.DataFrame.from_dict(data)

# Write the table to 'output.csv' in the current working directory;
# index=False leaves the row labels out of the file.
df.to_csv(path_or_buf='output.csv', index=False)
print("DataFrame saved to CSV.")

Code console output

Load DataFrame from CSV

import pandas as pd

# Read 'output.csv' from the current working directory into a DataFrame
# and print it. The file must already exist.
df = pd.read_csv(filepath_or_buffer='output.csv')
print(df)

Code console output

You’re the Expert!

BASICS

INTERMEDIATE

ADVANCED