This solution processes a CSV file of Indian customer details and applies conditional filtering, text-based filters, and column derivations.
import pandas as pd
# Load CSV
df = pd.read_csv("files_indian_customer_data.csv")
# Convert 'Date of Registration' to datetime
df['Date of Registration'] = pd.to_datetime(df['Date of Registration'], errors='coerce')
# Customers older than 40
over_40 = df[df['Age'] > 40]
# Tamil Nadu residents under 35
tn_under_35 = df[(df['State'].str.lower() == 'tamil nadu') & (df['Age'] < 35)]
# PAN starts with 'P'
pan_starts_p = df[df['PAN'].str.startswith('P')]
# Aadhaar contains '5405'
aadhaar_contains = df[df['Aadhaar'].str.contains('5405')]
# Email ends with @gmail.com
gmail_users = df[df['Email'].str.endswith('@gmail.com')]
# Address contains 'Nagar'
address_nagar = df[df['Address'].str.contains('Nagar', case=False, na=False)]
# Names start with 'L'
name_starts_l = df[df['Full Name'].str.startswith('L')]
# City name longer than 7 characters
long_city_names = df[df['City'].str.len() > 7]
# Registration month name
df['RegistrationMonth'] = df['Date of Registration'].dt.strftime('%B')
# ShortCode: first 3 letters of name + last 4 digits of Aadhaar
df['ShortCode'] = df['Full Name'].str[:3].str.upper() + df['Aadhaar'].str[-4:]
# Customers registered in 2024
registered_2024 = df[df['Date of Registration'].dt.year == 2024]
startswith, contains, and len().dt.year on datetime fields.