# In[1]:
import pandas as pd
import matplotlib.pyplot as plt
# Read 'netflix_titles.csv' (resolved against the current working directory)
# into the DataFrame `df`
df = pd.read_csv(filepath_or_buffer='netflix_titles.csv')
# In[2]:
# Keep only the rows whose 'type' is 'Movie'; copy so the column added below
# lands on an independent frame rather than on a slice of df
is_movie = df['type'] == 'Movie'
movies_df = df.loc[is_movie].copy()

# Capture the first run of digits in each 'duration' string and store it as a
# float; rows without a match end up as NaN
leading_digits = movies_df['duration'].str.extract(r'(\d+)')
movies_df['duration_mins'] = leading_digits[0].astype(float)

# Histogram of the durations that could be parsed (NaN rows are dropped)
fig, ax = plt.subplots(figsize=(10, 5))
known_durations = movies_df['duration_mins'].dropna()
ax.hist(known_durations, bins=30, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Movie Durations (in Minutes)')
ax.set_xlabel('Duration (minutes)')
ax.set_ylabel('Number of Movies')
ax.grid(True)
fig.tight_layout()
plt.show()
# In[3]:
# Count the genres listed for each title.
# 'listed_in' is split on commas and only non-blank entries are counted:
# ''.split(',') is [''], so a plain len() would report one genre for a title
# whose 'listed_in' is missing or empty, and a stray trailing comma would add
# a phantom genre.
df['genre_count'] = df['listed_in'].fillna('').apply(
    lambda listed: sum(1 for genre in listed.split(',') if genre.strip())
)

# Bin edges are consecutive integers so that, with align='left', each bar is
# centred on its genre count. The axis starts at 1 as before, and only extends
# down to 0 when some title has no genres, so those rows are not dropped from
# the plot.
max_count = df['genre_count'].max()
first_count = min(1, df['genre_count'].min())

# Plot histogram
plt.figure(figsize=(8, 5))
plt.hist(
    df['genre_count'],
    bins=range(first_count, max_count + 2),
    color='lightcoral',
    edgecolor='black',
    align='left',
)
plt.title('Distribution of Number of Genres per Title')
plt.xlabel('Number of Genres')
plt.ylabel('Number of Titles')
plt.xticks(range(first_count, max_count + 1))
plt.grid(True)
plt.tight_layout()
plt.show()