import pandas as pd
import matplotlib.pyplot as plt

# Load netflix_titles.csv into `df`; every query below reads from this frame.
df = pd.read_csv(filepath_or_buffer='netflix_titles.csv')

# Q1: How many titles are there per content type (Movie vs TV Show)?

# Count the non-null `title` values inside each `type` group.
(
    df.groupby(by='type')['title']
    .count()
)

type
Movie      6131
TV Show    2676
Name: title, dtype: int64

# Q2: What is the number of titles released each year?
# Only the ten most recent release years are shown, newest first.
(
    df.groupby(by='release_year')['title']
    .count()
    .sort_index(ascending=False)
    .head(n=10)
)

release_year
2021     592
2020     953
2019    1030
2018    1147
2017    1032
2016     902
2015     560
2014     352
2013     288
2012     237
Name: title, dtype: int64

# Q3: Which countries produce the most content on Netflix?

# A co-production appears to list all of its countries in a single
# comma-separated cell (verify against the CSV). Grouping on the raw string
# would treat every distinct list as its own "country" and undercount the
# individual countries, so each cell is split into one row per country first.
# NOTE: any pasted result below was produced by grouping on the raw string,
# so its figures predate this change.
(
    df.assign(country=df['country'].str.split(','))
    .explode('country')
    # Remove the padding left around each name by the comma split.
    .assign(country=lambda per_country: per_country['country'].str.strip())
    # A leading/trailing comma yields an empty name; missing countries stay
    # NaN here and are dropped by groupby's default dropna=True.
    .loc[lambda per_country: per_country['country'] != '']
    .groupby('country')['title']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

country
United States     2818
India              972
United Kingdom     419
Japan              245
South Korea        199
Canada             181
Spain              145
France             124
Mexico             110
Egypt              106
Name: title, dtype: int64

# Q4: What is the number of titles per rating?

# Ratings are listed from most to least common.
# NOTE(review): values such as "74 min" in the pasted result look like
# durations stored in the `rating` column -- confirm and clean the data if so.
(
    df.groupby(by='rating')['title']
    .count()
    .sort_values(ascending=False)
)

rating
TV-MA       3207
TV-14       2160
TV-PG        863
R            799
PG-13        490
TV-Y7        334
TV-Y         307
PG           287
TV-G         220
NR            80
G             41
TV-Y7-FV       6
UR             3
NC-17          3
74 min         1
84 min         1
66 min         1
Name: title, dtype: int64

# Q5: How many titles were added to Netflix each month of the year?

# Parse the date once; values that cannot be parsed become NaT and therefore
# fall out of the grouping below (groupby drops missing keys by default).
df['date_added'] = pd.to_datetime(df['date_added'], errors='coerce')
df['year_added'] = df['date_added'].dt.year.astype('Int64')
df['month_added'] = df['date_added'].dt.month_name()

# Sorting on the month *name* orders months alphabetically (September, May,
# March, ...). The numeric month is added as a temporary grouping key so the
# rows come out in calendar order (newest first), then removed again so the
# result keeps the columns year_added / month_added / title.
# NOTE: the pasted table in this file was produced with the alphabetical
# ordering, so its row order predates this change.
(
    df.groupby(
        ['year_added', df['date_added'].dt.month.rename('month_number'), 'month_added']
    )['title']
    .count()
    .sort_index(ascending=False)
    .reset_index()
    .drop(columns='month_number')
    .head(20)
)

# Q6: Which directors have the most titles on Netflix?

# Co-directed titles store all directors in one comma-separated cell (e.g.
# "Raúl Campos, Jan Suter"); grouping on the raw string would rank that pair
# as if it were a single director and undercount each person. Split the cell
# into one row per director before counting.
# Titles without a director stay NaN and are dropped by groupby's default
# dropna=True, so no explicit dropna() is needed on the counts.
# NOTE: any pasted result below was produced by grouping on the raw string,
# so its figures predate this change.
(
    df.assign(director=df['director'].str.split(','))
    .explode('director')
    # Remove the padding left around each name by the comma split.
    .assign(director=lambda credits: credits['director'].str.strip())
    .groupby('director')['title']
    .count()
    .sort_values(ascending=False)
    .head(10)
)

director
Rajiv Chilaka             19
Raúl Campos, Jan Suter    18
Suhas Kadav               16
Marcus Raboy              16
Jay Karas                 14
Cathy Garcia-Molina       13
Jay Chapman               12
Youssef Chahine           12
Martin Scorsese           12
Steven Spielberg          11
Name: title, dtype: int64

# Q7: What are the top 5 most common genres or categories?

# `listed_in` is split on ', ' into a list of genres, then exploded so that
# each (title, genre) pair gets its own row in `df_exploded`.
df_exploded = (
    df.assign(genres=df['listed_in'].str.split(pat=', '))
    .explode(column='genres')
)
# Tally the exploded genre labels and keep the five most frequent.
(
    df_exploded['genres']
    .value_counts()
    .head(n=5)
)

genres
International Movies      2752
Dramas                    2427
Comedies                  1674
International TV Shows    1351
Documentaries              869
Name: count, dtype: int64

	year_added	month_added	title
0	2021	September	183
1	2021	May	132
2	2021	March	112
3	2021	June	207
4	2021	July	257
5	2021	January	132
6	2021	February	109
7	2021	August	178
8	2021	April	188
9	2020	September	168
10	2020	October	167
11	2020	November	154
12	2020	May	157
13	2020	March	137
14	2020	June	156
15	2020	July	146
16	2020	January	204
17	2020	February	114
18	2020	December	169
19	2020	August	129