In [8]:
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('netflix_titles.csv')
In [9]:
# Q1: How many titles are there per content type (Movie vs TV Show)?

df.groupby('type')['title'].count()
Out[9]:
type
Movie      6131
TV Show    2676
Name: title, dtype: int64
In [10]:
# Q2: What is the number of titles released each year?
df.groupby('release_year')['title'].count().sort_index(ascending=False).head(10)
Out[10]:
release_year
2021     592
2020     953
2019    1030
2018    1147
2017    1032
2016     902
2015     560
2014     352
2013     288
2012     237
Name: title, dtype: int64
In [11]:
# Q3: Which countries produce the most content on Netflix?

df.groupby('country')['title'].count().sort_values(ascending=False).head(10)
Out[11]:
country
United States     2818
India              972
United Kingdom     419
Japan              245
South Korea        199
Canada             181
Spain              145
France             124
Mexico             110
Egypt              106
Name: title, dtype: int64
In [12]:
# Q4: What is the number of titles per rating?

df.groupby('rating')['title'].count().sort_values(ascending=False)
Out[12]:
rating
TV-MA       3207
TV-14       2160
TV-PG        863
R            799
PG-13        490
TV-Y7        334
TV-Y         307
PG           287
TV-G         220
NR            80
G             41
TV-Y7-FV       6
UR             3
NC-17          3
74 min         1
84 min         1
66 min         1
Name: title, dtype: int64
In [ ]:
# Q5: How many titles were added to Netflix each month of the year?

df['date_added'] = pd.to_datetime(df['date_added'], errors='coerce')
df['year_added'] = df['date_added'].dt.year.astype('Int64')
df['month_added'] = df['date_added'].dt.month_name()
df.groupby(['year_added', 'month_added'])['title'].count().sort_index(ascending=False).reset_index().head(20)
Out[ ]:
year_added month_added title
0 2021 September 183
1 2021 May 132
2 2021 March 112
3 2021 June 207
4 2021 July 257
5 2021 January 132
6 2021 February 109
7 2021 August 178
8 2021 April 188
9 2020 September 168
10 2020 October 167
11 2020 November 154
12 2020 May 157
13 2020 March 137
14 2020 June 156
15 2020 July 146
16 2020 January 204
17 2020 February 114
18 2020 December 169
19 2020 August 129
In [30]:
# Q6: Which directors have the most titles on Netflix?

df.groupby('director')['title'].count().sort_values(ascending=False).dropna().head(10)
Out[30]:
director
Rajiv Chilaka             19
Raúl Campos, Jan Suter    18
Suhas Kadav               16
Marcus Raboy              16
Jay Karas                 14
Cathy Garcia-Molina       13
Jay Chapman               12
Youssef Chahine           12
Martin Scorsese           12
Steven Spielberg          11
Name: title, dtype: int64
In [31]:
# Q7: What are the top 5 most common genres or categories?

df_exploded = df.assign(genres=df['listed_in'].str.split(', ')).explode('genres')
df_exploded['genres'].value_counts().head(5)
Out[31]:
genres
International Movies      2752
Dramas                    2427
Comedies                  1674
International TV Shows    1351
Documentaries              869
Name: count, dtype: int64