-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnetflix_solutions.sql
222 lines (152 loc) · 5.03 KB
/
netflix_solutions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
-- Netflix Project
-- Importing the Data
Drop table if exists netflix;
create table netflix(
show_id varchar(10),
type varchar(10),
title varchar(150),
director varchar(220),
casts varchar(1000),
country varchar(150),
date_added varchar(150),
release_year int,
rating varchar(10),
duration varchar(20),
listed_in varchar(100),
description varchar(270)
)
-- To view the imported data
select *
from netflix;
-- Need to check total length of the table
select count(*) as total_content
from netflix;
-- To get the type of shows
select distinct type
from netflix;
-- Business Problems
-- 1. Count the number of Movies and number of TV shows
select type, count(*) as shows_count
from netflix
group by type;
-- 2. Find the most common ratings for movies and TV Shows
select type,rating, count(*)
from netflix
group by 1,2
order by 1, 3 desc;
-- Same using window function and subquery
select t1.type, t1.rating
from (
select type,rating,count(*),
rank() over (partition by type order by count(*) desc) as ranking
from netflix
group by 1,2) as t1
where t1.ranking =1;
-- 3. List all the movies released in the specific year like 2020
select n.*
from netflix n
where release_year = 2020 and type = 'Movie';
-- 4.Find the top 5 countries with the most content on Netflix
-- Some rows having multiple entires like (india, canada,us) in same row
-- For this we using an uncut and unnest function --> (unnest(string_to_array(country, ',')))
select unnest(string_to_array(country, ',')) as country_name, count(show_id) as total_count
from netflix
group by 1
order by 2 desc
limit 5;
-- 5. Identify the longest movie
select movie_list.type, movie_list.duration, movie_list.title
from (
select type, duration, title,
rank () over(order by duration desc) total_duration
from netflix
where type = 'Movie' and duration <> '') as movie_list
where movie_list.total_duration = 1
-- 6. Find the Content released in the last 5-years
-- Here need to convert the date from varchar to string
select *
from netflix
where to_date(date_added, 'month DD,YYY') >= current_date - interval '5 years'
-- 7. Find all the Movie and Series directed by Rajiv Chilaka
select type, title
from netflix
where director = 'Rajiv Chilaka'
-- If there are more then 1-direcor then we need to slipt the rows which have more than 1 name
select new_one.director_name, new_one.type, new_one.title
from
(select unnest(string_to_array(director, ',')) as director_name, type, title
from netflix) as new_one
where new_one.director_name = 'Rajiv Chilaka';
-- or simple
select *
from netflix
where director Ilike '%Rajiv Chilaka%' --Here Ilike will check the lower case also
-- 8. List the TV shows which have seasons >5
select *
from netflix
where duration >= '5 Seasons' and type = 'TV Show';
-- In text we not use the '>= are <, <=, >' it may give false result
-- Hence using an split function
select kt.*
from
(select *, split_part(duration, ' ',1):: numeric as seasons
from netflix
where type = 'TV Show') as kt
where kt.seasons > 5;
--or
select *
from netflix
where type = 'TV Show' and split_part(duration, ' ',1):: numeric > 5;
-- 9 Count the number of content items in each genre
select gn.genre, count(gn.title)
from(select *, unnest(string_to_array(listed_in, ',')) as genre
from netflix) as gn
group by 1
order by 2 desc;
/* 10. Find each year and the average numbers of content release only in India on netflix.
-- return top 5 year with highest avg content release ! */
SELECT Extract(Year from to_date(date_added, 'Month DD, YYYY')) as years, count(*) as yearly_content,
Round(count(*)::numeric / (select count(*) from netflix where country = 'India')*100,2) as Avg_content
FROM netflix
WHERE country = 'India'
group by 1
order by avg_content desc
limit 5;
-- 11. List all the movies that are Documentry
select *
from netflix
where type = 'Movie' and listed_in Ilike '%Documentaries%';
-- 12. Find all the content without director
select *
from netflix
where director is null;
-- 13. Find How Many movie actor "Salman Khan" appeared in last "10-years"
select *
from netflix
where casts Ilike '%Salman Khan%' and release_year > extract(year from current_date) - 10;
-- Or static (Need to enter the value Manually)
select *
from netflix
where casts Ilike '%Salman Khan%' and release_year between 2014 and 2024
-- 14. Find top 10 actors who appeared in the highest number of movies produced in India
select count(*), jk.actors
from(select *, unnest(string_to_array(casts, ',')) as actors
from netflix
where country Ilike '%India%' and type = 'Movie') as jk
group by 2
order by 1 desc
limit 10
/* 15 -
Categorize the content based on the presence of the keywords 'kill' and 'violence' in the description field.
Label content containing these keywords as 'Bad' and all other content as 'Good'.
Count how many items fall into each category. */
select cont.category, count(cont.description) as Total_count
from
(select *,
case
when
description Ilike '%kill%' or description Ilike '%violence%' then 'Bad Content'
else 'Good Content'
end category
from netflix) as cont
group by 1