-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathData Exploration.sql
145 lines (113 loc) · 4.79 KB
/
Data Exploration.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
/*
Covid-19 Data exploration
Skills used: Joins, CTEs, Temp Tables, Window Functions, Aggregate Functions, Creating Views, Converting Data Types
*/
-- Preview every CovidDeaths row that has a continent assigned.
-- NOTE(review): the sort is positional (3rd, then 4th table column); presumably
-- location and date -- verify against the table definition.
SELECT *
FROM MiniProject..CovidDeaths
WHERE continent IS NOT NULL
ORDER BY 3, 4;
-- Preview every CovidVaccinations row that has a continent assigned.
-- NOTE(review): the sort is positional (3rd, then 4th table column); presumably
-- location and date -- verify against the table definition.
SELECT *
FROM MiniProject..CovidVaccinations
WHERE continent IS NOT NULL
ORDER BY 3, 4;
-- Switch to the MiniProject database so the following queries can reference
-- its tables without a database prefix.
USE MiniProject;

-- To inspect the column definitions, run: EXEC sp_columns CovidDeaths

-- Core columns used throughout the exploration, sorted by location and date.
SELECT
    location,
    date,
    total_cases,
    new_cases,
    total_deaths,
    population
FROM CovidDeaths
ORDER BY
    location,
    date;
-- Total cases vs total deaths:
-- deaths expressed as a percentage of reported cases, per day, for India.
SELECT
    location,
    date,
    total_cases,
    total_deaths,
    (total_deaths / total_cases) * 100 AS DeathPercentage
FROM CovidDeaths
WHERE location = 'India'
ORDER BY
    location,
    date;
-- Total cases vs population:
-- reported cases as a percentage of the population, per day, for every
-- location whose name contains "states".
SELECT
    location,
    date,
    total_cases,
    population,
    (total_cases / population) * 100 AS PeopleInfected
FROM CovidDeaths
WHERE location LIKE '%states%'
ORDER BY
    location,
    date;
-- Highest case count and highest infected percentage for each
-- (location, population, date) group, largest percentage first.
-- NOTE(review): date is part of the grouping key, so each group presumably holds
-- a single daily row and MAX() simply returns that day's value -- confirm
-- whether grouping by date was intended.
SELECT
    location,
    date,
    MAX(total_cases) AS HighestCases,
    population,
    MAX(total_cases / population) * 100 AS PeopleInfected
FROM CovidDeaths
GROUP BY
    location,
    population,
    date
ORDER BY PeopleInfected DESC;
-- Highest infection level compared with population:
-- one row per (location, population) with the peak case count and the peak
-- cases-to-population percentage, largest percentage first.
SELECT
    location,
    population,
    MAX(total_cases) AS HighestCases,
    MAX((total_cases / population)) * 100 AS PeopleInfected
FROM CovidDeaths
GROUP BY
    location,
    population
ORDER BY PeopleInfected DESC;
-- Deaths: countries with the highest death count relative to population.
-- The query groups by location so that every row names the country it
-- describes; grouping by (continent, population) produced one row per distinct
-- population value labelled only with its continent.
-- Rows without a continent are excluded.
-- NOTE(review): total_deaths is cast to int before MAX(), presumably because the
-- column is stored as text -- confirm against the table definition.
SELECT
    location,
    population,
    MAX(CAST(total_deaths AS int)) AS HighestDeaths,
    MAX((total_deaths / population)) * 100 AS DeathsPerPopulation
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY
    location,
    population
ORDER BY DeathsPerPopulation DESC;
-- The same death figures, broken down by continent.
-- NOTE(review): MAX() returns the largest single-row value found within each
-- continent, not a continent-wide total -- confirm this is what is wanted.
SELECT
    continent,
    MAX(CAST(total_deaths AS int)) AS HighestDeaths,
    MAX((total_deaths / population)) * 100 AS DeathsPerPopulation
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY continent
ORDER BY DeathsPerPopulation DESC;
-- Worldwide numbers, one row per date.
--   TotalCases      : sum of new_cases reported on that date
--   TotalDeaths     : sum of new_deaths reported on that date
--   DeathPercentage : summed total_deaths as a percentage of summed total_cases
-- The aliases describe what is computed: the deaths column is a SUM (not a
-- maximum) and the ratio is deaths over cases (not over population).
-- The numerator is cast to float so the division can never be integer division,
-- and NULLIF turns a zero denominator into NULL instead of raising a
-- divide-by-zero error.
SELECT
    date,
    SUM(CAST(new_cases AS int)) AS TotalCases,
    SUM(CAST(new_deaths AS int)) AS TotalDeaths,
    SUM(CAST(total_deaths AS float)) / NULLIF(SUM(total_cases), 0) * 100 AS DeathPercentage
FROM CovidDeaths
WHERE continent IS NOT NULL
GROUP BY date
ORDER BY date;
-- Worldwide numbers over the whole period (a single summary row).
--   TotalCases      : sum of all new_cases
--   TotalDeaths     : sum of all new_deaths
--   DeathPercentage : TotalDeaths as a percentage of TotalCases
-- The percentage is derived from the daily new_* columns. Summing the total_*
-- columns across every date (they appear to be running totals -- TODO confirm)
-- would count the same cases and deaths once for each day they remain in the
-- running total.
-- The float casts prevent integer division; NULLIF yields NULL instead of a
-- divide-by-zero error when no cases are present.
-- No ORDER BY: the result is a single row.
SELECT
    SUM(CAST(new_cases AS int)) AS TotalCases,
    SUM(CAST(new_deaths AS int)) AS TotalDeaths,
    SUM(CAST(new_deaths AS float)) / NULLIF(SUM(CAST(new_cases AS float)), 0) * 100 AS DeathPercentage
FROM CovidDeaths
WHERE continent IS NOT NULL;
-- Covid vaccination
-- NOTE(review): this section is about vaccinations, yet the first query reads
-- CovidDeaths -- confirm whether CovidVaccinations was intended.
SELECT *
FROM CovidDeaths;

-- Pair every deaths row with the vaccination row for the same location and date.
SELECT *
FROM CovidDeaths AS deaths
INNER JOIN CovidVaccinations AS vacc
    ON deaths.location = vacc.location
    AND deaths.date = vacc.date;
-- Population vs vaccinations: daily new vaccinations next to the population of
-- each location, for rows that have a continent assigned.
-- continent is selected only once; the duplicated column shifted the positional
-- sort to (location, continent). Sorting by explicit column names gives the
-- intended (location, date) order and cannot drift if the select list changes.
SELECT
    deaths.continent,
    deaths.location,
    deaths.date,
    deaths.population,
    vacc.new_vaccinations
FROM CovidDeaths AS deaths
INNER JOIN CovidVaccinations AS vacc
    ON deaths.location = vacc.location
    AND deaths.date = vacc.date
WHERE deaths.continent IS NOT NULL
ORDER BY
    deaths.location,
    deaths.date;
-- Population vs vaccinations with a running total:
-- RollingPeopleVaccinated accumulates new_vaccinations within each location,
-- ordered by location and date.
SELECT
    deaths.continent,
    deaths.location,
    deaths.date,
    deaths.population,
    vacc.new_vaccinations,
    SUM(CAST(vacc.new_vaccinations AS bigint)) OVER (
        PARTITION BY deaths.location
        ORDER BY deaths.location, deaths.date
    ) AS RollingPeopleVaccinated
FROM CovidDeaths AS deaths
INNER JOIN CovidVaccinations AS vacc
    ON deaths.location = vacc.location
    AND deaths.date = vacc.date
WHERE deaths.continent IS NOT NULL
ORDER BY
    deaths.location,
    deaths.date;
-- Temp table holding the rolling vaccination total so that a further
-- calculation (percentage of population) can be run on top of it.
-- Dropped first so the script can be re-run in the same session.
DROP TABLE IF EXISTS #PercentPeopleVaccinated;

CREATE TABLE #PercentPeopleVaccinated
(
    continent nvarchar(255),
    location nvarchar(255),
    date datetime,
    population numeric,
    new_vaccinations numeric,
    RollingPeopleVaccinated numeric
);

-- The explicit column list keeps the insert correct even if the temp table's
-- column order is changed later.
INSERT INTO #PercentPeopleVaccinated
(
    continent,
    location,
    date,
    population,
    new_vaccinations,
    RollingPeopleVaccinated
)
SELECT
    deaths.continent,
    deaths.location,
    deaths.date,
    deaths.population,
    vacc.new_vaccinations,
    SUM(CAST(vacc.new_vaccinations AS bigint)) OVER (
        PARTITION BY deaths.location
        ORDER BY deaths.location, deaths.date
    ) AS RollingPeopleVaccinated
FROM CovidDeaths AS deaths
INNER JOIN CovidVaccinations AS vacc
    ON deaths.location = vacc.location
    AND deaths.date = vacc.date
WHERE deaths.continent IS NOT NULL;

-- Rolling vaccinations as a percentage of population.
-- The computed column is named so it does not come back as "(No column name)";
-- NULLIF returns NULL instead of raising a divide-by-zero error if population is 0.
SELECT
    *,
    (RollingPeopleVaccinated / NULLIF(population, 0)) * 100 AS PercentVaccinated
FROM #PercentPeopleVaccinated;
-- View that stores the rolling vaccination query for later use.
-- CREATE VIEW must be the first statement in its batch, so the definition is
-- isolated between GO batch separators (GO is interpreted by client tools such
-- as SSMS and sqlcmd). The view is dropped first so the script can be re-run.
DROP VIEW IF EXISTS PercentPeopleVaccinated;
GO

CREATE VIEW PercentPeopleVaccinated AS
SELECT
    deaths.continent,
    deaths.location,
    deaths.date,
    deaths.population,
    vacc.new_vaccinations,
    SUM(CAST(vacc.new_vaccinations AS bigint)) OVER (
        PARTITION BY deaths.location
        ORDER BY deaths.location, deaths.date
    ) AS RollingPeopleVaccinated
FROM CovidDeaths AS deaths
INNER JOIN CovidVaccinations AS vacc
    ON deaths.location = vacc.location
    AND deaths.date = vacc.date
WHERE deaths.continent IS NOT NULL;
GO