Skip to content

Commit 80813b1

Browse files
committed
Cleanup time-wise analysis
1 parent ddee7f6 commit 80813b1

File tree

10 files changed

+157
-88
lines changed

10 files changed

+157
-88
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ Tools configuration is stored in the `pyproject.toml` file.
189189
The application is not bound to any specific deployment environment; however,
190190
AWS is used for running the main instance. The setup for creating AWS
191191
infrastructure for the application using Terraform and Ansible deployment is
192-
placed int the `deployment` directory.
192+
placed in the `deployment` directory.
193193

194194
## License
195195

it_jobs_meta/__main__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def main():
5454
data_pipeline.run()
5555

5656
case 'dashboard':
57-
5857
data_provider = MongodbDashboardDataProvider.from_config_file(parser.args['mongodb'])
5958
layout_parameters = LayoutTemplateParameters(navbar_label=parser.args['label'])
6059
app = DashboardApp(data_provider, layout_parameters)

it_jobs_meta/common/cli.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from pathlib import Path
66
from typing import Any
77

8-
from it_jobs_meta.dashboard.data_provision import DashboardDataProvider
98
from it_jobs_meta.data_pipeline.data_etl import EtlLoaderImpl
109
from it_jobs_meta.data_pipeline.data_lake import DataLakeImpl
1110

it_jobs_meta/dashboard/dashboard.py

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import dash_bootstrap_components as dbc
99
import pandas as pd
1010
from dash import Input, Output, callback, dcc, html
11-
from dash.development import base_component as DashComponent
11+
from dash.development.base_component import Component as DashComponent
1212
from flask_caching import Cache as AppCache
1313
from waitress import serve as wsgi_serve
1414

@@ -18,10 +18,8 @@
1818
from it_jobs_meta.dashboard.layout import (
1919
LayoutDynamicContent,
2020
LayoutTemplateParameters,
21-
make_categories_and_seniorities_graphs_layout,
21+
make_graphs_layout,
2222
make_layout,
23-
make_locations_and_remote_graphs_layout,
24-
make_salaries_breakdown_graphs_layout,
2523
)
2624

2725

@@ -62,18 +60,15 @@ def cache(self) -> AppCache:
6260
if self._cache is None:
6361
self._cache = AppCache(
6462
self.app.server,
65-
config={
66-
'CACHE_TYPE': 'SimpleCache',
67-
'CACHE_THRESHOLD': 2,
68-
},
63+
config={'CACHE_TYPE': 'SimpleCache', 'CACHE_THRESHOLD': 2},
6964
)
7065
return self._cache
7166

72-
def render_layout(self) -> DashComponent:
67+
def make_layout(self) -> DashComponent:
7368
logging.info('Rendering dashboard')
7469
logging.info('Attempting to retrieve data')
75-
metadata_df =self._data_provider.fetch_metadata()
76-
data_df = self._data_provider.fetch_data()
70+
metadata_df = self._data_provider.fetch_metadata()
71+
data_df = self._data_provider.fetch_data(metadata_df.iloc[-1]['batch_id'])
7772
logging.info('Data retrieval succeeded')
7873

7974
logging.info('Making layout')
@@ -87,24 +82,19 @@ def register_callbacks(self) -> DashComponent:
8782
@callback(
8883
Output('graphs', 'children'),
8984
Input('batch-slider', 'value'),
90-
prevent_initial_callback=True,
85+
prevent_initial_call=True,
9186
)
9287
def update_graphs_section(value):
9388
metadata_df = self._data_provider.fetch_metadata()
9489
data_df = self._data_provider.fetch_data(metadata_df.iloc[value]['batch_id'])
95-
dynamic_content = self.make_dynamic_content(metadata_df, data_df)
96-
graphs = dynamic_content.graphs
97-
return [
98-
make_categories_and_seniorities_graphs_layout(graphs),
99-
make_locations_and_remote_graphs_layout(graphs),
100-
make_salaries_breakdown_graphs_layout(graphs),
101-
]
90+
graphs = GraphRegistry.make(data_df)
91+
return make_graphs_layout(graphs)
10292

10393
def run(self, with_wsgi=False):
10494
try:
10595
render_layout_memoized = self.cache.memoize(
10696
timeout=int(self._cache_timeout.total_seconds())
107-
)(self.render_layout)
97+
)(self.make_layout)
10898
self.app.layout = render_layout_memoized
10999
self.register_callbacks()
110100

@@ -131,7 +121,7 @@ def main():
131121
setup_logging()
132122
layout_params = LayoutTemplateParameters()
133123
data_provider = MongodbDashboardDataProvider.from_config_file(Path('config/mongodb_config.yml'))
134-
app = DashboardApp(layout_params, data_provider, cache_timeout=timedelta(seconds=30))
124+
app = DashboardApp(data_provider, layout_params, cache_timeout=timedelta(seconds=5))
135125
app.run()
136126

137127

it_jobs_meta/dashboard/dashboard_components.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import numpy as np
88
import pandas as pd
9-
from dash import Input, Output, callback, dcc
9+
from dash import dcc
1010
from plotly import express as px
1111
from plotly import graph_objects as go
1212
from sklearn import preprocessing
@@ -111,8 +111,10 @@ def make_fig(cls, postings_df: pd.DataFrame) -> go.Figure:
111111
tech_most_freq_df = get_rows_with_n_most_frequent_vals_in_col(
112112
postings_df, 'technology', cls.N_MOST_FREQ
113113
)
114+
technology_counts = tech_most_freq_df['technology'].value_counts().reset_index()
115+
technology_counts.columns = ['technology', 'count']
114116

115-
fig = px.pie(tech_most_freq_df, names='technology', title=cls.TITLE)
117+
fig = px.pie(technology_counts, names='technology', values='count', title=cls.TITLE)
116118
fig.update_traces(textposition='inside')
117119
fig = center_title(fig)
118120
return fig
@@ -125,11 +127,15 @@ class CategoriesPieChart(GraphFigure):
125127

126128
@classmethod
127129
def make_fig(cls, postings_df: pd.DataFrame) -> go.Figure:
130+
# Get the most frequent categories and their counts
128131
cat_largest_df = get_rows_with_n_most_frequent_vals_in_col(
129132
postings_df, 'category', cls.N_MOST_FREQ
130133
)
134+
category_counts = cat_largest_df['category'].value_counts().reset_index()
135+
category_counts.columns = ['category', 'count']
131136

132-
fig = px.pie(cat_largest_df, names='category', title=cls.TITLE)
137+
# Create a pie chart with count values
138+
fig = px.pie(category_counts, names='category', values='count', title=cls.TITLE)
133139
fig.update_traces(textposition='inside')
134140
fig = center_title(fig)
135141
return fig
@@ -193,7 +199,9 @@ class SeniorityPieChart(GraphFigure):
193199
@classmethod
194200
def make_fig(cls, postings_df: pd.DataFrame) -> go.Figure:
195201
postings_df = postings_df.explode('seniority')
196-
fig = px.pie(postings_df, names='seniority', title=cls.TITLE)
202+
seniority_counts = postings_df['seniority'].value_counts().reset_index()
203+
seniority_counts.columns = ['seniority', 'count']
204+
fig = px.pie(seniority_counts, values='count', names='seniority', title=cls.TITLE)
197205
fig = center_title(fig)
198206
return fig
199207

@@ -252,6 +260,7 @@ def make_fig(cls, postings_df) -> go.Figure:
252260
)
253261
job_counts = postings_df.groupby('city')['_id'].count()
254262
salaries = postings_df.groupby('city')[['salary_mean', 'lat', 'lon']].mean()
263+
salaries['salary_mean'] = salaries['salary_mean'].round()
255264
cities_salaries = pd.concat([job_counts.rename('job_counts'), salaries], axis=1)
256265
cities_salaries = cities_salaries.reset_index()
257266

it_jobs_meta/dashboard/data_provision.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""Data provision and data source for the data dashboard."""
22

33
from abc import ABC, abstractmethod
4-
from enum import Enum, auto
54
from pathlib import Path
65
from typing import Self
76

87
import pandas as pd
98
from pymongo import MongoClient
9+
from pymongo.synchronous.database import Database
1010

1111
from it_jobs_meta.common.utils import load_yaml_as_dict
1212

@@ -34,19 +34,29 @@ def from_config_file(cls, config_file_path: Path) -> Self:
3434
return cls(**load_yaml_as_dict(config_file_path))
3535

3636
def fetch_metadata(self) -> pd.DataFrame:
37+
client: MongoClient
3738
with MongoClient(
3839
self.host, self.port, username=self.user_name, password=self.password
3940
) as client:
40-
db = client[self.db_name]
41-
return pd.json_normalize(db['metadata'].find())
41+
db: Database = client[self.db_name]
42+
df = pd.json_normalize(db['metadata'].find().sort('obtained_datetime'))
43+
if len(df) == 0:
44+
raise ValueError('Found no metadata, dashboard cannot be made')
45+
return df
4246

4347
def fetch_data(self, batch_id: str | None = None) -> pd.DataFrame:
48+
client: MongoClient
4449
with MongoClient(
4550
self.host, self.port, username=self.user_name, password=self.password
4651
) as client:
47-
db = client[self.db_name]
52+
db: Database = client[self.db_name]
4853
collection = db['postings']
54+
4955
if batch_id is not None:
50-
return pd.json_normalize(collection.find({'batch_id': batch_id}))
56+
df = pd.json_normalize(collection.find({'batch_id': batch_id}))
5157
else:
52-
return pd.json_normalize(collection.find())
58+
df = pd.json_normalize(collection.find())
59+
60+
if len(df) == 0:
61+
raise ValueError('Found no data, dashboard cannot be made')
62+
return df

it_jobs_meta/dashboard/layout.py

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
from datetime import datetime
55

66
import dash_bootstrap_components as dbc
7-
from dash import Input, Output, callback, dcc, html
8-
from it_jobs_meta.dashboard.data_provision import DashboardDataProvider
9-
from dash.development import base_component as DashComponent
7+
from dash import dcc, html
8+
from dash.development.base_component import Component as DashComponent
109
from plotly import graph_objects as go
1110

1211
from it_jobs_meta.dashboard.dashboard_components import Graph
@@ -105,10 +104,7 @@ def make_about() -> DashComponent:
105104
return about
106105

107106

108-
def make_graphs_layout_header(obtained_datetime: datetime) -> DashComponent:
109-
datetime_str = [t.strftime('%-d %B %Y') for t in obtained_datetime]
110-
slider_marks = dict(enumerate(datetime_str))
111-
107+
def make_graphs_layout_header() -> DashComponent:
112108
div = html.Div(
113109
[
114110
html.H2('Data', id='data-container'),
@@ -117,13 +113,23 @@ def make_graphs_layout_header(obtained_datetime: datetime) -> DashComponent:
117113
is most convenient to explore more complex graphs on larger
118114
screens.'''
119115
),
120-
dcc.Slider(id="batch-slider", step=None, marks=slider_marks),
121116
],
122117
className='text-center mt-5',
123118
)
124119
return div
125120

126121

122+
def make_timeline_slider(obtained_datetime) -> DashComponent:
123+
slider_marks = dict(enumerate([t.strftime('%b %Y') for t in obtained_datetime]))
124+
return dcc.Slider(
125+
id='batch-slider',
126+
className='text-nowrap mx-3 my-5',
127+
step=None,
128+
marks=slider_marks,
129+
value=len(slider_marks) - 1,
130+
)
131+
132+
127133
def make_categories_and_seniorities_graphs_layout(graphs: dict[Graph, dcc.Graph]) -> DashComponent:
128134
div = html.Div(
129135
[
@@ -234,8 +240,7 @@ def make_salaries_breakdown_graphs_layout(graphs: dict[Graph, dcc.Graph]) -> Das
234240
[
235241
html.H3('Salaries breakdown', className='mt-4'),
236242
dbc.Card(
237-
graphs[Graph.TECHNOLOGIES_VIOLIN_PLOT],
238-
className='mt-4 p-1 border-0 rounded shadow'
243+
graphs[Graph.TECHNOLOGIES_VIOLIN_PLOT], className='mt-4 p-1 border-0 rounded shadow'
239244
),
240245
dbc.Card(
241246
graphs[Graph.CONTRACT_TYPE_VIOLIN_PLOT],
@@ -246,21 +251,22 @@ def make_salaries_breakdown_graphs_layout(graphs: dict[Graph, dcc.Graph]) -> Das
246251
return div
247252

248253

254+
def make_graphs_layout(graphs: dict[Graph, go.Figure]) -> list[DashComponent]:
255+
return [
256+
make_categories_and_seniorities_graphs_layout(graphs),
257+
make_locations_and_remote_graphs_layout(graphs),
258+
make_salaries_breakdown_graphs_layout(graphs),
259+
]
249260

250-
def make_graphs_layout(
261+
262+
def make_data_section_layout(
251263
obtained_datetime: datetime, graphs: dict[Graph, go.Figure]
252264
) -> DashComponent:
253265
data_section = html.Section(
254266
[
255-
make_graphs_layout_header(obtained_datetime),
256-
html.Section(
257-
[
258-
make_categories_and_seniorities_graphs_layout(graphs),
259-
make_locations_and_remote_graphs_layout(graphs),
260-
make_salaries_breakdown_graphs_layout(graphs),
261-
],
262-
id='graphs',
263-
),
267+
make_graphs_layout_header(),
268+
make_timeline_slider(obtained_datetime),
269+
html.Div(make_graphs_layout(graphs), id='graphs'),
264270
],
265271
id='data-section',
266272
)
@@ -295,7 +301,7 @@ def make_layout(
295301
[
296302
make_jumbotron(),
297303
make_about(),
298-
make_graphs_layout(
304+
make_data_section_layout(
299305
dynamic_content.obtained_datetime,
300306
dynamic_content.graphs,
301307
),

0 commit comments

Comments
 (0)