From 5c3b14ab23343ac02bdd400ebc8ef5b663f6492d Mon Sep 17 00:00:00 2001 From: "K. Shankari" Date: Thu, 25 Apr 2019 17:03:09 -0700 Subject: [PATCH] Change the webserver to initialize the database lazily I can technically merge this to master, although I am not sure whether I should :) Testing done: Started webserver. No message about loading database. ``` (emission) C02KT61MFFT0:e-mission-server shankari$ ./e-mission-py.bash emission/net/api/cfc_webapp.py analysis.debug.conf.json not configured, falling back to sample, default configuration Finished configuring logging for Replaced json_dumps in plugin with the one from bson Changing bt.json_loads from at 0x10deb3d90> to Running with HTTPS turned OFF - use a reverse proxy on production Bottle v0.13-dev server starting up (using CherootServer())... Listening on http://0.0.0.0:8080/ Hit Ctrl-C to quit. ``` Accessed webserver through a browser. We connect to the database only then. ``` START 2019-04-25 16:54:45.175715 GET / END 2019-04-25 16:54:45.181657 GET / 0.005769968032836914 Connecting to database URL localhost ``` Artificially introduced an error (changed the `if (_current_db is None)` to `if (_current_db is not None)`). Database starts up fine but we get an (as expected) error while serving files because we are unable to contact the database. ``` Running with HTTPS turned OFF - use a reverse proxy on production Bottle v0.13-dev server starting up (using CherootServer())... Listening on http://0.0.0.0:8080/ Hit Ctrl-C to quit. 
START 2019-04-25 17:02:11.109216 GET / END 2019-04-25 17:02:11.120386 GET / 0.011026859283447266 Traceback (most recent call last): File "/Users/shankari/e-mission/e-mission-server/emission/net/api/bottle.py", line 1012, in _handle self.trigger_hook('after_request') File "/Users/shankari/e-mission/e-mission-server/emission/net/api/bottle.py", line 706, in trigger_hook return [hook(*args, **kwargs) for hook in self._hooks[__name][:]] File "/Users/shankari/e-mission/e-mission-server/emission/net/api/bottle.py", line 706, in <listcomp> return [hook(*args, **kwargs) for hook in self._hooks[__name][:]] File "emission/net/api/cfc_webapp.py", line 425, in after_request msTimeNow, duration) File "/Users/shankari/e-mission/e-mission-server/emission/net/api/stats.py", line 13, in store_server_api_time esds.store_server_api_time(user_id, call, ts, reading) File "/Users/shankari/e-mission/e-mission-server/emission/storage/decorations/stats_queries.py", line 20, in store_server_api_time store_stats_entry(user_id, "stats/server_api_time", call, ts, reading) File "/Users/shankari/e-mission/e-mission-server/emission/storage/decorations/stats_queries.py", line 47, in store_stats_entry return esta.TimeSeries.get_time_series(user_id).insert(new_entry) File "/Users/shankari/e-mission/e-mission-server/emission/storage/timeseries/abstract_timeseries.py", line 20, in get_time_series return bits.BuiltinTimeSeries(user_id) File "/Users/shankari/e-mission/e-mission-server/emission/storage/timeseries/builtin_timeseries.py", line 37, in __init__ self.timeseries_db = get_ts_enum_map()[esta.EntryType.DATA_TYPE] File "/Users/shankari/e-mission/e-mission-server/emission/storage/timeseries/builtin_timeseries.py", line 24, in get_ts_enum_map esta.EntryType.DATA_TYPE: edb.get_timeseries_db(), File "/Users/shankari/e-mission/e-mission-server/emission/core/get_database.py", line 160, in get_timeseries_db TimeSeries = _get_current_db().Stage_timeseries AttributeError: 'NoneType' object has no attribute 
'Stage_timeseries' ``` --- emission/analysis/section_features.py | 6 +----- emission/core/get_database.py | 10 ++++++---- .../storage/timeseries/builtin_timeseries.py | 20 ++++++++++++------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/emission/analysis/section_features.py b/emission/analysis/section_features.py index a62f81baa..b41ff11b0 100644 --- a/emission/analysis/section_features.py +++ b/emission/analysis/section_features.py @@ -14,7 +14,7 @@ from sklearn.cluster import DBSCAN # Our imports -from emission.core.get_database import get_section_db, get_mode_db, get_routeCluster_db,get_transit_db +import emission.core.get_database as edb from emission.core.common import calDistance, Include_place_2 from emission.analysis.modelling.tour_model.trajectory_matching.route_matching import getRoute,fullMatchDistance,matchTransitRoutes,matchTransitStops import emission.storage.timeseries.abstract_timeseries as esta @@ -25,10 +25,6 @@ from uuid import UUID -Sections = get_section_db() -Modes = get_mode_db() - - # The speed is in m/s def calOverallSectionSpeed(section): distanceDelta = section.distance diff --git a/emission/core/get_database.py b/emission/core/get_database.py index 35bf76e4f..fc14d1a75 100644 --- a/emission/core/get_database.py +++ b/emission/core/get_database.py @@ -18,12 +18,14 @@ config_data = json.load(config_file) url = config_data["timeseries"]["url"] - -print("Connecting to database URL "+url) -_current_db = MongoClient(url).Stage_database -#config_file.close() +_current_db = None def _get_current_db(): + global _current_db + if (_current_db is None): + print("Connecting to database URL "+url) + _current_db = MongoClient(url).Stage_database + #config_file.close() return _current_db def get_mode_db(): diff --git a/emission/storage/timeseries/builtin_timeseries.py b/emission/storage/timeseries/builtin_timeseries.py index fecd9c95d..220421ec3 100644 --- a/emission/storage/timeseries/builtin_timeseries.py +++ 
b/emission/storage/timeseries/builtin_timeseries.py @@ -15,10 +15,16 @@ import emission.core.wrapper.entry as ecwe -ts_enum_map = { - esta.EntryType.DATA_TYPE: edb.get_timeseries_db(), - esta.EntryType.ANALYSIS_TYPE: edb.get_analysis_timeseries_db() -} +ts_enum_map = None + +def get_ts_enum_map(): + global ts_enum_map + if (ts_enum_map is None): + ts_enum_map = { + esta.EntryType.DATA_TYPE: edb.get_timeseries_db(), + esta.EntryType.ANALYSIS_TYPE: edb.get_analysis_timeseries_db() + } + return ts_enum_map INVALID_QUERY = {'metadata.key': 'invalid'} @@ -28,8 +34,8 @@ def __init__(self, user_id): self.key_query = lambda key: {"metadata.key": key} self.type_query = lambda entry_type: {"metadata.type": entry_type} self.user_query = {"user_id": self.user_id} # UUID is mandatory for this version - self.timeseries_db = ts_enum_map[esta.EntryType.DATA_TYPE] - self.analysis_timeseries_db = ts_enum_map[esta.EntryType.ANALYSIS_TYPE] + self.timeseries_db = get_ts_enum_map()[esta.EntryType.DATA_TYPE] + self.analysis_timeseries_db = get_ts_enum_map()[esta.EntryType.ANALYSIS_TYPE] # Design question: Should the stats be a separate database, or should it be part # of the timeseries database? Technically, it should be part of the timeseries # database. However, I am concerned about the performance of the database @@ -322,7 +328,7 @@ def bulk_insert(self, entries, data_type = None): else: multi_result = None try: - multi_result = ts_enum_map[data_type].insert_many(entries, ordered=False) + multi_result = get_ts_enum_map()[data_type].insert_many(entries, ordered=False) logging.debug("Returning multi_result.inserted_ids = %s... of length %d" % (multi_result.inserted_ids[:10], len(multi_result.inserted_ids))) return multi_result