diff --git a/.gitignore b/.gitignore index 0a8db44..cd4a53c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +tmp + # Python compiled files # ######################### *.pyc diff --git a/dev/Development - weatherForecast.ipynb b/dev/Development - weatherForecast.ipynb new file mode 100644 index 0000000..a10716c --- /dev/null +++ b/dev/Development - weatherForecast.ipynb @@ -0,0 +1,585 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1ebd07f0", + "metadata": {}, + "source": [ + "This is the development notebook for the weather forecast function." + ] + }, + { + "cell_type": "markdown", + "id": "56958d55", + "metadata": {}, + "source": [ + "### weatherForecast.py" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "08949821", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " temp_air ghi dni dhi\n", + "2024-04-01 14:00:00 17.1 835.9 793.8 182.6\n", + "2024-04-01 15:00:00 17.0 761.7 813.0 147.9\n", + "2024-04-01 16:00:00 16.0 626.5 801.9 113.8\n", + "2024-04-01 17:00:00 15.0 446.0 745.3 85.6\n", + "2024-04-01 18:00:00 14.2 236.8 550.7 72.3\n", + "2024-04-01 19:00:00 12.7 38.1 50.7 33.2\n", + "2024-04-01 20:00:00 11.8 0.0 0.0 0.0\n", + "2024-04-01 21:00:00 11.2 0.0 0.0 0.0\n", + "2024-04-01 22:00:00 10.9 0.0 0.0 0.0\n", + "2024-04-01 23:00:00 11.0 0.0 0.0 0.0\n", + "2024-04-02 00:00:00 11.4 0.0 0.0 0.0\n", + "2024-04-02 01:00:00 11.2 0.0 0.0 0.0\n", + "2024-04-02 02:00:00 10.5 0.0 0.0 0.0\n", + "2024-04-02 03:00:00 9.9 0.0 0.0 0.0\n", + "2024-04-02 04:00:00 9.2 0.0 0.0 0.0\n", + "2024-04-02 05:00:00 9.0 0.0 0.0 0.0\n" + ] + } + ], + "source": [ + "# Distributed Optimal and Predictive Energy Resources (DOPER) Copyright (c) 2019\n", + "# The Regents of the University of California, through Lawrence Berkeley\n", + "# National Laboratory (subject to receipt of any required approvals\n", + "# from the U.S. Dept. of Energy). 
All rights reserved.\n", + "\n", + "\"\"\"\"Distributed Optimal and Predictive Energy Resources\n", + "Weather forecast module.\n", + "\"\"\"\n", + "\n", + "import io\n", + "import re\n", + "import os\n", + "import sys\n", + "import time\n", + "import json\n", + "import pygrib\n", + "import requests\n", + "import warnings\n", + "import traceback\n", + "import numpy as np\n", + "import pandas as pd\n", + "import urllib.request\n", + "import datetime as dtm\n", + "\n", + "warnings.filterwarnings('ignore', message='The forecast module algorithms and features are highly experimental.')\n", + "warnings.filterwarnings('ignore', message=\"The HRRR class was deprecated in pvlib 0.9.1 and will be removed in a future release.\")\n", + "\n", + "try:\n", + " root = os.path.dirname(os.path.abspath(__file__))\n", + " from .resources.pvlib.forecast import HRRR\n", + "except:\n", + " root = os.getcwd()\n", + " sys.path.append(os.path.join(root, '..', 'doper'))\n", + " from resources.pvlib.forecast import HRRR\n", + "\n", + "from fmlc.baseclasses import eFMU\n", + "\n", + "datetime_mask = \"20[0-9][0-9]-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]\"\n", + "\n", + "FC_TO_PVLIV_MAP = {\n", + " '9:Total Cloud Cover:% (instant):lambert:atmosphere:level 0 -': 'Total_cloud_cover_entire_atmosphere',\n", + " '7:2 metre temperature:K (instant):lambert:heightAboveGround:level 2 m': 'Temperature_height_above_ground',\n", + " 'wind_speed_u': 0,\n", + " 'wind_speed_v': 0,\n", + " 'Low_cloud_cover_low_cloud': 0,\n", + " 'Medium_cloud_cover_middle_cloud': 0,\n", + " 'High_cloud_cover_high_cloud': 0,\n", + " 'Pressure_surface': 0,\n", + " 'Wind_speed_gust_surface': 0\n", + "}\n", + "\n", + "def download_latest_hrrr(lat, lon, dt, hour, tmp_dir='',\n", + " debug=False, store_file=False):\n", + " '''\n", + " Documentation and API: https://nomads.ncep.noaa.gov/gribfilter.php?ds=hrrr_2d\n", + " Full HRRR files: https://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/\n", + " Historic HRRR 
files: https://www.ncei.noaa.gov/data/rapid-refresh/access/\n", + " '''\n", + " \n", + " # make url for download (from API)\n", + " url = f'https://nomads.ncep.noaa.gov/cgi-bin/filter_hrrr_2d.pl?dir=%2F'\n", + " fname = f'hrrr.t{dt.strftime(\"%H\")}z.wrfsfcf{hour:02}.grib2'\n", + " url += f'hrrr.{dt.strftime(\"%Y%m%d\")}%2Fconus&file={fname}'\n", + " url += f'&var_TCDC=on&var_TMP=on&all_lev=on&subregion=&'\n", + " # url += f'&var_TCDC=on&var_TMP=on&lev_2_m_above_ground=on&subregion=&'\n", + " url += f'toplat={int(lat+1)}&leftlon={int(lon-1)}&rightlon={int(lon+1)}&bottomlat={int(lat-1)}'\n", + " \n", + " # download forecast\n", + " fname = os.path.join(tmp_dir, fname)\n", + " try:\n", + " if store_file:\n", + " urllib.request.urlretrieve(url, fname)\n", + " else:\n", + " fname = requests.get(url).content\n", + " return fname\n", + " except Exception as e:\n", + " if debug:\n", + " print(url)\n", + " print(e)\n", + " return None\n", + "\n", + "def get_nearest_data(lat, lon, fname):\n", + " \n", + " # open file\n", + " grib = pygrib.open(fname)\n", + " \n", + " # get grib locations\n", + " lat_grib = grib[1].latlons()[0]\n", + " lon_grib = grib[1].latlons()[1]\n", + "\n", + " # calculate distances\n", + " abslat = np.abs(lat_grib - lat)\n", + " abslon = np.abs(lon_grib - lon)\n", + " c = np.maximum(abslon, abslat)\n", + "\n", + " # select nearest\n", + " x, y = np.where(c == np.min(c))\n", + " x, y = x[0], y[0]\n", + " \n", + " # get data\n", + " res = {'lat': lat_grib[x, y],\n", + " 'lon': lon_grib[x, y],\n", + " 'x': x, 'y': y}\n", + " for g in grib:\n", + " name = str(g).split(':fcst time')[0]\n", + " res[name] = g.values[x, y]\n", + "\n", + " return res\n", + "\n", + "def get_hrrr_forecast(lat, lon, dt, tz='America/Los_Angeles', max_hour=16,\n", + " tmp_dir='', debug=False, store_file=False):\n", + "\n", + " # convert timestep to hourly\n", + " dt = dt.replace(minute=0, second=0, microsecond=0, nanosecond=0).tz_localize(None)\n", + "\n", + " # bug in pygrib 2.1.5 
does not allow object as input\n", + " store_file = True\n", + " \n", + " res = {}\n", + " for h in range(max_hour+1):\n", + " st = time.time()\n", + "\n", + " # convert local time to utc\n", + " dt_utc = dt.tz_localize(tz).tz_convert('UTC').tz_localize(None)\n", + "\n", + " # get latest hrrr file\n", + " fcObj = download_latest_hrrr(lat, lon, dt_utc, h,\n", + " tmp_dir=tmp_dir,\n", + " debug=debug,\n", + " store_file=store_file)\n", + " \n", + " if fcObj:\n", + " # make readable (pygrib 2.1.5 should support but doesn't)\n", + " if not store_file:\n", + " binary_io = io.BytesIO(fcObj)\n", + " buffer_io = io.BufferedReader(binary_io)\n", + " \n", + " # determine nearest gridpoint\n", + " r = get_nearest_data(lat, lon, fcObj)\n", + " else:\n", + " # no forecast received\n", + " r = {}\n", + "\n", + " r['duration'] = time.time()-st\n", + "\n", + " # add to output\n", + " res[h] = r\n", + " \n", + " # make dataframe\n", + " res = pd.DataFrame(res).transpose()\n", + " res.index = [pd.to_datetime(dt)+pd.DateOffset(hours=ix) for ix in res.index]\n", + " \n", + " return res\n", + "\n", + "class weather_forecaster(eFMU):\n", + " '''\n", + " This class gathers the weather forecasts at one station on a specified frequency. It uses pvlib to\n", + " reference NOAA's HRRR forecast model, and returns the temperature and solar irradiation values. It\n", + " requires a configuration file that specifies the station and sampling frequency.\n", + " '''\n", + " \n", + " def __init__(self):\n", + " '''\n", + " Reads the config information and initializes the forecaster.\n", + " \n", + " Input\n", + " -----\n", + " config (dict): The configuration file. 
Example fiven in \"get_default_config\".\n", + " '''\n", + " self.input = {'input-data': None, 'config': None, 'timeout': None}\n", + " self.output = {'output-data':None, 'duration':None}\n", + " \n", + " self.forecaster = None\n", + " \n", + " def check_data(self, data, ranges):\n", + " for k, r in ranges.items():\n", + " if k in data.columns:\n", + " if not (data[k].min() >= r[0]):\n", + " self.msg += f'ERROR: Entry \"{k}\" is out of range {data[k].min()} >= {r[0]}.\\n'\n", + " if not (data[k].max() <= r[1]):\n", + " self.msg += f'ERROR: Entry \"{k}\" is out of range {data[k].max()} <= {r[1]}.\\n'\n", + " else:\n", + " self.msg += f'ERROR: Entry \"{k}\" is missing.\\n'\n", + "\n", + " def compute(self, now=None):\n", + " '''\n", + " Gathers forecasts for the specified station. Returns either the forecast and error messages.\n", + " \n", + " Input\n", + " -----\n", + " now (str): String representation of the local time the forecast is requested for. None (defualt)\n", + " falls back to using the user's current clock time.\n", + " \n", + " Return\n", + " ------\n", + " data (pd.DataFrame): The forecast as data frame with date time as index. 
Empty data frame on error.\n", + " msg (str): Error messages or empty string when no errors.\n", + " '''\n", + " \n", + " self.msg = ''\n", + " st = time.time()\n", + " \n", + " # initialize\n", + " self.config = self.input['config']\n", + "\n", + " # prepare inputs\n", + " tz = self.config['tz']\n", + " if now == None:\n", + " now = pd.to_datetime(time.time(), unit='s')\n", + " now = now.replace(minute=0, second=0, microsecond=0, nanosecond=0)\n", + " now = now.tz_localize('UTC').tz_convert(tz)\n", + " start_time = pd.to_datetime(now)\n", + " \n", + " # FIXME\n", + " start_time = start_time - dtm.timedelta(hours=1)\n", + " # print('WARNING: Time in local time:', now, 'NOAA is 1h behind (DST?)', start_time)\n", + "\n", + " final_time = start_time + pd.Timedelta(hours=self.config['horizon'])\n", + " \n", + " # get forecast\n", + " self.forecast = pd.DataFrame()\n", + " try:\n", + " if self.config['source'] == 'noaa_hrrr':\n", + " if not self.forecaster:\n", + " \n", + " # setup forecaster\n", + " self.forecaster = get_hrrr_forecast\n", + "\n", + " # setup pvlib processor\n", + " self.pvlib_processor = HRRR()\n", + " self.pvlib_processor.set_location(start_time.tz,\n", + " self.config['lat'],\n", + " self.config['lon'])\n", + "\n", + " # tmp dir\n", + " if not os.path.exists(self.config['tmp_dir']):\n", + " os.mkdir(self.config['tmp_dir'])\n", + "\n", + " # get forecast\n", + " self.forecast = self.forecaster(self.config['lat'],\n", + " self.config['lon'],\n", + " start_time,\n", + " tz=tz,\n", + " max_hour=self.config['horizon'],\n", + " tmp_dir=self.config['tmp_dir'],\n", + " debug=self.config['debug'])\n", + " \n", + " elif self.config['source'] == 'json':\n", + "\n", + " # read forecast form json\n", + " self.forecast = pd.read_json(io.StringIO(self.input['input-data'])).sort_index()\n", + " \n", + " else:\n", + "\n", + " # method not implemented\n", + " self.msg += f'ERROR: Source option \"{self.config[\"source\"]}\" not valid.\\n'\n", + " \n", + " # check 
index\n", + " for i, ix in enumerate(self.forecast.index):\n", + " if not bool(re.match(datetime_mask, str(ix))):\n", + " self.msg += f'ERROR: External forecast date format incorrect \"{ix}\" at position {i}.\\n'\n", + " \n", + " # check and convert to numeric\n", + " for c in self.forecast.columns:\n", + " self.forecast[c] = pd.to_numeric(self.forecast[c], errors='coerce')\n", + " if self.forecast.isnull().values.any():\n", + " self.msg += f'ERROR: NaNs in forecast at: {self.forecast.index[self.forecast.isnull().any(axis=1)]}.\\n'\n", + "\n", + " # check index\n", + " if self.msg == '':\n", + " self.forecast.index = pd.to_datetime(self.forecast.index, format='%Y-%m-%d %H:%M:%S')\n", + " if not len(self.forecast) == self.config['horizon']+1:\n", + " self.msg += f'ERROR: Forecast length {len(self.forecast)} is not horizon {self.config[\"horizon\"]+1}.\\n'\n", + " if not self.forecast.index[0] == start_time.tz_localize(None):\n", + " self.msg += f'ERROR: Forecast start \"{self.forecast.index[0]}\" not ' \\\n", + " + f'start_time \"{start_time.tz_localize(None)}\".\\n'\n", + " if not self.forecast.index[-1] == final_time.tz_localize(None):\n", + " self.msg += f'ERROR: Forecast final \"{self.forecast.index[-1]}\" not ' \\\n", + " + f'final_time \"{final_time.tz_localize(None)}\".\\n'\n", + " if self.forecast.resample('1h').asfreq().isnull().values.any():\n", + " self.msg += f'ERROR: Missing timestamp in forecast.\\n'\n", + " \n", + " except Exception as e:\n", + " self.msg += f'ERROR: {e}\\n\\n{traceback.format_exc()}\\n'\n", + " self.forecast = pd.DataFrame()\n", + " \n", + " # process data\n", + " self.data = pd.DataFrame()\n", + " if self.msg == '':\n", + " try:\n", + " # check forecast\n", + " self.check_data(self.forecast, self.config['forecast_cols'])\n", + "\n", + " # process\n", + " if self.msg == '':\n", + " # direct pvlib form forecast\n", + " direct = {k: v for k, v in FC_TO_PVLIV_MAP.items() if isinstance(v, str)}\n", + " self.pvlib_fc = 
self.forecast[direct.keys()].copy(deep=True).rename(columns=direct)\n", + " # computed from forecast\n", + " computed = {k: v for k, v in FC_TO_PVLIV_MAP.items() if not isinstance(v, str)}\n", + " for k, v in computed.items():\n", + " self.pvlib_fc[k] = v\n", + " self.pvlib_fc.index = self.pvlib_fc.index.tz_localize(tz)\n", + " # duplicate last beacuse of bug in pvlib\n", + " self.pvlib_fc.loc[self.pvlib_fc.index[-1]+pd.DateOffset(hours=1), :] = self.pvlib_fc.iloc[-1]\n", + " self.data = self.pvlib_processor.process_data(self.pvlib_fc)\n", + " self.data = self.data.loc[self.pvlib_fc.index[:-1]]\n", + " self.data.index = self.data.index.tz_localize(None)\n", + " self.data = self.data[self.config['output_cols'].keys()]\n", + "\n", + " # FIXME\n", + " self.data = self.data.iloc[1:]\n", + " # print('WARNING: Removing first timestep (last hour) due to NOAA 1h behind')\n", + " except Exception as e:\n", + " self.msg += f'ERROR: {e}.\\n\\n{traceback.format_exc()}\\n'\n", + " self.data = pd.DataFrame()\n", + "\n", + " # check data\n", + " if self.msg == '' and self.config['output_cols']:\n", + " self.check_data(self.data, self.config['output_cols'])\n", + "\n", + " # return\n", + " self.init = False\n", + " if self.config['json_return']:\n", + " self.output['output-data'] = self.data.to_json()\n", + " else:\n", + " self.output['output-data'] = self.data\n", + " self.output['duration'] = time.time() - st\n", + "\n", + " if self.msg == '':\n", + " return 'Done.'\n", + " return self.msg\n", + "\n", + "def get_default_config():\n", + " config = {}\n", + " # config['name'] = 'Berkeley'\n", + " config['lat'] = 37.8715\n", + " config['lon'] = -122.2501\n", + " config['tz'] = 'US/Pacific'\n", + " config['horizon'] = 16\n", + " config['tmp_dir'] = os.path.join(root, 'tmp')\n", + " config['debug'] = False\n", + " config['source'] = 'noaa_hrrr'\n", + " config['json_return'] = True\n", + " config['forecast_cols'] = {\n", + " '9:Total Cloud Cover:% (instant):lambert:atmosphere:level 0 
-': [0, 100],\n", + " '7:2 metre temperature:K (instant):lambert:heightAboveGround:level 2 m': [200, 400]\n", + " }\n", + " config['output_cols'] = {'temp_air': [-50, 50],\n", + " 'ghi': [0, 1000],\n", + " 'dni': [0, 1500],\n", + " 'dhi': [0, 1000]}\n", + " return config\n", + "\n", + "if __name__ == '__main__':\n", + " \n", + " # get config\n", + " config = get_default_config()\n", + " \n", + " # initialize\n", + " forecaster = weather_forecaster()\n", + " forecaster.input['config'] = config\n", + "\n", + " # for defcon setup\n", + " if len(sys.argv) == 2:\n", + " forecaster.input['config']['source'] = 'json'\n", + " forecaster.input['input-data'] = pd.read_csv(sys.argv[1], index_col=0).to_json()\n", + " \n", + " # get forecast\n", + " msg = forecaster.compute(now=None)\n", + " res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + " \n", + " # check for errors\n", + " if msg != 'Done.':\n", + " print(msg)\n", + " else:\n", + " print(res.round(1))" + ] + }, + { + "cell_type": "markdown", + "id": "1b6aa79e", + "metadata": {}, + "source": [ + "### Unit Tests" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "97ce5920", + "metadata": {}, + "outputs": [], + "source": [ + "# forecaster.input['config']['source'] = 'json'\n", + "# forecast = pd.read_csv('test_weather_data.csv', index_col=0).iloc[:-1]\n", + "# now = forecast.index[0]\n", + "\n", + "# # index wrong lenght\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2].replace('00:00', '15:00'), :] = temp.loc[temp.index[2]]\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg.startswith('ERROR: Forecast length'))\n", + "\n", + "# # index wrong format\n", + "# temp = forecast.copy(deep=True)\n", + "# ix = temp.index.tolist()\n", + "# ix[0] = 123\n", + "# temp.index = ix\n", + "# 
forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: External forecast date format incorrect \"123\" at position 0.\\n')\n", + "\n", + "# # index wrong date\n", + "# temp = forecast.copy(deep=True)\n", + "# ix = temp.index.tolist()\n", + "# ix[0] = '2022-13-01 00:00:00'\n", + "# temp.index = ix\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg.split('\\n')[0] == \"ERROR: time data 2022-13-01 00:00:00 doesn't match format specified\")\n", + "\n", + "# # missing timestep\n", + "# temp = forecast.copy(deep=True)\n", + "# temp = temp.loc[temp.index[:1].tolist()+temp.index[2:].tolist()]\n", + "# temp.loc[temp.index[0].replace('00:00', '15:00'), :] = temp.loc[temp.index[2]]\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: Missing timestamp in forecast.\\n')\n", + "\n", + "# # wrong start time\n", + "# temp = forecast.copy(deep=True)\n", + "# temp = temp.iloc[1:]\n", + "# temp.loc[temp.index[0].replace('00:00', '15:00'), :] = temp.loc[temp.index[2]]\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg.startswith('ERROR: Forecast start '))\n", + "\n", + "# # wrong final time\n", + "# temp = forecast.copy(deep=True)\n", + "# temp = temp.iloc[:-1]\n", + "# temp.loc[temp.index[0].replace('00:00', '15:00'), :] = temp.loc[temp.index[2]]\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# 
forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg.startswith('ERROR: Forecast final '))\n", + "\n", + "# # clod cover high\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2], 'Total_cloud_cover_entire_atmosphere'] = 101\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: Entry \"Total_cloud_cover_entire_atmosphere\" is out of range 101 <= 100.\\n')\n", + "\n", + "# # clod cover low\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2], 'Total_cloud_cover_entire_atmosphere'] = -0.1\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: Entry \"Total_cloud_cover_entire_atmosphere\" is out of range -0.1 >= 0.\\n')\n", + "\n", + "# # temp high\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2], 'Temperature_height_above_ground'] = 401\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: Entry \"Temperature_height_above_ground\" is out of range 401.0 <= 400.\\n')\n", + "\n", + "# # temp low\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2], 'Temperature_height_above_ground'] = 199\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg == 'ERROR: Entry \"Temperature_height_above_ground\" is out of range 199.0 
>= 200.\\n')\n", + "\n", + "# # temp is text\n", + "# temp = forecast.copy(deep=True)\n", + "# temp.loc[temp.index[2], 'Temperature_height_above_ground'] = 'ABCD'\n", + "# forecaster.input['input-data'] = temp.to_json()\n", + "# forecaster.compute(now=now)\n", + "# res = pd.read_json(io.StringIO(forecaster.output['output-data']))\n", + "# msg = forecaster.msg\n", + "# print(msg.startswith('ERROR: NaNs in forecast at: DatetimeIndex'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e804316", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/dev/how-to-update-pypi-package.txt b/dev/how-to-update-pypi-package.txt new file mode 100644 index 0000000..ceb7f64 --- /dev/null +++ b/dev/how-to-update-pypi-package.txt @@ -0,0 +1,18 @@ +Prerequisistes: + +Have twine (pip install twine) +Make an account for PyPi website +Be added to the project on PyPi + +Steps to updating DOPER Package: + +1. Change the version number in doper/__init__.py file. +For more on the setup.py file, check out this link: https://packaging.python.org/tutorials/packaging-projects#configuring-metadata) + +2. In the bash shell, run "python setup.py sdist bdist_wheel" (this will create a build and dist folder which contains info on the packages that we will publish) + +3. In the bash shell, run "tar tzf dist/DOPER-[VERSION NUMBER HERE].tar.gz" to check the contents of the package. If it looks normal, proceed. Just make sure nothing is egrgiously wrong. + +4. In the bash shell, run "twine check dist/*" and make sure you pass the tests. 
+ +5. Finally, run 'twine upload dist/*', fill out your username and password when prompted, and we are done! diff --git a/doper/__init__.py b/doper/__init__.py index fa4cd97..b618b13 100644 --- a/doper/__init__.py +++ b/doper/__init__.py @@ -8,4 +8,4 @@ from .computetariff import * from .data.tariff import get_tariff -__version__ = "2.0.0" +__version__ = "2.1.0" diff --git a/doper/data/comDummy.py b/doper/data/comDummy.py new file mode 100644 index 0000000..e138289 --- /dev/null +++ b/doper/data/comDummy.py @@ -0,0 +1,17 @@ +import time +from fmlc.baseclasses import eFMU + +class communication_dummy(eFMU): + def __init__(self): + self.input = {'input-data': None, 'config': None, 'timeout': None} + self.output = {'output-data': None, 'duration': None} + + def compute(self): + st = time.time() + + self.output['output-data'] = {} + for k, v in self.input['config']: + self.output['output-data'][k] = v + + self.output['duration'] = time.time() - st + return 'Done.' diff --git a/doper/data/weatherForecast.py b/doper/data/weatherForecast.py new file mode 100644 index 0000000..6205313 --- /dev/null +++ b/doper/data/weatherForecast.py @@ -0,0 +1,377 @@ +# Distributed Optimal and Predictive Energy Resources (DOPER) Copyright (c) 2019 +# The Regents of the University of California, through Lawrence Berkeley +# National Laboratory (subject to receipt of any required approvals +# from the U.S. Dept. of Energy). All rights reserved. + +""""Distributed Optimal and Predictive Energy Resources +Weather forecast module. 
+""" + +import io +import re +import os +import sys +import time +import json +import pygrib +import requests +import warnings +import traceback +import numpy as np +import pandas as pd +import urllib.request +import datetime as dtm + +warnings.filterwarnings('ignore', message='The forecast module algorithms and features are highly experimental.') +warnings.filterwarnings('ignore', message="The HRRR class was deprecated in pvlib 0.9.1 and will be removed in a future release.") + +try: + root = os.path.dirname(os.path.abspath(__file__)) + from .resources.pvlib.forecast import HRRR +except: + root = os.getcwd() + sys.path.append(os.path.join(root, '..', 'doper')) + from resources.pvlib.forecast import HRRR + +from fmlc.baseclasses import eFMU + +datetime_mask = "20[0-9][0-9]-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]" + +FC_TO_PVLIV_MAP = { + '9:Total Cloud Cover:% (instant):lambert:atmosphere:level 0 -': 'Total_cloud_cover_entire_atmosphere', + '7:2 metre temperature:K (instant):lambert:heightAboveGround:level 2 m': 'Temperature_height_above_ground', + 'wind_speed_u': 0, + 'wind_speed_v': 0, + 'Low_cloud_cover_low_cloud': 0, + 'Medium_cloud_cover_middle_cloud': 0, + 'High_cloud_cover_high_cloud': 0, + 'Pressure_surface': 0, + 'Wind_speed_gust_surface': 0 +} + +def download_latest_hrrr(lat, lon, dt, hour, tmp_dir='', + debug=False, store_file=False): + ''' + Documentation and API: https://nomads.ncep.noaa.gov/gribfilter.php?ds=hrrr_2d + Full HRRR files: https://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/ + Historic HRRR files: https://www.ncei.noaa.gov/data/rapid-refresh/access/ + ''' + + # make url for download (from API) + url = f'https://nomads.ncep.noaa.gov/cgi-bin/filter_hrrr_2d.pl?dir=%2F' + fname = f'hrrr.t{dt.strftime("%H")}z.wrfsfcf{hour:02}.grib2' + url += f'hrrr.{dt.strftime("%Y%m%d")}%2Fconus&file={fname}' + url += f'&var_TCDC=on&var_TMP=on&all_lev=on&subregion=&' + # url += f'&var_TCDC=on&var_TMP=on&lev_2_m_above_ground=on&subregion=&' 
+ url += f'toplat={int(lat+1)}&leftlon={int(lon-1)}&rightlon={int(lon+1)}&bottomlat={int(lat-1)}' + + # download forecast + fname = os.path.join(tmp_dir, fname) + try: + if store_file: + urllib.request.urlretrieve(url, fname) + else: + fname = requests.get(url).content + return fname + except Exception as e: + if debug: + print(url) + print(e) + return None + +def get_nearest_data(lat, lon, fname): + + # open file + grib = pygrib.open(fname) + + # get grib locations + lat_grib = grib[1].latlons()[0] + lon_grib = grib[1].latlons()[1] + + # calculate distances + abslat = np.abs(lat_grib - lat) + abslon = np.abs(lon_grib - lon) + c = np.maximum(abslon, abslat) + + # select nearest + x, y = np.where(c == np.min(c)) + x, y = x[0], y[0] + + # get data + res = {'lat': lat_grib[x, y], + 'lon': lon_grib[x, y], + 'x': x, 'y': y} + for g in grib: + name = str(g).split(':fcst time')[0] + res[name] = g.values[x, y] + + return res + +def get_hrrr_forecast(lat, lon, dt, tz='America/Los_Angeles', max_hour=16, + tmp_dir='', debug=False, store_file=False): + + # convert timestep to hourly + dt = dt.replace(minute=0, second=0, microsecond=0, nanosecond=0).tz_localize(None) + + # bug in pygrib 2.1.5 does not allow object as input + store_file = True + + res = {} + for h in range(max_hour+1): + st = time.time() + + # convert local time to utc + dt_utc = dt.tz_localize(tz).tz_convert('UTC').tz_localize(None) + + # get latest hrrr file + fcObj = download_latest_hrrr(lat, lon, dt_utc, h, + tmp_dir=tmp_dir, + debug=debug, + store_file=store_file) + + if fcObj: + # make readable (pygrib 2.1.5 should support but doesn't) + if not store_file: + binary_io = io.BytesIO(fcObj) + buffer_io = io.BufferedReader(binary_io) + + # determine nearest gridpoint + r = get_nearest_data(lat, lon, fcObj) + else: + # no forecast received + r = {} + + r['duration'] = time.time()-st + + # add to output + res[h] = r + + # make dataframe + res = pd.DataFrame(res).transpose() + res.index = 
[pd.to_datetime(dt)+pd.DateOffset(hours=ix) for ix in res.index] + + return res + +class weather_forecaster(eFMU): + ''' + This class gathers the weather forecasts at one station on a specified frequency. It uses pvlib to + reference NOAA's HRRR forecast model, and returns the temperature and solar irradiation values. It + requires a configuration file that specifies the station and sampling frequency. + ''' + + def __init__(self): + ''' + Reads the config information and initializes the forecaster. + + Input + ----- + config (dict): The configuration file. Example fiven in "get_default_config". + ''' + self.input = {'input-data': None, 'config': None, 'timeout': None} + self.output = {'output-data':None, 'duration':None} + + self.forecaster = None + + def check_data(self, data, ranges): + for k, r in ranges.items(): + if k in data.columns: + if not (data[k].min() >= r[0]): + self.msg += f'ERROR: Entry "{k}" is out of range {data[k].min()} >= {r[0]}.\n' + if not (data[k].max() <= r[1]): + self.msg += f'ERROR: Entry "{k}" is out of range {data[k].max()} <= {r[1]}.\n' + else: + self.msg += f'ERROR: Entry "{k}" is missing.\n' + + def compute(self, now=None): + ''' + Gathers forecasts for the specified station. Returns either the forecast and error messages. + + Input + ----- + now (str): String representation of the local time the forecast is requested for. None (defualt) + falls back to using the user's current clock time. + + Return + ------ + data (pd.DataFrame): The forecast as data frame with date time as index. Empty data frame on error. + msg (str): Error messages or empty string when no errors. 
+ ''' + + self.msg = '' + st = time.time() + + # initialize + self.config = self.input['config'] + + # prepare inputs + tz = self.config['tz'] + if now == None: + now = pd.to_datetime(time.time(), unit='s') + now = now.replace(minute=0, second=0, microsecond=0, nanosecond=0) + now = now.tz_localize('UTC').tz_convert(tz) + start_time = pd.to_datetime(now) + + # FIXME + start_time = start_time - dtm.timedelta(hours=1) + # print('WARNING: Time in local time:', now, 'NOAA is 1h behind (DST?)', start_time) + + final_time = start_time + pd.Timedelta(hours=self.config['horizon']) + + # get forecast + self.forecast = pd.DataFrame() + try: + if self.config['source'] == 'noaa_hrrr': + if not self.forecaster: + + # setup forecaster + self.forecaster = get_hrrr_forecast + + # setup pvlib processor + self.pvlib_processor = HRRR() + self.pvlib_processor.set_location(start_time.tz, + self.config['lat'], + self.config['lon']) + + # tmp dir + if not os.path.exists(self.config['tmp_dir']): + os.mkdir(self.config['tmp_dir']) + + # get forecast + self.forecast = self.forecaster(self.config['lat'], + self.config['lon'], + start_time, + tz=tz, + max_hour=self.config['horizon'], + tmp_dir=self.config['tmp_dir'], + debug=self.config['debug']) + + elif self.config['source'] == 'json': + + # read forecast form json + self.forecast = pd.read_json(io.StringIO(self.input['input-data'])).sort_index() + + else: + + # method not implemented + self.msg += f'ERROR: Source option "{self.config["source"]}" not valid.\n' + + # check index + for i, ix in enumerate(self.forecast.index): + if not bool(re.match(datetime_mask, str(ix))): + self.msg += f'ERROR: External forecast date format incorrect "{ix}" at position {i}.\n' + + # check and convert to numeric + for c in self.forecast.columns: + self.forecast[c] = pd.to_numeric(self.forecast[c], errors='coerce') + if self.forecast.isnull().values.any(): + self.msg += f'ERROR: NaNs in forecast at: 
{self.forecast.index[self.forecast.isnull().any(axis=1)]}.\n' + + # check index + if self.msg == '': + self.forecast.index = pd.to_datetime(self.forecast.index, format='%Y-%m-%d %H:%M:%S') + if not len(self.forecast) == self.config['horizon']+1: + self.msg += f'ERROR: Forecast length {len(self.forecast)} is not horizon {self.config["horizon"]+1}.\n' + if not self.forecast.index[0] == start_time.tz_localize(None): + self.msg += f'ERROR: Forecast start "{self.forecast.index[0]}" not ' \ + + f'start_time "{start_time.tz_localize(None)}".\n' + if not self.forecast.index[-1] == final_time.tz_localize(None): + self.msg += f'ERROR: Forecast final "{self.forecast.index[-1]}" not ' \ + + f'final_time "{final_time.tz_localize(None)}".\n' + if self.forecast.resample('1h').asfreq().isnull().values.any(): + self.msg += f'ERROR: Missing timestamp in forecast.\n' + + except Exception as e: + self.msg += f'ERROR: {e}\n\n{traceback.format_exc()}\n' + self.forecast = pd.DataFrame() + + # process data + self.data = pd.DataFrame() + if self.msg == '': + try: + # check forecast + self.check_data(self.forecast, self.config['forecast_cols']) + + # process + if self.msg == '': + # direct pvlib form forecast + direct = {k: v for k, v in FC_TO_PVLIV_MAP.items() if isinstance(v, str)} + self.pvlib_fc = self.forecast[direct.keys()].copy(deep=True).rename(columns=direct) + # computed from forecast + computed = {k: v for k, v in FC_TO_PVLIV_MAP.items() if not isinstance(v, str)} + for k, v in computed.items(): + self.pvlib_fc[k] = v + self.pvlib_fc.index = self.pvlib_fc.index.tz_localize(tz) + # duplicate last beacuse of bug in pvlib + self.pvlib_fc.loc[self.pvlib_fc.index[-1]+pd.DateOffset(hours=1), :] = self.pvlib_fc.iloc[-1] + self.data = self.pvlib_processor.process_data(self.pvlib_fc) + self.data = self.data.loc[self.pvlib_fc.index[:-1]] + self.data.index = self.data.index.tz_localize(None) + self.data = self.data[self.config['output_cols'].keys()] + + # FIXME + self.data = 
self.data.iloc[1:] + # print('WARNING: Removing first timestep (last hour) due to NOAA 1h behind') + except Exception as e: + self.msg += f'ERROR: {e}.\n\n{traceback.format_exc()}\n' + self.data = pd.DataFrame() + + # check data + if self.msg == '' and self.config['output_cols']: + self.check_data(self.data, self.config['output_cols']) + + # return + self.init = False + if self.config['json_return']: + self.output['output-data'] = self.data.to_json() + else: + self.output['output-data'] = self.data + self.output['duration'] = time.time() - st + + if self.msg == '': + return 'Done.' + return self.msg + +def get_default_config(): + config = {} + # config['name'] = 'Berkeley' + config['lat'] = 37.8715 + config['lon'] = -122.2501 + config['tz'] = 'US/Pacific' + config['horizon'] = 16 + config['tmp_dir'] = os.path.join(root, 'tmp') + config['debug'] = False + config['source'] = 'noaa_hrrr' + config['json_return'] = True + config['forecast_cols'] = { + '9:Total Cloud Cover:% (instant):lambert:atmosphere:level 0 -': [0, 100], + '7:2 metre temperature:K (instant):lambert:heightAboveGround:level 2 m': [200, 400] + } + config['output_cols'] = {'temp_air': [-50, 50], + 'ghi': [0, 1000], + 'dni': [0, 1500], + 'dhi': [0, 1000]} + return config + +if __name__ == '__main__': + + # get config + config = get_default_config() + + # initialize + forecaster = weather_forecaster() + forecaster.input['config'] = config + + # for defcon setup + if len(sys.argv) == 2: + forecaster.input['config']['source'] = 'json' + forecaster.input['input-data'] = pd.read_csv(sys.argv[1], index_col=0).to_json() + + # get forecast + msg = forecaster.compute(now=None) + res = pd.read_json(io.StringIO(forecaster.output['output-data'])) + + # check for errors + if msg != 'Done.': + print(msg) + else: + print(res.round(1)) \ No newline at end of file diff --git a/doper/resources/__init__.py b/doper/resources/__init__.py new file mode 100644 index 0000000..a772db0 --- /dev/null +++ 
b/doper/resources/__init__.py @@ -0,0 +1,8 @@ +# Distributed Optimal and Predictive Energy Resources (DOPER) Copyright (c) 2019 +# The Regents of the University of California, through Lawrence Berkeley +# National Laboratory (subject to receipt of any required approvals +# from the U.S. Dept. of Energy). All rights reserved. + +""""Distributed Optimal and Predictive Energy Resources +Resources module. +""" diff --git a/doper/resources/pvlib/README.md b/doper/resources/pvlib/README.md new file mode 100644 index 0000000..9dc4f33 --- /dev/null +++ b/doper/resources/pvlib/README.md @@ -0,0 +1 @@ +The forecast.py file was downloaded from an old depreciated distribution of pvlib (https://github.com/pvlib/pvlib-python/blob/v0.9.5/pvlib/forecast.py) \ No newline at end of file diff --git a/doper/resources/pvlib/__init__.py b/doper/resources/pvlib/__init__.py new file mode 100644 index 0000000..124e4d4 --- /dev/null +++ b/doper/resources/pvlib/__init__.py @@ -0,0 +1,8 @@ +# Distributed Optimal and Predictive Energy Resources (DOPER) Copyright (c) 2019 +# The Regents of the University of California, through Lawrence Berkeley +# National Laboratory (subject to receipt of any required approvals +# from the U.S. Dept. of Energy). All rights reserved. + +""""Distributed Optimal and Predictive Energy Resources +pvlib module. +""" diff --git a/doper/resources/pvlib/forecast.py b/doper/resources/pvlib/forecast.py new file mode 100644 index 0000000..b077024 --- /dev/null +++ b/doper/resources/pvlib/forecast.py @@ -0,0 +1,1211 @@ +''' +The 'forecast' module contains class definitions for +retreiving forecasted data from UNIDATA Thredd servers. 
+''' +from netCDF4 import num2date +import numpy as np +import pandas as pd +from requests.exceptions import HTTPError +from xml.etree.ElementTree import ParseError + +from pvlib.location import Location +from pvlib.irradiance import campbell_norman, get_extra_radiation, disc +from siphon.catalog import TDSCatalog +from siphon.ncss import NCSS + +import warnings +from pvlib._deprecation import deprecated + + +warnings.warn( + 'The forecast module algorithms and features are highly experimental. ' + 'The API may change, the functionality may be consolidated into an io ' + 'module, or the module may be separated into its own package.') + +_forecast_deprecated = deprecated( + since='0.9.1', + removal='a future release', + addendum='For details, see https://pvlib-python.readthedocs.io/en/stable/user_guide/forecasts.html' # noqa: E501 +) + +# don't decorate the base class to prevent the subclasses from showing +# duplicate warnings: +# @_forecast_deprecated +class ForecastModel: + """ + An object for querying and holding forecast model information for + use within the pvlib library. + + Simplifies use of siphon library on a THREDDS server. + + Parameters + ---------- + model_type: string + UNIDATA category in which the model is located. + model_name: string + Name of the UNIDATA forecast model. + set_type: string + Model dataset type. + + Attributes + ---------- + access_url: string + URL specifying the dataset from data will be retrieved. + base_tds_url : string + The top level server address + catalog_url : string + The url path of the catalog to parse. + data: pd.DataFrame + Data returned from the query. + data_format: string + Format of the forecast data being requested from UNIDATA. + dataset: Dataset + Object containing information used to access forecast data. + dataframe_variables: list + Model variables that are present in the data. + datasets_list: list + List of all available datasets. 
+ fm_models: Dataset + TDSCatalog object containing all available + forecast models from UNIDATA. + fm_models_list: list + List of all available forecast models from UNIDATA. + latitude: list + A list of floats containing latitude values. + location: Location + A pvlib Location object containing geographic quantities. + longitude: list + A list of floats containing longitude values. + lbox: boolean + Indicates the use of a location bounding box. + ncss: NCSS object + NCSS + model_name: string + Name of the UNIDATA forecast model. + model: Dataset + A dictionary of Dataset object, whose keys are the name of the + dataset's name. + model_url: string + The url path of the dataset to parse. + modelvariables: list + Common variable names that correspond to queryvariables. + query: NCSS query object + NCSS object used to complete the forecast data retrival. + queryvariables: list + Variables that are used to query the THREDDS Data Server. + time: DatetimeIndex + Time range. + variables: dict + Defines the variables to obtain from the weather + model and how they should be renamed to common variable names. + units: dict + Dictionary containing the units of the standard variables + and the model specific variables. + vert_level: float or integer + Vertical altitude for query data. 
+ """ + + access_url_key = 'NetcdfSubset' + catalog_url = 'https://thredds.ucar.edu/thredds/catalog.xml' + base_tds_url = catalog_url.split('/thredds/')[0] + data_format = 'netcdf' + + units = { + 'temp_air': 'C', + 'wind_speed': 'm/s', + 'ghi': 'W/m^2', + 'ghi_raw': 'W/m^2', + 'dni': 'W/m^2', + 'dhi': 'W/m^2', + 'total_clouds': '%', + 'low_clouds': '%', + 'mid_clouds': '%', + 'high_clouds': '%'} + + def __init__(self, model_type, model_name, set_type, vert_level=None): + self.model_type = model_type + self.model_name = model_name + self.set_type = set_type + self.connected = False + self.vert_level = vert_level + + def connect_to_catalog(self): + self.catalog = TDSCatalog(self.catalog_url) + self.fm_models = TDSCatalog( + self.catalog.catalog_refs[self.model_type].href) + self.fm_models_list = sorted(list(self.fm_models.catalog_refs.keys())) + + try: + model_url = self.fm_models.catalog_refs[self.model_name].href + except ParseError: + raise ParseError(self.model_name + ' model may be unavailable.') + + try: + self.model = TDSCatalog(model_url) + except HTTPError: + try: + self.model = TDSCatalog(model_url) + except HTTPError: + raise HTTPError(self.model_name + ' model may be unavailable.') + + self.datasets_list = list(self.model.datasets.keys()) + self.set_dataset() + self.connected = True + + def __repr__(self): + return f'{self.model_name}, {self.set_type}' + + def set_dataset(self): + ''' + Retrieves the designated dataset, creates NCSS object, and + creates a NCSS query object. 
+ ''' + + keys = list(self.model.datasets.keys()) + labels = [item.split()[0].lower() for item in keys] + if self.set_type == 'best': + self.dataset = self.model.datasets[keys[labels.index('best')]] + elif self.set_type == 'latest': + self.dataset = self.model.datasets[keys[labels.index('latest')]] + elif self.set_type == 'full': + self.dataset = self.model.datasets[keys[labels.index('full')]] + + self.access_url = self.dataset.access_urls[self.access_url_key] + self.ncss = NCSS(self.access_url) + self.query = self.ncss.query() + + def set_query_time_range(self, start, end): + """ + Parameters + ---------- + start : datetime.datetime, pandas.Timestamp + Must be tz-localized. + end : datetime.datetime, pandas.Timestamp + Must be tz-localized. + + Notes + ----- + Assigns ``self.start``, ``self.end``. Modifies ``self.query`` + """ + self.start = pd.Timestamp(start) + self.end = pd.Timestamp(end) + if self.start.tz is None or self.end.tz is None: + raise TypeError('start and end must be tz-localized') + # don't assume that siphon or the server can handle anything other + # than UTC + self.query.time_range( + self.start.tz_convert('UTC'), + self.end.tz_convert('UTC') + ) + + def set_query_latlon(self): + ''' + Sets the NCSS query location latitude and longitude. + ''' + + if (isinstance(self.longitude, list) and + isinstance(self.latitude, list)): + self.lbox = True + # west, east, south, north + self.query.lonlat_box(self.longitude[0], self.longitude[1], + self.latitude[0], self.latitude[1]) + else: + self.lbox = False + self.query.lonlat_point(self.longitude, self.latitude) + + def set_location(self, tz, latitude, longitude): + ''' + Sets the location for the query. + + Parameters + ---------- + tz: tzinfo + Timezone of the query + latitude: float + Latitude of the query + longitude: float + Longitude of the query + + Notes + ----- + Assigns ``self.location``. 
+ ''' + self.location = Location(latitude, longitude, tz=tz) + + def get_data(self, latitude, longitude, start, end, + vert_level=None, query_variables=None, + close_netcdf_data=True, **kwargs): + """ + Submits a query to the UNIDATA servers using Siphon NCSS and + converts the netcdf data to a pandas DataFrame. + + Parameters + ---------- + latitude: float + The latitude value. + longitude: float + The longitude value. + start: datetime or timestamp + The start time. + end: datetime or timestamp + The end time. + vert_level: None, float or integer, default None + Vertical altitude of interest. + query_variables: None or list, default None + If None, uses self.variables. + close_netcdf_data: bool, default True + Controls if the temporary netcdf data file should be closed. + Set to False to access the raw data. + **kwargs: + Additional keyword arguments are silently ignored. + + Returns + ------- + forecast_data : DataFrame + column names are the weather model's variable names. + """ + + if not self.connected: + self.connect_to_catalog() + + if vert_level is not None: + self.vert_level = vert_level + + if query_variables is None: + self.query_variables = list(self.variables.values()) + else: + self.query_variables = query_variables + + self.set_query_time_range(start, end) + + self.latitude = latitude + self.longitude = longitude + self.set_query_latlon() # modifies self.query + self.set_location(self.start.tz, latitude, longitude) + + if self.vert_level is not None: + self.query.vertical_level(self.vert_level) + + self.query.variables(*self.query_variables) + self.query.accept(self.data_format) + + self.netcdf_data = self.ncss.get_data(self.query) + + # might be better to go to xarray here so that we can handle + # higher dimensional data for more advanced applications + self.data = self._netcdf2pandas(self.netcdf_data, self.query_variables, + self.start, self.end) + + if close_netcdf_data: + self.netcdf_data.close() + + return self.data + + def process_data(self, 
data, **kwargs): + """ + Defines the steps needed to convert raw forecast data + into processed forecast data. Most forecast models implement + their own version of this method which also call this one. + + Parameters + ---------- + data: DataFrame + Raw forecast data + + Returns + ------- + data: DataFrame + Processed forecast data. + """ + data = self.rename(data) + return data + + def get_processed_data(self, *args, **kwargs): + """ + Get and process forecast data. + + Parameters + ---------- + *args: positional arguments + Passed to get_data + **kwargs: keyword arguments + Passed to get_data and process_data + + Returns + ------- + data: DataFrame + Processed forecast data + """ + return self.process_data(self.get_data(*args, **kwargs), **kwargs) + + def rename(self, data, variables=None): + """ + Renames the columns according the variable mapping. + + Parameters + ---------- + data: DataFrame + variables: None or dict, default None + If None, uses self.variables + + Returns + ------- + data: DataFrame + Renamed data. + """ + if variables is None: + variables = self.variables + return data.rename(columns={y: x for x, y in variables.items()}) + + def _netcdf2pandas(self, netcdf_data, query_variables, start, end): + """ + Transforms data from netcdf to pandas DataFrame. + + Parameters + ---------- + data: netcdf + Data returned from UNIDATA NCSS query. + query_variables: list + The variables requested. + start: Timestamp + The start time + end: Timestamp + The end time + + Returns + ------- + pd.DataFrame + """ + # set self.time + try: + time_var = 'time' + self.set_time(netcdf_data.variables[time_var]) + except KeyError: + # which model does this dumb thing? 
+ time_var = 'time1' + self.set_time(netcdf_data.variables[time_var]) + + data_dict = {} + for key, data in netcdf_data.variables.items(): + # if accounts for possibility of extra variable returned + if key not in query_variables: + continue + squeezed = data[:].squeeze() + + # If the data is big endian, swap the byte order to make it + # little endian + if squeezed.dtype.byteorder == '>': + squeezed = squeezed.byteswap().newbyteorder() + if squeezed.ndim == 1: + data_dict[key] = squeezed + elif squeezed.ndim == 2: + for num, data_level in enumerate(squeezed.T): + data_dict[key + '_' + str(num)] = data_level + else: + raise ValueError('cannot parse ndim > 2') + + data = pd.DataFrame(data_dict, index=self.time) + # sometimes data is returned as hours since T0 + # where T0 is before start. Then the hours between + # T0 and start are added *after* end. So sort and slice + # to remove the garbage + data = data.sort_index().loc[start:end] + return data + + def set_time(self, time): + ''' + Converts time data into a pandas date object. + + Parameters + ---------- + time: netcdf + Contains time information. + + Returns + ------- + pandas.DatetimeIndex + ''' + # np.masked_array with elements like real_datetime(2021, 8, 17, 16, 0) + # and dtype=object + times = num2date(time[:].squeeze(), time.units, + only_use_cftime_datetimes=False, + only_use_python_datetimes=True) + # convert to pandas, localize to UTC, convert to desired timezone + self.time = pd.DatetimeIndex( + times, tz='UTC').tz_convert(self.location.tz) + + def cloud_cover_to_ghi_linear(self, cloud_cover, ghi_clear, offset=35, + **kwargs): + """ + Convert cloud cover to GHI using a linear relationship. + + 0% cloud cover returns ghi_clear. + + 100% cloud cover returns offset*ghi_clear. + + Parameters + ---------- + cloud_cover: numeric + Cloud cover in %. + ghi_clear: numeric + GHI under clear sky conditions. + offset: numeric, default 35 + Determines the minimum GHI. + kwargs + Not used. 
+ + Returns + ------- + ghi: numeric + Estimated GHI. + + References + ---------- + Larson et. al. "Day-ahead forecasting of solar power output from + photovoltaic plants in the American Southwest" Renewable Energy + 91, 11-20 (2016). + """ + + offset = offset / 100. + cloud_cover = cloud_cover / 100. + ghi = (offset + (1 - offset) * (1 - cloud_cover)) * ghi_clear + return ghi + + def cloud_cover_to_irradiance_clearsky_scaling(self, cloud_cover, + method='linear', + **kwargs): + """ + Estimates irradiance from cloud cover in the following steps: + + 1. Determine clear sky GHI using Ineichen model and + climatological turbidity. + 2. Estimate cloudy sky GHI using a function of + cloud_cover e.g. + :py:meth:`~ForecastModel.cloud_cover_to_ghi_linear` + 3. Estimate cloudy sky DNI using the DISC model. + 4. Calculate DHI from DNI and GHI. + + Parameters + ---------- + cloud_cover : Series + Cloud cover in %. + method : str, default 'linear' + Method for converting cloud cover to GHI. + 'linear' is currently the only option. + **kwargs + Passed to the method that does the conversion + + Returns + ------- + irrads : DataFrame + Estimated GHI, DNI, and DHI. + """ + solpos = self.location.get_solarposition(cloud_cover.index) + cs = self.location.get_clearsky(cloud_cover.index, model='ineichen', + solar_position=solpos) + + method = method.lower() + if method == 'linear': + ghi = self.cloud_cover_to_ghi_linear(cloud_cover, cs['ghi'], + **kwargs) + else: + raise ValueError('invalid method argument') + + dni = disc(ghi, solpos['zenith'], cloud_cover.index)['dni'] + dhi = ghi - dni * np.cos(np.radians(solpos['zenith'])) + + irrads = pd.DataFrame({'ghi': ghi, 'dni': dni, 'dhi': dhi}).fillna(0) + return irrads + + def cloud_cover_to_transmittance_linear(self, cloud_cover, offset=0.75, + **kwargs): + """ + Convert cloud cover (percentage) to atmospheric transmittance + using a linear model. + + 0% cloud cover returns "offset". + + 100% cloud cover returns 0. 
+ + Parameters + ---------- + cloud_cover : numeric + Cloud cover in %. + offset : numeric, default 0.75 + Determines the maximum transmittance. [unitless] + kwargs + Not used. + + Returns + ------- + transmittance : numeric + The fraction of extraterrestrial irradiance that reaches + the ground. [unitless] + """ + transmittance = ((100.0 - cloud_cover) / 100.0) * offset + + return transmittance + + def cloud_cover_to_irradiance_campbell_norman(self, cloud_cover, **kwargs): + """ + Estimates irradiance from cloud cover in the following steps: + + 1. Determine transmittance using a function of cloud cover e.g. + :py:meth:`~ForecastModel.cloud_cover_to_transmittance_linear` + 2. Calculate GHI, DNI, DHI using the + :py:func:`pvlib.irradiance.campbell_norman` model + + Parameters + ---------- + cloud_cover : Series + + Returns + ------- + irradiance : DataFrame + Columns include ghi, dni, dhi + """ + # in principle, get_solarposition could use the forecast + # pressure, temp, etc., but the cloud cover forecast is not + # accurate enough to justify using these minor corrections + solar_position = self.location.get_solarposition(cloud_cover.index) + dni_extra = get_extra_radiation(cloud_cover.index) + + transmittance = self.cloud_cover_to_transmittance_linear(cloud_cover, + **kwargs) + + irrads = campbell_norman(solar_position['apparent_zenith'], + transmittance, dni_extra=dni_extra) + irrads = irrads.fillna(0) + + return irrads + + def cloud_cover_to_irradiance(self, cloud_cover, how='clearsky_scaling', + **kwargs): + """ + Convert cloud cover to irradiance. A wrapper method. + + Parameters + ---------- + cloud_cover : Series + how : str, default 'clearsky_scaling' + Selects the method for conversion. Can be one of + clearsky_scaling or campbell_norman. Method liujordan is + deprecated. + **kwargs + Passed to the selected method. 
+ + Returns + ------- + irradiance : DataFrame + Columns include ghi, dni, dhi + """ + + how = how.lower() + if how == 'clearsky_scaling': + irrads = self.cloud_cover_to_irradiance_clearsky_scaling( + cloud_cover, **kwargs) + elif how == 'campbell_norman': + irrads = self.cloud_cover_to_irradiance_campbell_norman( + cloud_cover, **kwargs) + else: + raise ValueError('invalid how argument') + + return irrads + + def kelvin_to_celsius(self, temperature): + """ + Converts Kelvin to celsius. + + Parameters + ---------- + temperature: numeric + + Returns + ------- + temperature: numeric + """ + return temperature - 273.15 + + def isobaric_to_ambient_temperature(self, data): + """ + Calculates temperature from isobaric temperature. + + Parameters + ---------- + data: DataFrame + Must contain columns pressure, temperature_iso, + temperature_dew_iso. Input temperature in K. + + Returns + ------- + temperature : Series + Temperature in K + """ + + P = data['pressure'] / 100.0 # noqa: N806 + Tiso = data['temperature_iso'] # noqa: N806 + Td = data['temperature_dew_iso'] - 273.15 # noqa: N806 + + # saturation water vapor pressure + e = 6.11 * 10**((7.5 * Td) / (Td + 273.3)) + + # saturation water vapor mixing ratio + w = 0.622 * (e / (P - e)) + + temperature = Tiso - ((2.501 * 10.**6) / 1005.7) * w + + return temperature + + def uv_to_speed(self, data): + """ + Computes wind speed from wind components. + + Parameters + ---------- + data : DataFrame + Must contain the columns 'wind_speed_u' and 'wind_speed_v'. + + Returns + ------- + wind_speed : Series + """ + wind_speed = np.sqrt(data['wind_speed_u']**2 + data['wind_speed_v']**2) + + return wind_speed + + def gust_to_speed(self, data, scaling=1/1.4): + """ + Computes standard wind speed from gust. + Very approximate and location dependent. + + Parameters + ---------- + data : DataFrame + Must contain the column 'wind_speed_gust'. 
+ + Returns + ------- + wind_speed : Series + """ + wind_speed = data['wind_speed_gust'] * scaling + + return wind_speed + + +@_forecast_deprecated +class GFS(ForecastModel): + """ + Subclass of the ForecastModel class representing GFS + forecast model. + + Model data corresponds to 0.25 degree resolution forecasts. + + Parameters + ---------- + resolution: string, default 'half' + Resolution of the model, either 'half' or 'quarter' degree. + set_type: string, default 'best' + Type of model to pull data from. + + Attributes + ---------- + dataframe_variables: list + Common variables present in the final set of data. + model: string + Name of the UNIDATA forecast model. + model_type: string + UNIDATA category in which the model is located. + variables: dict + Defines the variables to obtain from the weather + model and how they should be renamed to common variable names. + units: dict + Dictionary containing the units of the standard variables + and the model specific variables. + """ + + _resolutions = ['Half', 'Quarter'] + + def __init__(self, resolution='half', set_type='best'): + model_type = 'Forecast Model Data' + + resolution = resolution.title() + if resolution not in self._resolutions: + raise ValueError(f'resolution must in {self._resolutions}') + + model = f'GFS {resolution} Degree Forecast' + + # isobaric variables will require a vert_level to prevent + # excessive data downloads + self.variables = { + 'temp_air': 'Temperature_surface', + 'wind_speed_gust': 'Wind_speed_gust_surface', + 'wind_speed_u': 'u-component_of_wind_isobaric', + 'wind_speed_v': 'v-component_of_wind_isobaric', + 'total_clouds': + 'Total_cloud_cover_entire_atmosphere_Mixed_intervals_Average', + 'low_clouds': + 'Low_cloud_cover_low_cloud_Mixed_intervals_Average', + 'mid_clouds': + 'Medium_cloud_cover_middle_cloud_Mixed_intervals_Average', + 'high_clouds': + 'High_cloud_cover_high_cloud_Mixed_intervals_Average', + 'boundary_clouds': ('Total_cloud_cover_boundary_layer_cloud_' + 
'Mixed_intervals_Average'), + 'convect_clouds': 'Total_cloud_cover_convective_cloud', + 'ghi_raw': ('Downward_Short-Wave_Radiation_Flux_' + 'surface_Mixed_intervals_Average')} + + self.output_variables = [ + 'temp_air', + 'wind_speed', + 'ghi', + 'dni', + 'dhi', + 'total_clouds', + 'low_clouds', + 'mid_clouds', + 'high_clouds'] + + super().__init__(model_type, model, set_type, + vert_level=100000) + + def process_data(self, data, cloud_cover='total_clouds', **kwargs): + """ + Defines the steps needed to convert raw forecast data + into processed forecast data. + + Parameters + ---------- + data: DataFrame + Raw forecast data + cloud_cover: str, default 'total_clouds' + The type of cloud cover used to infer the irradiance. + + Returns + ------- + data: DataFrame + Processed forecast data. + """ + data = super().process_data(data, **kwargs) + data['temp_air'] = self.kelvin_to_celsius(data['temp_air']) + data['wind_speed'] = self.uv_to_speed(data) + irrads = self.cloud_cover_to_irradiance(data[cloud_cover], **kwargs) + data = data.join(irrads, how='outer') + return data[self.output_variables] + + +@_forecast_deprecated +class HRRR_ESRL(ForecastModel): # noqa: N801 + """ + Subclass of the ForecastModel class representing + NOAA/GSD/ESRL's HRRR forecast model. + This is not an operational product. + + Model data corresponds to NOAA/GSD/ESRL HRRR CONUS 3km resolution + surface forecasts. + + Parameters + ---------- + set_type: string, default 'best' + Type of model to pull data from. + + Attributes + ---------- + dataframe_variables: list + Common variables present in the final set of data. + model: string + Name of the UNIDATA forecast model. + model_type: string + UNIDATA category in which the model is located. + variables: dict + Defines the variables to obtain from the weather + model and how they should be renamed to common variable names. + units: dict + Dictionary containing the units of the standard variables + and the model specific variables. 
+ """ + + def __init__(self, set_type='best'): + warnings.warn('HRRR_ESRL is an experimental model and is not ' + 'always available.') + + model_type = 'Forecast Model Data' + model = 'GSD HRRR CONUS 3km surface' + + self.variables = { + 'temp_air': 'Temperature_surface', + 'wind_speed_gust': 'Wind_speed_gust_surface', + # 'temp_air': 'Temperature_height_above_ground', # GH 702 + # 'wind_speed_u': 'u-component_of_wind_height_above_ground', + # 'wind_speed_v': 'v-component_of_wind_height_above_ground', + 'total_clouds': 'Total_cloud_cover_entire_atmosphere', + 'low_clouds': 'Low_cloud_cover_UnknownLevelType-214', + 'mid_clouds': 'Medium_cloud_cover_UnknownLevelType-224', + 'high_clouds': 'High_cloud_cover_UnknownLevelType-234', + 'ghi_raw': 'Downward_short-wave_radiation_flux_surface', } + + self.output_variables = [ + 'temp_air', + 'wind_speed', + 'ghi_raw', + 'ghi', + 'dni', + 'dhi', + 'total_clouds', + 'low_clouds', + 'mid_clouds', + 'high_clouds'] + + super().__init__(model_type, model, set_type) + + def process_data(self, data, cloud_cover='total_clouds', **kwargs): + """ + Defines the steps needed to convert raw forecast data + into processed forecast data. + + Parameters + ---------- + data: DataFrame + Raw forecast data + cloud_cover: str, default 'total_clouds' + The type of cloud cover used to infer the irradiance. + + Returns + ------- + data: DataFrame + Processed forecast data. + """ + + data = super().process_data(data, **kwargs) + data['temp_air'] = self.kelvin_to_celsius(data['temp_air']) + data['wind_speed'] = self.gust_to_speed(data) + # data['wind_speed'] = self.uv_to_speed(data) # GH 702 + irrads = self.cloud_cover_to_irradiance(data[cloud_cover], **kwargs) + data = data.join(irrads, how='outer') + return data[self.output_variables] + + +@_forecast_deprecated +class NAM(ForecastModel): + """ + Subclass of the ForecastModel class representing NAM + forecast model. + + Model data corresponds to NAM CONUS 12km resolution forecasts + from CONDUIT. 
+ + Parameters + ---------- + set_type: string, default 'best' + Type of model to pull data from. + + Attributes + ---------- + dataframe_variables: list + Common variables present in the final set of data. + model: string + Name of the UNIDATA forecast model. + model_type: string + UNIDATA category in which the model is located. + variables: dict + Defines the variables to obtain from the weather + model and how they should be renamed to common variable names. + units: dict + Dictionary containing the units of the standard variables + and the model specific variables. + """ + + def __init__(self, set_type='best'): + model_type = 'Forecast Model Data' + model = 'NAM CONUS 12km from CONDUIT' + + self.variables = { + 'temp_air': 'Temperature_surface', + 'wind_speed_gust': 'Wind_speed_gust_surface', + 'total_clouds': 'Total_cloud_cover_entire_atmosphere_single_layer', + 'low_clouds': 'Low_cloud_cover_low_cloud', + 'mid_clouds': 'Medium_cloud_cover_middle_cloud', + 'high_clouds': 'High_cloud_cover_high_cloud', + 'ghi_raw': 'Downward_Short-Wave_Radiation_Flux_surface', } + + self.output_variables = [ + 'temp_air', + 'wind_speed', + 'ghi', + 'dni', + 'dhi', + 'total_clouds', + 'low_clouds', + 'mid_clouds', + 'high_clouds'] + + super().__init__(model_type, model, set_type) + + def process_data(self, data, cloud_cover='total_clouds', **kwargs): + """ + Defines the steps needed to convert raw forecast data + into processed forecast data. + + Parameters + ---------- + data: DataFrame + Raw forecast data + cloud_cover: str, default 'total_clouds' + The type of cloud cover used to infer the irradiance. + + Returns + ------- + data: DataFrame + Processed forecast data. 
+ """ + + data = super().process_data(data, **kwargs) + data['temp_air'] = self.kelvin_to_celsius(data['temp_air']) + data['wind_speed'] = self.gust_to_speed(data) + irrads = self.cloud_cover_to_irradiance(data[cloud_cover], **kwargs) + data = data.join(irrads, how='outer') + return data[self.output_variables] + + +@_forecast_deprecated +class HRRR(ForecastModel): + """ + Subclass of the ForecastModel class representing HRRR + forecast model. + + Model data corresponds to NCEP HRRR CONUS 2.5km resolution + forecasts. + + Parameters + ---------- + set_type: string, default 'best' + Type of model to pull data from. + + Attributes + ---------- + dataframe_variables: list + Common variables present in the final set of data. + model: string + Name of the UNIDATA forecast model. + model_type: string + UNIDATA category in which the model is located. + variables: dict + Defines the variables to obtain from the weather + model and how they should be renamed to common variable names. + units: dict + Dictionary containing the units of the standard variables + and the model specific variables. 
+ """ + + def __init__(self, set_type='best'): + model_type = 'Forecast Model Data' + model = 'HRRR CONUS 2.5km Forecasts' + + self.variables = { + 'temp_air': 'Temperature_height_above_ground', + 'pressure': 'Pressure_surface', + 'wind_speed_gust': 'Wind_speed_gust_surface', + 'wind_speed_u': 'u-component_of_wind_height_above_ground', + 'wind_speed_v': 'v-component_of_wind_height_above_ground', + 'total_clouds': 'Total_cloud_cover_entire_atmosphere', + 'low_clouds': 'Low_cloud_cover_low_cloud', + 'mid_clouds': 'Medium_cloud_cover_middle_cloud', + 'high_clouds': 'High_cloud_cover_high_cloud'} + + self.output_variables = [ + 'temp_air', + 'wind_speed', + 'ghi', + 'dni', + 'dhi', + 'total_clouds', + 'low_clouds', + 'mid_clouds', + 'high_clouds', ] + + super().__init__(model_type, model, set_type) + + def process_data(self, data, cloud_cover='total_clouds', **kwargs): + """ + Defines the steps needed to convert raw forecast data + into processed forecast data. + + Parameters + ---------- + data: DataFrame + Raw forecast data + cloud_cover: str, default 'total_clouds' + The type of cloud cover used to infer the irradiance. + + Returns + ------- + data: DataFrame + Processed forecast data. + """ + data = super().process_data(data, **kwargs) + wind_mapping = { + 'wind_speed_u': 'u-component_of_wind_height_above_ground_0', + 'wind_speed_v': 'v-component_of_wind_height_above_ground_0', + } + data = self.rename(data, variables=wind_mapping) + data['temp_air'] = self.kelvin_to_celsius(data['temp_air']) + data['wind_speed'] = self.uv_to_speed(data) + irrads = self.cloud_cover_to_irradiance(data[cloud_cover], **kwargs) + data = data.join(irrads, how='outer') + data = data.iloc[:-1, :] # issue with last point + return data[self.output_variables] + + +@_forecast_deprecated +class NDFD(ForecastModel): + """ + Subclass of the ForecastModel class representing NDFD forecast + model. + + Model data corresponds to NWS CONUS CONDUIT forecasts. 
@_forecast_deprecated
class NDFD(ForecastModel):
    """
    Forecast model wrapper for the NWS CONUS CONDUIT (NDFD) product.

    Parameters
    ----------
    set_type: string, default 'best'
        Type of model to pull data from.

    Attributes
    ----------
    dataframe_variables: list
        Common variables present in the final set of data.
    model: string
        Name of the UNIDATA forecast model.
    model_type: string
        UNIDATA category in which the model is located.
    variables: dict
        Maps common variable names to the model-specific field names
        requested from the weather model.
    units: dict
        Units of the standard variables and the model specific variables.
    """

    def __init__(self, set_type='best'):
        # Map common variable names -> NDFD field names requested upstream.
        self.variables = dict(
            temp_air='Temperature_height_above_ground',
            wind_speed='Wind_speed_height_above_ground',
            total_clouds='Total_cloud_cover_surface',
        )

        # Columns exposed by process_data, in output order.
        self.output_variables = [
            'temp_air',
            'wind_speed',
            'ghi',
            'dni',
            'dhi',
            'total_clouds',
        ]

        super().__init__('Forecast Products and Analyses',
                         'National Weather Service CONUS Forecast Grids (CONDUIT)',
                         set_type)

    def process_data(self, data, **kwargs):
        """
        Convert raw NDFD forecast data into processed forecast data.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """
        frame = super().process_data(data, **kwargs)
        frame['temp_air'] = self.kelvin_to_celsius(frame['temp_air'])

        # NDFD only reports total cloud cover, so it is always the
        # source used to infer irradiance.
        solar = self.cloud_cover_to_irradiance(frame['total_clouds'], **kwargs)
        frame = frame.join(solar, how='outer')
        return frame[self.output_variables]
@_forecast_deprecated
class RAP(ForecastModel):
    """
    Subclass of the ForecastModel class representing RAP forecast model.

    Model data corresponds to Rapid Refresh CONUS 20km resolution
    forecasts.

    Parameters
    ----------
    resolution: string or int, default '20'
        The model resolution, either '20' or '40' (km)
    set_type: string, default 'best'
        Type of model to pull data from.

    Attributes
    ----------
    dataframe_variables: list
        Common variables present in the final set of data.
    model: string
        Name of the UNIDATA forecast model.
    model_type: string
        UNIDATA category in which the model is located.
    variables: dict
        Defines the variables to obtain from the weather
        model and how they should be renamed to common variable names.
    units: dict
        Dictionary containing the units of the standard variables
        and the model specific variables.
    """

    # Supported grid resolutions in km (kept as strings so that both
    # str and int arguments normalize to the same representation).
    _resolutions = ['20', '40']

    def __init__(self, resolution='20', set_type='best'):

        resolution = str(resolution)
        if resolution not in self._resolutions:
            # BUGFIX: message previously read "resolution must in ..."
            raise ValueError(f'resolution must be in {self._resolutions}')

        model_type = 'Forecast Model Data'
        model = f'Rapid Refresh CONUS {resolution}km'
        # Map common variable names -> RAP field names requested upstream.
        self.variables = {
            'temp_air': 'Temperature_surface',
            'wind_speed_gust': 'Wind_speed_gust_surface',
            'total_clouds': 'Total_cloud_cover_entire_atmosphere',
            'low_clouds': 'Low_cloud_cover_low_cloud',
            'mid_clouds': 'Medium_cloud_cover_middle_cloud',
            'high_clouds': 'High_cloud_cover_high_cloud', }
        # Columns exposed by process_data, in output order.
        self.output_variables = [
            'temp_air',
            'wind_speed',
            'ghi',
            'dni',
            'dhi',
            'total_clouds',
            'low_clouds',
            'mid_clouds',
            'high_clouds', ]
        super().__init__(model_type, model, set_type)

    def process_data(self, data, cloud_cover='total_clouds', **kwargs):
        """
        Defines the steps needed to convert raw forecast data
        into processed forecast data.

        Parameters
        ----------
        data: DataFrame
            Raw forecast data
        cloud_cover: str, default 'total_clouds'
            The type of cloud cover used to infer the irradiance.

        Returns
        -------
        data: DataFrame
            Processed forecast data.
        """

        data = super().process_data(data, **kwargs)
        data['temp_air'] = self.kelvin_to_celsius(data['temp_air'])
        # RAP exposes gusts rather than u/v components, so wind speed
        # is derived from the gust field.
        data['wind_speed'] = self.gust_to_speed(data)
        irrads = self.cloud_cover_to_irradiance(data[cloud_cover], **kwargs)
        data = data.join(irrads, how='outer')
        return data[self.output_variables]