Source code for pyiso.miso

from collections import namedtuple
from pyiso.base import BaseClient
from pyiso import LOGGER
import pandas as pd
from io import BytesIO
from datetime import datetime, timedelta
import pytz
from dateutil.parser import parse
import re

IntervalChoices = namedtuple('IntervalChoices',
                             ['hourly', 'hourly_prelim', 'fivemin', 'tenmin',
                              'na', 'dam', 'dam_exante'])


[docs]class MISOClient(BaseClient): NAME = 'MISO' base_url = 'https://www.misoenergy.org' fuels = { 'Coal': 'coal', 'Natural Gas': 'natgas', 'Nuclear': 'nuclear', 'Other': 'other', 'Wind': 'wind', } # MISO is always on utc offset is -5 # Due to a legacy problem, pytz time zones names are sign reversed TZ_NAME = 'Etc/GMT+5' MARKET_CHOICES = IntervalChoices(hourly='RTHR', fivemin='RT5M', tenmin='RT5M', na='RT5M', dam='DAHR', hourly_prelim='RTHR_prelim', dam_exante='DAHR_exante')
[docs] def get_generation(self, latest=False, **kwargs): # set args self.handle_options(data='gen', latest=latest, **kwargs) # get data if self.options['latest']: content = self.get_latest_fuel_mix() data = self.parse_latest_fuel_mix(content) extras = { 'ba_name': self.NAME, 'market': self.MARKET_CHOICES.fivemin, 'freq': self.FREQUENCY_CHOICES.fivemin, } elif self.options['forecast']: data = self.handle_forecast() extras = { 'ba_name': self.NAME, 'market': self.MARKET_CHOICES.dam, 'freq': self.FREQUENCY_CHOICES.hourly, } else: raise ValueError('Either latest or forecast must be True') # return return self.serialize_faster(data, extras=extras)
[docs] def get_load(self, latest=False, **kwargs): # set args self.handle_options(data='load', latest=latest, **kwargs) # get data if self.options['forecast']: data = self.handle_forecast() extras = { 'ba_name': self.NAME, 'market': self.MARKET_CHOICES.dam, 'freq': self.FREQUENCY_CHOICES.hourly, } else: raise ValueError('forecast must be True') # return return self.serialize_faster(data, extras=extras)
[docs] def get_trade(self, latest=False, **kwargs): # set args self.handle_options(data='trade', latest=latest, **kwargs) # get data if self.options['forecast']: data = self.handle_forecast() extras = { 'ba_name': self.NAME, 'market': self.MARKET_CHOICES.dam, 'freq': self.FREQUENCY_CHOICES.hourly, } else: raise ValueError('forecast must be True') # return return self.serialize_faster(data, extras=extras)
[docs] def get_latest_fuel_mix(self): # set up request url = self.base_url + '/ria/FuelMix.aspx?CSV=True' # carry out request response = self.request(url) if not response: return None # test for valid content if 'The page cannot be displayed' in response.text: LOGGER.error('MISO: Error in source data for generation') return None # return good return response.content
[docs] def parse_latest_fuel_mix(self, content): # handle bad input if not content: return pd.DataFrame() # preliminary parsing df = pd.read_csv(BytesIO(content), header=0, index_col=0, parse_dates=True) # set index try: df.index = self.utcify_index(df.index) except AttributeError: LOGGER.error('MISO: Error in source data for generation %s' % content) return pd.DataFrame() df.index.set_names(['timestamp'], inplace=True) # set names and labels df['fuel_name'] = df.apply(lambda x: self.fuels[x['CATEGORY']], axis=1) df['gen_MW'] = df['ACT'] # return return df[['fuel_name', 'gen_MW']]
[docs] def handle_forecast(self): dates_list = self.dates() if min(dates_list) > self.local_now().date(): dates_list = [self.local_now().date()] + dates_list pieces = [self.fetch_forecast(date) for date in dates_list] df = pd.concat(pieces) return self.parse_forecast(df)
[docs] def fetch_forecast(self, date): # construct url datestr = date.strftime('%Y%m%d') url = self.base_url + '/Library/Repository/Market%20Reports/' + datestr + '_da_ex.xls' # make request with self.request for easier debugging, mocking response = self.request(url) if not response: return pd.DataFrame() if response.status_code == 404: LOGGER.debug('No MISO forecast data available at %s' % datestr) return pd.DataFrame() xls = pd.read_excel(BytesIO(response.content)) # clean header header_df = xls.iloc[:5] df = xls.iloc[5:] df.columns = ['hour_str'] + list(header_df.iloc[-1][1:]) # set index idx = [] for hour_str in df['hour_str']: # format like 'Hour 01' to 'Hour 24' ihour = int(hour_str[5:]) - 1 local_ts = datetime(date.year, date.month, date.day, ihour) idx.append(self.utcify(local_ts)) df.index = idx df.index.set_names(['timestamp'], inplace=True) # return return df
[docs] def parse_forecast(self, df): sliced = self.slice_times(df) if self.options['data'] == 'gen': try: sliced['gen_MW'] = 1000.0 * sliced['Supply Cleared (GWh) - Physical'] sliced['fuel_name'] = 'other' return sliced[['gen_MW', 'fuel_name']] except KeyError: LOGGER.warn('MISO genmix error: missing key %s in %s' % ('Supply Cleared (GWh) - Physical', sliced.columns)) return pd.DataFrame() elif self.options['data'] == 'load': try: sliced['load_MW'] = 1000.0 * (sliced['Demand Cleared (GWh) - Physical - Fixed'] + sliced['Demand Cleared (GWh) - Physical - Price Sen.']) return sliced['load_MW'] except KeyError: LOGGER.warn('MISO load error: missing key %s in %s' % ('Demand Cleared (GWh) - Physical - Fixed', sliced.columns)) return pd.DataFrame() elif self.options['data'] == 'trade': try: sliced['net_exp_MW'] = -1000.0 * sliced['Net Scheduled Imports (GWh)'] return sliced['net_exp_MW'] except KeyError: LOGGER.warn('MISO trade error: missing key %s in %s' % ('Net Scheduled Imports (GWh)', sliced.columns)) return pd.DataFrame() else: raise ValueError('Can only parse MISO forecast gen, load, or trade data, not %s' % self.options['data'])
[docs] def handle_options(self, **kwargs): super(MISOClient, self).handle_options(**kwargs) if 'market' not in self.options: self.options['market'] = self.MARKET_CHOICES.dam self.options['freq'] = self.FREQUENCY_CHOICES.hourly if 'freq' not in self.options: self.options['freq'] = self.FREQUENCY_CHOICES.hourly
[docs] def get_realtime_lmp(self, **kwargs): # get csv with latest 5 minute data url = self.base_url + '/ria/Consolidated.aspx?format=csv' response = self.request(url) # parse data into DataFrame data = BytesIO(response.content) df = pd.read_csv(data, skiprows=[1, 3], header=None) # parse timestamp from column name, add timezone ts = df.iloc[0, 13] ts = ts.replace('RefId=', '') ts = parse(ts, ignoretz=True) ts = self.utcify(ts) # MEC = Marginal Energy Component (unconstrained LMP) # MCC = Marginal Congestion Component (GSF X Marginal Value) # drop MEC and MCC prices drop_col = [2, 3, 5, 6, 8, 9, 11, 12, 13] # drop Ex/Post Ante prices drop_col += [4, 7, 10] df.drop(drop_col, axis=1, inplace=True) # drop 'header' rows df.drop([0, 1], axis=0, inplace=True) # add columns df.rename(columns={0: 'node_id', 1: 'lmp'}, inplace=True) df['timestamp'] = ts df['ba_name'] = 'MISO' df['lmp_type'] = 'TotalLMP' df['freq'] = self.FREQUENCY_CHOICES.fivemin df['market'] = self.MARKET_CHOICES.fivemin # parse lmp as int df['lmp'] = df['lmp'].astype(float) return df
[docs] def get_historical_lmp(self): # Etc/GMT+5 is actually GMT - 05:00 which is MISO time tz = pytz.timezone(self.TZ_NAME) local_start = self.options['start_at'].astimezone(tz).date() local_end = self.options['end_at'].astimezone(tz).date() # get days between start and end days = [local_start + timedelta(days=x) for x in range((local_end-local_start).days + 1)] name_dict = {self.MARKET_CHOICES.hourly: '_rt_lmp_final.csv', self.MARKET_CHOICES.hourly_prelim: '_rt_lmp_prelim.csv', self.MARKET_CHOICES.dam: '_da_expost_lmp.csv', self.MARKET_CHOICES.dam_exante: '_da_exante_lmp.csv'} pieces = [] for day in days: # get the filename extension ext = name_dict[self.options['market']] datestr = day.strftime('%Y%m%d') url = self.base_url + '/Library/Repository/Market%20Reports/' + datestr + ext response = self.request(url) if response.status_code == 404: if self.options['market'] == self.MARKET_CHOICES.hourly: # try preliminary and tell the user self.options['market'] = self.MARKET_CHOICES.hourly_prelim ext = name_dict[self.MARKET_CHOICES.hourly_prelim] url = self.base_url + '/Library/Repository/Market%20Reports/' + datestr + ext response = self.request(url) # if that didn't work, don't append to pieces if response.status_code == 404: continue # skip file information udf = pd.read_csv(BytesIO(response.content), skiprows=[0, 1, 2, 3]) # standardize format udf = pd.melt(udf, id_vars=['Node', 'Value', 'Type']) # get naive timestamps, HE_1 = hour ending 1 udf['hour'] = udf['variable'].apply(str.replace, args=('HE ', '')).astype(int) - 1 udf['timestamp'] = udf['hour'].apply(lambda x: timedelta(hours=x)) + day udf.drop(['hour', 'variable'], axis=1, inplace=True) pieces.append(udf) df = pd.concat(pieces) if df.empty: return df # utcify df.set_index('timestamp', inplace=True) df.index = self.utcify_index(df.index) # drop MCC and MLC df = df[df['Value'] == 'LMP'] # drop node type df.drop('Type', axis=1, inplace=True) # standardize names rename_dict = {'Node': 'node_id', 'Value': 'lmp_type', 'value': 'lmp', } df.rename(columns=rename_dict, inplace=True) # add columns df['freq'] = self.options['freq'] df['market'] = self.options['market'] df['ba_name'] = 'MISO' return df