def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID

    Arguments:
        api: API that we're connected to. It must provide a
            data_by_series(series=...) method.
        series_ID: string. Name of the series that we want to pull from
            the EIA API
    Returns:
        pandas DataFrame built from whatever api.data_by_series returns
        for the requested series.
    """
    # Retrieve data by Series ID
    series_search = api.data_by_series(series=series_ID)
    # Create a pandas dataframe from the retrieved time series
    return pd.DataFrame(series_search)
# --- Execute in the main block ---
# Create the EIA API client using your specific API key
api_key = "YOR API KEY HERE"
api = eia.API(api_key)

# Pull the electricity price data
series_ID = 'ELEC.PRICE.TX-ALL.M'
electricity_df = retrieve_time_series(api, series_ID)
# Move the existing index into a regular column so it can be renamed below
electricity_df.reset_index(level=0, inplace=True)
# Rename the columns for easier analysis: the former index column becomes
# 'Date' and the second column becomes 'Electricity_Price'
electricity_df.rename(
    columns={
        'index': 'Date',
        electricity_df.columns[1]: 'Electricity_Price',
    },
    inplace=True,
)
def calculate_model_accuracy_metrics(actual, predicted):
    """
    Output model accuracy metrics, comparing predicted values
    to actual values.

    Arguments:
        actual: list. Time series of actual values.
        predicted: list. Time series of predicted values
    Outputs:
        Forecast bias metrics, mean absolute error, mean squared error,
        and root mean squared error in the console
    Raises:
        ValueError: if the inputs are empty or have different lengths.
    """
    n_obs = len(actual)
    # Fail early with a clear message instead of a ZeroDivisionError or
    # IndexError part-way through the calculation.
    if n_obs == 0:
        raise ValueError('actual and predicted must not be empty')
    if n_obs != len(predicted):
        raise ValueError(
            'actual and predicted must have the same length '
            '(got %d and %d)' % (n_obs, len(predicted))
        )
    # Calculate forecast bias (mean of actual minus predicted)
    forecast_errors = [a - p for a, p in zip(actual, predicted)]
    bias = sum(forecast_errors) * 1.0 / n_obs
    print('Bias: %f' % bias)
    # Calculate mean absolute error
    mae = mean_absolute_error(actual, predicted)
    print('MAE: %f' % mae)
    # Calculate mean squared error and root mean squared error
    mse = mean_squared_error(actual, predicted)
    print('MSE: %f' % mse)
    rmse = sqrt(mse)
    print('RMSE: %f' % rmse)
# --- Execute in the main block ---
# Un-difference the data: each row's transformed price is rebuilt as the
# previous row's transformed price plus this row's post-processed difference.
# NOTE(review): the upper bound len(master_df.index) - 1 leaves the final row
# untouched -- confirm whether the last row should be rebuilt as well.
for i in range(1, len(master_df.index) - 1):
    master_df.at[i, 'Electricity_Price_Transformed'] = (
        master_df.at[i - 1, 'Electricity_Price_Transformed']
        + master_df.at[i, 'Electricity_Price_Transformed_Differenced_PostProcess']
    )

# Back-transform the data by exponentiating the transformed price
master_df.loc[:, 'Predicted_Electricity_Price'] = np.exp(
    master_df['Electricity_Price_Transformed']
)

# Compare the forecasted data to the real data (rows flagged Predicted == 1)
predicted_rows = master_df[master_df['Predicted'] == 1]
print(predicted_rows[['Date', 'Electricity_Price', 'Predicted_Electricity_Price']])
# Evaluate the accuracy of the results
calculate_model_accuracy_metrics(
    list(predicted_rows['Electricity_Price']),
    list(predicted_rows['Predicted_Electricity_Price']),
)
# Pull in natural gas time series data
series_ID = 'NG.N3035TX3.M'
nat_gas_df = retrieve_time_series(api, series_ID)
# Move the existing index into a regular column so it can be renamed below
nat_gas_df.reset_index(level=0, inplace=True)
# Rename the columns: the former index column becomes 'Date' and the second
# column becomes 'Nat_Gas_Price_MCF'
nat_gas_df.rename(
    columns={
        'index': 'Date',
        nat_gas_df.columns[1]: 'Nat_Gas_Price_MCF',
    },
    inplace=True,
)
# Convert the Date column into a date object
nat_gas_df['Date'] = pd.to_datetime(nat_gas_df['Date'])
# Set Date as a Pandas DatetimeIndex
nat_gas_df.index = pd.DatetimeIndex(nat_gas_df['Date'])
# Decompose the time series into parts
decompose_time_series(nat_gas_df['Nat_Gas_Price_MCF'])

# Merge the two time series together based on Date Index
master_df = pd.merge(
    electricity_df['Electricity_Price'],
    nat_gas_df['Nat_Gas_Price_MCF'],
    left_index=True,
    right_index=True,
)
# Turn the shared index back into a regular column for plotting
master_df.reset_index(level=0, inplace=True)

# Plot the two variables in the same plot
plt.plot(master_df['Date'],
         master_df['Electricity_Price'], label="Electricity_Price")
plt.plot(master_df['Date'],
         master_df['Nat_Gas_Price_MCF'], label="Nat_Gas_Price")
# Anchor the legend just outside the right edge of the axes
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.title('Natural Gas Price vs. TX Electricity Price over Time')
plt.show()
# Convert the dataframe to a numpy array, dropping rows with missing values
# in either differenced column
master_array = np.array(
    master_df[['Electricity_Price_Transformed_Differenced',
               'Nat_Gas_Price_MCF_Transformed_Differenced']].dropna()
)

# Generate a training and test set for building the model: 95/5 split.
# The split position is computed once so both slices share the same boundary.
split_index = int(0.95 * len(master_array))
training_set = master_array[:split_index]
test_set = master_array[split_index:]

# Fit to a VAR model
model = VAR(endog=training_set)
model_fit = model.fit()
# Print a summary of the model results (a bare expression would be silently
# discarded when this runs as a script)
print(model_fit.summary())
def decompose_time_series(series):
    """
    Decompose a time series and plot it in the console

    Arguments:
        series: series. Time series that we want to decompose
    Outputs:
        Decomposition plot in the console
    """
    # Additive model is requested explicitly via model='additive'
    result = seasonal_decompose(series, model='additive')
    result.plot()
    pyplot.show()
# --- Execute in the main block ---
# Convert the Date column into a date object
electricity_df['Date'] = pd.to_datetime(electricity_df['Date'])
# Set Date as a Pandas DatetimeIndex
electricity_df.index = pd.DatetimeIndex(electricity_df['Date'])
# Decompose the time series into parts
decompose_time_series(electricity_df['Electricity_Price'])
# Transform the columns using natural log
master_df['Electricity_Price_Transformed'] = np.log(master_df['Electricity_Price'])
master_df['Nat_Gas_Price_MCF_Transformed'] = np.log(master_df['Nat_Gas_Price_MCF'])

# Difference the data by 1 month: each value minus the value n rows earlier
# (the first n rows become NaN)
n = 1
master_df['Electricity_Price_Transformed_Differenced'] = (
    master_df['Electricity_Price_Transformed'].diff(n)
)
master_df['Nat_Gas_Price_MCF_Transformed_Differenced'] = (
    master_df['Nat_Gas_Price_MCF_Transformed'].diff(n)
)