# Import libraries
import os

os.chdir('..')
from datetime import datetime
from statsmodels.tsa.api import VAR
from filecoin_digital_twin.modeling import gas_dynamics_VAR_prediction, gas_dynamics_VAR_invert
from filecoin_digital_twin.retrieve_data import pull_data, pull_message_count_data,process_message_count_data, compute_difference_vector
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas import DataFrame

# --- Backtest configuration -------------------------------------------------
# Path handed to pull_message_count_data as its connection-string file.
CONN_STRING_PATH = 'config/sentinel-conn-string.txt'

# Observation window and the truncation interval used by every data pull.
start_date = datetime(2021, 7, 1)
end_date = datetime(2021, 10, 15)
truncation_interval = 'DAY'

# Both pulls take the same window arguments, so spell them out only once.
query_window = {
    'truncation_interval': truncation_interval,
    'start_date': start_date,
    'end_date': end_date,
}

## compute_gas_dynamics_vector - obtain macro variables for system identification
macro_data = pull_data(**query_window)

# Pull the raw message count data, then process it over the same window.
message_count_data = process_message_count_data(
    pull_message_count_data(CONN_STRING_PATH=CONN_STRING_PATH, **query_window),
    truncation_interval,
    start_date,
    end_date,
)

# Pivot to one row per datetime and one column per Actor-Method holding the
# percentage of gas used; combinations with no data are filled with 0.
# The arguments are passed by keyword because pandas 2.0 made the parameters
# of DataFrame.pivot keyword-only (the positional form raises a TypeError).
vector = message_count_data.pivot(index="datetime",
                                  columns="Actor-Method",
                                  values="percentage_gas_used").fillna(0)


# Collapse every "<unknown>-*" column into a single "unknown" column
unknown_cols = [x for x in vector.columns if x.startswith("<unknown>-")]
vector["unknown"] = vector[unknown_cols].sum(axis=1)
vector = vector.drop(columns=unknown_cols)

# Join macro data into the vector (DataFrame.join defaults to a left join on
# the index)
vector = vector.join(macro_data)

# subset to prediction columns
prediction_columns = ['fil/5/account-0',
                      'fil/5/storagemarket-2','fil/5/storagemarket-4','fil/5/storageminer-11',
                      'fil/5/storageminer-16','fil/5/storageminer-25','fil/5/storageminer-26',
                      'fil/5/storageminer-5','fil/5/storageminer-6','fil/5/storageminer-7']

vector = vector[prediction_columns]

# Get rid of rows with null values (drops a row if any column is null)
vector = vector.dropna()


# Walk-forward backtest: at every step the model is refit on an expanding
# window of history and asked for a single one-step-ahead forecast.
INITIAL_TRAIN_SIZE = 60   # number of rows in the first training window
MAX_FORECAST_STEPS = 48   # upper bound on the number of one-step forecasts

# create an empty list to hold our predictions
predictions = []
# subset actual values for comparison; the slice is shorter than
# MAX_FORECAST_STEPS when vector has fewer rows available
actual = vector.iloc[INITIAL_TRAIN_SIZE:INITIAL_TRAIN_SIZE + MAX_FORECAST_STEPS]
if actual.empty:
    raise ValueError(
        "vector has {} rows; more than {} are needed to backtest".format(
            len(vector), INITIAL_TRAIN_SIZE))

# iterate through each step training and forecasting the next day.
# The loop length is taken from actual so there is exactly one forecast per
# held-out row; a separately hard-coded count can drift out of step with the
# slice above and leave the two frames with different lengths.
for i in range(len(actual)):
    # Everything strictly before the row being forecast
    training = vector.iloc[:INITIAL_TRAIN_SIZE + i]

    #Compute the difference vector
    diff_vector = compute_difference_vector(training)

    # Use the configured truncation interval rather than a second literal
    pred = gas_dynamics_VAR_prediction(diff_vector, truncation_interval,
                                       lag=5, steps=1)
    #Pull out prior state (last observed row of the training window)
    prior_state = training.iloc[-1]

    #Invert the prediction
    pred = gas_dynamics_VAR_invert(pred, prior_state, prediction_columns)
    predictions.append(pred)


# create a dataframe from the predictions, with a fresh 0..n-1 index
one_step_predictions_df_all = pd.concat(predictions).reset_index(drop=True)

# Put the actual values on the same positional index. reset_index(drop=True)
# returns a new frame instead of mutating a slice of vector in place, and it
# can be re-run safely.
actual = actual.reset_index(drop=True)

if len(actual) != len(one_step_predictions_df_all):
    raise ValueError(
        "actual has {} rows but there are {} predictions".format(
            len(actual), len(one_step_predictions_df_all)))

# calculate the RMSE for each predicted column.
# axis=0 averages the squared errors over the forecast steps (rows), giving
# one value per column. axis=1 would give one value per forecast step, which
# the zip below would then mislabel with column names.
# actual is reordered to the prediction columns so values are compared
# column-for-column.
RMSE = sm.tools.eval_measures.rmse(actual[one_step_predictions_df_all.columns],
                                   one_step_predictions_df_all,
                                   axis=0)
RMSE = np.round(RMSE,decimals=4)
RMSE_dict = dict(zip(one_step_predictions_df_all.columns,RMSE))


# Plot the results: for each predicted column draw the actual series and the
# forecast series, put the column's RMSE in the title, and show the plot.
for column_name in one_step_predictions_df_all.columns:
    actual[column_name].plot(kind='line')
    one_step_predictions_df_all[column_name].plot(kind='line')
    plt.title(f"{column_name} RMSE: {RMSE_dict[column_name]}")
    plt.legend(["In-sample", "Prediction"])
    plt.show()

# Gas Usage Systems Identification Model Backtesting

# What is Systems Identification?

# Our model:

# What is Backtesting?

# Pros

# Cons

# Conclusion

# Future enhancements

# Gas Usage Systems Identification Model Backtesting

# What is Systems Identification?

# Our model:

# What is Backtesting?

# Pros

# Cons

# Conclusion

# Future enhancements

# Future enhancements