import os

# Step up to the parent directory. Presumably the notebook lives in a
# sub-folder and the project imports / relative paths below expect the
# repository root as working directory (TODO confirm).
# NOTE: the move is relative, so re-running this cell climbs one more level
# each time.
os.chdir(os.pardir)


from filecoin_digital_twin.retrieve_data import pull_storage_data
import chow_test 
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Date window and time granularity shared by the data pulls below.
start_date = datetime(year=2021, month=7, day=1)
end_date = datetime(year=2021, month=12, day=20)
# Passed to pull_storage_data and interpolated into date_trunc(...) in the SQL.
truncation_interval = "DAY"


# Location of the file holding the database connection URL, relative to the
# current working directory.
CONN_STRING_PATH = 'config/sentinel-conn-string.txt'
from sqlalchemy import create_engine

# Read the connection URL. Text files usually end with a newline (and may
# carry stray spaces), so strip surrounding whitespace before handing the
# value to SQLAlchemy instead of passing it through verbatim.
with open(CONN_STRING_PATH, 'r') as fid:
    conn_string = fid.read().strip()

# Create the database connection used by the SQL query below.
# pool_recycle=3600 is forwarded to create_engine unchanged.
connection = create_engine(conn_string, pool_recycle=3600).connect()


# Fetch the storage data for the configured window, then turn the index
# (a timestamp, per the original note) into a regular column so the frame
# gets a fresh integer index.
storage = pull_storage_data(
    truncation_interval,
    start_date,
    end_date,
).reset_index()


# Total gas used per truncation interval inside the [start_date, end_date]
# window, oldest interval first. The three placeholders are filled, in order,
# with the truncation interval, the window start and the window end.
QUERY = """
SELECT 
date_trunc('{}', 
to_timestamp(height_to_unix(d.height))) AS datetime,
SUM(gas_used) as sum_gas_used_daily
FROM derived_gas_outputs d
WHERE
to_timestamp(height_to_unix(height)) BETWEEN '{}' AND '{}'
GROUP BY
datetime
ORDER BY
datetime asc
""".format(truncation_interval,start_date,end_date)
derived_gas_outputs_daily = pd.read_sql(QUERY, connection)

# Drop the first and last rows (the first and last days of the window;
# presumably because they are only partially covered - TODO confirm), then
# move the old row labels into an 'index' column and renumber the rows from 0.
derived_gas_outputs_daily = (
    derived_gas_outputs_daily
    .iloc[1:-1]
    .reset_index()
)


# Storage data: Chow test for a structural break in power_rb, regressed on
# the integer 'index' column. last_index / first_index presumably mark the
# end of the first period and the start of the second (verify against
# chow_test's documentation); 0.05 is the significance level.
chow_test.chow_test(
    X_series=storage["index"],
    y_series=storage["power_rb"],
    last_index=85,
    first_index=86,
    significance=0.05,
)

Reject the null hypothesis of equality of regression coefficients in the two periods.
Chow Statistic: 769.6132150177385, P_value: 1.1102230246251565e-16

(769.6132150177385, 1.1102230246251565e-16)


# Plot power_rb over time and mark the suspected break date with a red
# vertical line.
ax = storage.plot(
    x='datetime',
    y='power_rb',
    title='Storage power_rb with structural break',
)
ax.axvline(x='2021-09-24', label='Regulation Shock', c='r')
ax.legend()

<matplotlib.legend.Legend at 0x7f8ad0664100>


# Gas data: same Chow test on the summed gas usage. The split positions are
# one lower than for the storage series; presumably this is because the first
# row of the gas frame was dropped before re-indexing (verify).
chow_test.chow_test(
    X_series=derived_gas_outputs_daily["index"],
    y_series=derived_gas_outputs_daily["sum_gas_used_daily"],
    last_index=84,
    first_index=85,
    significance=0.05,
)

Reject the null hypothesis of equality of regression coefficients in the two periods.
Chow Statistic: 42.88689014843319, P_value: 9.992007221626409e-16

(42.88689014843319, 9.992007221626409e-16)


# Plot the summed gas usage over time and mark the suspected break date with
# a red vertical line.
ax = derived_gas_outputs_daily.plot(
    x='datetime',
    y='sum_gas_used_daily',
    title='Sum of daily gas used with structural break',
)
ax.axvline(x='2021-09-24', label='Regulation Shock', c='r')
ax.legend()

<matplotlib.legend.Legend at 0x7f8ad0df21f0>

Storage onboarding structural break testing

Pull data

Chow test

Conclusion