The goal of this notebook is to explore the impact of the upcoming sector expiration wave on April / May, especially when considering the ones with V1 proofs.
%load_ext autotime
%load_ext autoreload
%autoreload 2
time: 7.6 ms (started: 2021-08-16 20:02:35 +00:00)
# External dependences
import pandas as pd
import numpy as np
import plotly.express as px
from prophet import Prophet
import matplotlib.pyplot as plt
# Move path to parent folder
import sys
sys.path.insert(1, '../')
import plotly
plotly.offline.init_notebook_mode()
time: 781 ms (started: 2021-08-16 20:02:35 +00:00)
# Create a connection object from a conn string
from filecoin_metrics.connection import get_connection, get_connection_string
conn_string = get_connection_string('../config/sentinel-conn-string.txt')
connection = get_connection(conn_string)
time: 323 ms (started: 2021-08-16 20:02:35 +00:00)
QUERY = """
/* Get the last state of the sectors */
with sector_states as (
select
msi.*,
max(msi.height) over (partition by msi.sector_id, msi.miner_id) as max_height
from miner_sector_infos msi
where msi.activation_epoch > 0
and msi.expiration_epoch > msi.height /* Get only active sectors */
order by max_height
)
select
count(*) as sector_count,
sum(ss.initial_pledge::numeric) / 1e18 as initial_pledge_in_fil,
count(*) * 32 as network_power_in_gb,
date_trunc('DAY', to_timestamp(height_to_unix(ss.activation_epoch))) as activation_date,
date_trunc('DAY', to_timestamp(height_to_unix(ss.expiration_epoch))) as expiration_date
from sector_states as ss
where ss.max_height = ss.height /* get the last state of the info */
group by activation_date, expiration_date
order by activation_date, expiration_date
"""
query_df = (pd.read_sql(QUERY, connection)
.assign(network_power_in_pib=lambda df: df.network_power_in_gb / (1024 ** 2))
.assign(initial_pledge_in_thousand_fil=lambda df: df.initial_pledge_in_fil / 1000))
time: 4min 42s (started: 2021-08-16 20:02:36 +00:00)
# Maximum date for V1 sectors
UPGRADE_DATE = '2020-11-25 00:00:00'
metrics = {'is_v1': lambda x: x['activation_date'] < UPGRADE_DATE}
query_df = query_df.assign(**metrics)
time: 14.3 ms (started: 2021-08-16 20:07:18 +00:00)
def resample_and_bar_plot(df, resample_rule, time_column, value_column, title, **kwargs):
fig_df = df.resample(resample_rule, on=time_column, label='left').sum()
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
title=title,
**kwargs)
return fig
def resample_and_bar_plot_relative(df, resample_rule, time_column, value_column, title, **kwargs):
fig_df = df.resample(resample_rule, on=time_column, label='left').sum()
y = fig_df.groupby(time_column).sum()
fig_df /= y
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
title=title,
**kwargs)
return fig
time: 13.4 ms (started: 2021-08-16 20:07:18 +00:00)
df = query_df.copy()
print("Basic stats")
print("---")
print(f"Total sectors (#): {df.sector_count.sum()}")
print(f"Raw bytes power (PiB): {df.network_power_in_gb.sum() / (1024 ** 2) :.3g}")
print(f"Initial pledge (FIL): {df.initial_pledge_in_fil.sum()}")
print("---")
Basic stats --- Total sectors (#): 76983949 Raw bytes power (PiB): 2.35e+03 Initial pledge (FIL): 22906027.384519055 --- time: 14 ms (started: 2021-08-16 20:07:18 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'sector_count'
title = 'Count of Expiring Sectors (#)'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
)
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
color='is_v1',
title=title)
fig.show()
time: 301 ms (started: 2021-08-16 20:07:18 +00:00)
resample_rule = '1d'
time_column = 'expiration_date'
value_column = 'sector_count'
title = 'Count of Expiring Sectors Before 15Jun2021 (log #)'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.query("expiration_date < '2021-06-15 00:00+00:00'")
.groupby(groups)
.sum()
.reset_index()
)
if fig_df.empty:
print('WARNING: DataFrame is Empty')
else:
fig = px.bar(fig_df,
x=time_column,
y=value_column,
facet_row=fig_df.is_v1,
title=title,
log_y=True)
fig.show()
WARNING: DataFrame is Empty time: 24.6 ms (started: 2021-08-16 20:07:18 +00:00)
sector_count = df.sector_count.sum()
v1_count = df.query("is_v1 == True").sector_count.sum()
v1_6mo_count = (df.query("is_v1 == True & expiration_date < '2021-06-01 00:00+00:00'")
.sector_count
.sum())
v1_12mo_count = (df.query("is_v1 == True & expiration_date < '2021-12-01 00:00+00:00' & expiration_date >= '2021-06-01 00:00+00:00'")
.sector_count
.sum())
v1_18mo_count = (df.query("is_v1 == True & expiration_date < '2022-06-01 00:00+00:00' & expiration_date >= '2021-12-01 00:00+00:00'")
.sector_count
.sum())
print("---")
print(f"Total sectors (#): {sector_count}")
print(f"V1 sectors (#): {v1_count}")
print(f"V1 sectors share (%): {v1_count / sector_count :.1%}")
print("---")
print(f"6mo V1 sectors share (%) of total sectors: {v1_6mo_count / sector_count :.2%}")
print(f"12mo V1 sectors share (%) of total sectors: {v1_12mo_count / sector_count :.2%}")
print(f"18mo V1 sectors share (%) of total sectors: {v1_18mo_count / sector_count :.2%}")
print("---")
print(f"6mo V1 sectors share (%) of total sectors: {v1_6mo_count / v1_count :.2%}")
print(f"12mo V1 sectors share (%) of total sectors: {v1_12mo_count / v1_count :.2%}")
print(f"18mo V1 sectors share (%) of total sectors: {v1_18mo_count / v1_count :.2%}")
print("---")
--- Total sectors (#): 76983949 V1 sectors (#): 1189468 V1 sectors share (%): 1.5% --- 6mo V1 sectors share (%) of total sectors: 0.00% 12mo V1 sectors share (%) of total sectors: 0.00% 18mo V1 sectors share (%) of total sectors: 1.55% --- 6mo V1 sectors share (%) of total sectors: 0.00% 12mo V1 sectors share (%) of total sectors: 0.00% 18mo V1 sectors share (%) of total sectors: 100.00% --- time: 28.7 ms (started: 2021-08-16 20:07:19 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'sector_count'
title = 'Upcoming Sector Expiration Count, grouped by sector version (#)'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
.reset_index()
)
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
facet_row='is_v1',
title=title)
fig.show()
time: 88.5 ms (started: 2021-08-16 20:07:19 +00:00)
resample_rule = '1w'
time_column = 'activation_date'
value_column = 'sector_count'
title = 'Activated Sector Count, grouped by sector version (#)'
groups = [pd.Grouper(key='activation_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
.reset_index()
)
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
facet_row='is_v1',
title=title)
fig.show()
time: 84.5 ms (started: 2021-08-16 20:07:19 +00:00)
rename_cols = {'activation_date': 'ds',
'sector_count': 'y'}
proj_df = (df.resample("1d", on="activation_date")
.sector_count
.sum()
.reset_index()
.rename(columns=rename_cols)
.assign(ds=lambda df: df.ds.dt.tz_localize(None))
.assign(y=lambda df: df.y.cumsum() / (32 * 1024 * 1024)))
m = Prophet(changepoint_prior_scale=0.4)
m.fit(proj_df)
future = m.make_future_dataframe(periods=60)
forecast = m.predict(future)
fig = m.plot(forecast, figsize=(10, 4))
plt.title('Past and Forecasted RB Storage Power')
plt.xlabel("Time")
plt.ylabel("Total RB Storage Power (EiB)")
plt.show()
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -34.86 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 99 1143.27 0.022935 1613.26 0.3619 1 119 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 199 1238.34 0.000966434 452.201 0.3022 1 236 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 299 1275.24 0.0059398 139.084 1 1 355 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 399 1288.35 0.00662802 301.25 1 1 465 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 499 1293.15 0.00146768 91.2477 0.7303 0.7303 576 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 599 1296.95 0.00076636 171.424 0.2215 0.2215 698 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 699 1300.09 0.000830021 48.8602 1 1 807 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 799 1302.23 0.00257937 67.8508 1 1 920 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 899 1302.98 0.00209483 366.517 0.1783 1 1039 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 999 1303.21 0.00620624 54.0371 1 1 1160 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 1014 1303.27 9.38817e-06 15.8519 2.155e-07 0.001 1233 LS failed, Hessian reset 1099 1303.36 3.53407e-05 22.4954 1 1 1337 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 1120 1303.37 9.83812e-06 24.2391 3.985e-07 0.001 1405 LS failed, Hessian reset 1199 1303.42 0.00123869 60.0257 1 1 1503 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 1299 1303.45 1.81636e-05 8.12707 0.1367 1 1632 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 1399 1303.45 2.33894e-05 8.63394 2.276 0.5979 1763 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 1409 1303.45 2.07816e-06 7.3013 1.343 0.1343 1780 Optimization terminated normally: Convergence detected: relative gradient magnitude is below tolerance
time: 2.69 s (started: 2021-08-16 20:07:19 +00:00)
rename_cols = {'activation_date': 'ds',
'sector_count': 'y'}
proj_df = (df.query("activation_date > '2020-11-01 00:00+00:00'")
.resample("1d", on="activation_date")
.sector_count
.sum()
.reset_index()
.rename(columns=rename_cols)
.assign(ds=lambda df: df.ds.dt.tz_localize(None)))
m = Prophet(changepoint_prior_scale=0.2)
m.fit(proj_df)
future = m.make_future_dataframe(periods=60)
forecast = m.predict(future)
fig = m.plot(forecast, figsize=(10, 4))
plt.title('Past and Forecasted Daily New Sectors')
plt.xlabel("Time")
plt.ylabel("Daily New Sectors (#)")
plt.show()
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -10.0724 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 99 529.418 0.0187003 149.773 0.3614 1 122 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 148 533.193 0.00060227 37.8832 1.658e-05 0.001 232 LS failed, Hessian reset 199 535.49 0.00114505 19.5532 0.8739 0.8739 294 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 299 539.479 0.00103885 61.6846 0.234 0.234 419 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 373 539.836 1.03298e-07 16.441 1 1 518 Optimization terminated normally: Convergence detected: relative gradient magnitude is below tolerance
time: 1.54 s (started: 2021-08-16 20:07:21 +00:00)
rename_cols = {'activation_date': 'ds',
'sector_count': 'y'}
proj_df = (df.query("activation_date > '2020-11-01 00:00+00:00' & activation_date < '2021-03-01 00:00+00:00'")
.resample("1d", on="activation_date")
.sector_count
.sum()
.reset_index()
.rename(columns=rename_cols)
.assign(ds=lambda df: df.ds.dt.tz_localize(None)))
m = Prophet(changepoint_prior_scale=0.2)
m.fit(proj_df)
future = m.make_future_dataframe(periods=90)
forecast = m.predict(future)
fig = m.plot(forecast, figsize=(10, 4))
plt.title('Past and Forecasted Daily New Sectors without March data')
plt.xlabel("Time")
plt.ylabel("Daily New Sectors (#)")
plt.show()
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -4.476 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 72 151.528 0.00255944 21.4481 7.658e-05 0.001 120 LS failed, Hessian reset 99 151.713 0.000149277 17.1013 2.201 0.2201 163 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 144 151.717 0.000179243 13.4875 8.639e-06 0.001 264 LS failed, Hessian reset 199 151.718 9.43985e-06 17.6273 1 1 342 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 226 151.733 5.13839e-05 17.9329 3.145e-06 0.001 416 LS failed, Hessian reset 252 151.733 9.53697e-09 15.6199 0.06896 0.06896 461 Optimization terminated normally: Convergence detected: absolute parameter change was below tolerance
time: 1.69 s (started: 2021-08-16 20:07:23 +00:00)
rename_cols = {'activation_date': 'ds',
0: 'y'}
proj_df = (df.query("activation_date > '2020-11-01 00:00+00:00'")
.resample("1d", on="activation_date")
.apply(lambda x: (x.initial_pledge_in_fil / x.sector_count).mean())
.reset_index()
.rename(columns=rename_cols)
.assign(ds=lambda df: df.ds.dt.tz_localize(None))
)
m = Prophet(changepoint_prior_scale=0.2)
m.fit(proj_df)
future = m.make_future_dataframe(periods=60)
forecast = m.predict(future)
fig = m.plot(forecast, figsize=(10, 4))
plt.title('Past and Forecasted Initial Pledge per Sector')
plt.xlabel("Time")
plt.ylabel("Mean Initial Pledge per Sector (FIL)")
plt.show()
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this. INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -5.13883 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 67 332.998 0.00241765 47.1703 0.0001122 0.001 114 LS failed, Hessian reset 83 333.444 0.00137575 31.2096 8.656e-05 0.001 163 LS failed, Hessian reset 99 333.514 0.0068239 31.2128 1 1 189 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 199 335.002 0.00030881 14.688 1 1 323 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 296 336.538 0.00260695 25.3458 0.0001489 0.001 478 LS failed, Hessian reset 299 336.56 0.000602843 16.2874 0.3774 1 484 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 327 336.796 0.000639859 16.6146 4.489e-05 0.001 561 LS failed, Hessian reset 399 336.928 0.000704987 18.0159 0.3497 1 658 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 450 337.031 0.00127428 35.4843 6.93e-05 0.001 757 LS failed, Hessian reset 499 337.044 1.25545e-07 14.6231 1 1 828 Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes 501 337.044 4.96423e-08 18.3408 0.3679 1 832 Optimization terminated normally: Convergence detected: relative gradient magnitude is below tolerance
time: 1.64 s (started: 2021-08-16 20:07:25 +00:00)
m.fit
<bound method Prophet.fit of <prophet.forecaster.Prophet object at 0x7f19022cfc10>>
time: 22.7 ms (started: 2021-08-16 20:07:26 +00:00)
resample_rule = '7d'
time_column = 'activation_date'
value_column = 'sector_count'
title = 'Count of Sector Activation Date (#)'
groups = [pd.Grouper(key='activation_date', freq=resample_rule),
pd.Grouper(key='expiration_date', freq=resample_rule)]
fig_df = df.groupby(groups).sum().reset_index()
px.density_heatmap(fig_df,
x='activation_date',
y='expiration_date',
z='sector_count')
time: 110 ms (started: 2021-08-16 20:07:26 +00:00)
resample_rule = '1d'
time_column = 'activation_date'
value_column = 'sector_count'
title = 'Count of Sector Activation Date (#)'
groups = [pd.Grouper(key='activation_date', freq=resample_rule),
pd.Grouper(key='expiration_date', freq=resample_rule)]
fig_df = df.groupby(groups).sum().reset_index()
fig = px.density_contour(fig_df,
x='activation_date',
y='expiration_date',
z='sector_count',
histfunc='sum')
fig.show()
time: 372 ms (started: 2021-08-16 20:07:26 +00:00)
sector_count = df.initial_pledge_in_fil.sum()
v1_count = df.query("is_v1 == True").initial_pledge_in_fil.sum()
v1_6mo_count = (df.query("is_v1 == True & expiration_date < '2021-06-01 00:00+00:00'")
.initial_pledge_in_fil
.sum())
v1_12mo_count = (df.query("is_v1 == True & expiration_date < '2021-12-01 00:00+00:00' & expiration_date >= '2021-06-01 00:00+00:00'")
.initial_pledge_in_fil
.sum())
v1_18mo_count = (df.query("is_v1 == True & expiration_date < '2022-06-01 00:00+00:00' & expiration_date >= '2021-12-01 00:00+00:00'")
.initial_pledge_in_fil
.sum())
print("---")
print(f"Total collateral (Million FIL): {sector_count / 1e6 :.3g}")
print(f"V1 collateral (Million FIL): {v1_count / 1e6 :.3g}")
print(f"V1 sectors share (%): {v1_count / sector_count :.1%}")
print("---")
print(f"6mo V1 sectors share (%) of total collateral: {v1_6mo_count / sector_count :.2%}")
print(f"12mo V1 sectors share (%) of total collateral: {v1_12mo_count / sector_count :.2%}")
print(f"18mo V1 sectors share (%) of total collateral: {v1_18mo_count / sector_count :.2%}")
print("---")
print(f"6mo V1 sectors share (%) of total collateral: {v1_6mo_count / v1_count :.2%}")
print(f"12mo V1 sectors share (%) of total collateral: {v1_12mo_count / v1_count :.2%}")
print(f"18mo V1 sectors share (%) of total collateral: {v1_18mo_count / v1_count :.2%}")
print("---")
--- Total collateral (Million FIL): 22.9 V1 collateral (Million FIL): 0.376 V1 sectors share (%): 1.6% --- 6mo V1 sectors share (%) of total collateral: 0.00% 12mo V1 sectors share (%) of total collateral: 0.00% 18mo V1 sectors share (%) of total collateral: 1.64% --- 6mo V1 sectors share (%) of total collateral: 0.00% 12mo V1 sectors share (%) of total collateral: 0.00% 18mo V1 sectors share (%) of total collateral: 100.00% --- time: 34.2 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'initial_pledge_in_fil'
title = 'Initial Pledge (FIL) of Expiring Sectors (#)'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
)
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
color='is_v1',
title=title)
fig.show()
time: 171 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'initial_pledge_in_fil'
title = 'Initial Pledge (FIL) of Expiring Sectors, grouped by Sector Version'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
.reset_index()
)
fig = px.bar(fig_df,
x=time_column,
y=value_column,
facet_col='is_v1',
title=title)
fig.show()
time: 90.3 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'activation_date'
value_column = ['initial_pledge_in_thousand_fil']
title = 'Sum of Initial Pledge (FIL) across activation dates'
resample_and_bar_plot(df, resample_rule, time_column, value_column, title).show()
time: 71.8 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'network_power_in_pib'
title = 'RB Network Power (PiB) of Expiring Sectors (#)'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
)
fig = px.bar(fig_df.reset_index(),
x=time_column,
y=value_column,
color='is_v1',
title=title)
fig.show()
time: 77.8 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'expiration_date'
value_column = 'network_power_in_pib'
title = 'RB Network Power (PiB) of Expiring Sectors, grouped by Sector Version'
groups = [pd.Grouper(key='expiration_date', freq=resample_rule),
'is_v1']
fig_df = (df.groupby(groups)
.sum()
.reset_index()
)
fig = px.bar(fig_df,
x=time_column,
y=value_column,
facet_col='is_v1',
title=title)
fig.show()
time: 88.5 ms (started: 2021-08-16 20:07:27 +00:00)
resample_rule = '1m'
time_column = 'activation_date'
value_column = ['network_power_in_pib']
title = 'Sum of RB Network Power (PiB) across activation dates'
resample_and_bar_plot(df, resample_rule, time_column, value_column, title).show()
time: 71.8 ms (started: 2021-08-16 20:07:27 +00:00)