logo

[time-series] Prophet

import pandas as pd
# Load the series from the Excel workbook; the preview printed below shows
# two columns, `ds` (date) and `y` (value).
df = pd.read_excel('manning.xlsx')
# Show the first rows as a quick sanity check of the loaded data.
df.head()
           ds         y
0  2007-12-10  9.590761
1  2007-12-11  8.519590
2  2007-12-12  8.183677
3  2007-12-13  8.072467
4  2007-12-14  7.893572
from prophet import Prophet
# Fit a Prophet model with default settings; fit() returns the model itself,
# so construction and fitting can be chained.
m = Prophet().fit(df)
# Build the prediction frame: the historical dates plus 365 further daily steps.
future = m.make_future_dataframe(periods=365, freq='D', include_history=True)
# Inspect the last rows to confirm the horizon was extended.
future.tail()
             ds
3265 2017-01-15
3266 2017-01-16
3267 2017-01-17
3268 2017-01-18
3269 2017-01-19
# Predict over every date in `future` (history and the extended horizon).
forecast = m.predict(future)
# Keep only the date, the point forecast and its uncertainty bounds for display.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
             ds      yhat  yhat_lower  yhat_upper
0    2007-12-10  8.837750    8.178753    9.425359
1    2007-12-11  8.586321    7.972422    9.216639
2    2007-12-12  8.382230    7.757505    8.987608
3    2007-12-13  8.360149    7.735065    8.952163
4    2007-12-14  8.348204    7.691369    9.002734
...         ...       ...         ...         ...
3265 2017-01-15  8.206605    7.418459    8.965539
3266 2017-01-16  8.531590    7.774069    9.273478
3267 2017-01-17  8.318985    7.542990    9.073414
3268 2017-01-18  8.151637    7.430158    8.865084
3269 2017-01-19  8.163528    7.455394    8.959976

3270 rows × 4 columns

# Forecast plot and per-component breakdown for the default model. The
# trailing semicolons keep the notebook from echoing the returned figure.
m.plot(forecast);
m.plot_components(forecast);
from prophet.plot import add_changepoints_to_plot

# Redraw the forecast and overlay the model's changepoints on the figure's
# current axes.
fig = m.plot(forecast)
axes = fig.gca()
a = add_changepoints_to_plot(axes, m, forecast)

# Refit with a single, manually specified changepoint and draw the same plot.
m = Prophet(changepoints=['2014-01-01'])
m.fit(df)
forecast = m.predict(future)
fig = m.plot(forecast)
axes = fig.gca()
a = add_changepoints_to_plot(axes, m, forecast)
# Each holiday sheet is read from the workbook and tagged with the name of
# the holiday it represents.
h1 = pd.read_excel('manning.xlsx', sheet_name='playoff').assign(holiday='playoff')
h2 = pd.read_excel('manning.xlsx', sheet_name='superbowls').assign(holiday='superbowl')

# Stack both sheets into the single holidays table handed to Prophet.
holidays = pd.concat([h1, h2])

# Fit a holiday-aware model and forecast over the existing `future` frame.
m = Prophet(holidays=holidays)
m.fit(df)
forecast = m.predict(future)

m.plot_components(forecast);
23:26:11 - cmdstanpy - INFO - Chain [1] start processing
23:26:11 - cmdstanpy - INFO - Chain [1] done processing

# Select every date on which either holiday contributes to the forecast.
# The comparison is `!= 0` rather than `> 0`: a fitted holiday effect may be
# negative, and such dates would otherwise be dropped silently. This matches
# the `!= 0` test used for the Christmas Day query further down.
result = forecast.query('playoff != 0 or superbowl != 0')
# Show the date and point forecast for the first few holiday rows.
result[['ds', 'yhat']].head()
            ds       yhat
34  2008-01-13   9.983693
35  2008-01-14  10.995413
361 2009-01-03   9.056871
362 2009-01-04  10.080071
730 2010-01-16   9.925495
# Start again from the custom holidays table ...
m = Prophet(holidays=holidays)
# ... and add the built-in US country holidays on top of it.
m.add_country_holidays(country_name='US')
# Fit on the history and forecast over the `future` frame in one chained call.
forecast = m.fit(df).predict(future)
23:26:44 - cmdstanpy - INFO - Chain [1] start processing
23:26:44 - cmdstanpy - INFO - Chain [1] done processing

# Keep the rows where the Christmas Day component is non-zero. The backticks
# are required by DataFrame.query because the column name contains a space.
result = forecast.query('`Christmas Day` != 0')
# Show the date, the Christmas Day effect and the resulting forecast.
result[['ds', 'Christmas Day', 'yhat']]
             ds  Christmas Day      yhat
15   2007-12-25      -0.427969  8.206388
352  2008-12-25      -0.427969  7.557798
708  2009-12-25      -0.427969  8.166067
1062 2010-12-25      -0.427969  7.904445
1788 2012-12-25      -0.427969  8.540755
2152 2013-12-25      -0.427969  8.303612
2515 2014-12-25      -0.427969  7.942331
2878 2015-12-25      -0.427969  7.580366
3244 2016-12-25      -0.427969  7.299214
# Turn off the built-in weekly seasonality; two conditional weekly
# seasonalities are registered in its place below.
m = Prophet(weekly_seasonality=False)
d = pd.to_datetime(df.ds)
month = d.dt.month

# On-season covers September through January; off-season is its complement.
df['on_season'] = (month < 2) | (month > 8)
df['off_season'] = ~df['on_season']

# Register one weekly seasonality per condition column; each is only active
# on rows where its condition column is True.
for condition in ('on_season', 'off_season'):
    m.add_seasonality(name=f'weekly_{condition}', period=7, fourier_order=3,
                      condition_name=condition)

<prophet.forecaster.Prophet at 0x18374540d30>
# Conditional seasonalities need their condition columns in the frame passed
# to predict() as well as in the one passed to fit(). `future` was created
# before those columns existed, so derive them from its dates with the same
# month rule applied to `df` (on-season = September through January).
future_month = pd.to_datetime(future['ds']).dt.month
future['on_season'] = (future_month < 2) | (future_month > 8)
future['off_season'] = ~future['on_season']

# Fit on the history and forecast over the now fully populated `future` frame.
forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast);
23:28:33 - cmdstanpy - INFO - Chain [1] start processing
23:28:33 - cmdstanpy - INFO - Chain [1] done processing

# Load the second dataset and rename its columns so the first two follow
# Prophet's `ds` / `y` convention; the rest are candidate regressors.
df = pd.read_excel('uschange.xlsx')
df.columns = ['ds', 'y', 'Income', 'Production', 'Savings', 'Unemployment']

# Register the two external regressors used by this model.
m = Prophet()
for regressor in ('Unemployment', 'Income'):
    m.add_regressor(regressor)

# Fit and predict on the same frame, which already carries the regressor values.
m.fit(df)
forecast = m.predict(df)
23:28:57 - cmdstanpy - INFO - Chain [1] start processing
23:28:57 - cmdstanpy - INFO - Chain [1] done processing

from prophet.utilities import regressor_coefficients
# Tabulate the fitted coefficient of each extra regressor on the model.
regressor_coefficients(m)
      regressor regressor_mode    center  coef_lower      coef  coef_upper
0  Unemployment       additive  0.007487   -0.859028 -0.859028   -0.859028
1        Income       additive  0.717627    0.191213  0.191213    0.191213
from statsmodels.formula.api import ols
# Fit an ordinary least squares model on the same two regressors and print
# its summary, for comparison with the Prophet coefficients above.
ols('y ~ Unemployment + Income', data=df).fit().summary()
OLS Regression Results
Dep. Variable:y R-squared: 0.372
Model:OLS Adj. R-squared: 0.365
Method:Least Squares F-statistic: 54.40
Date:Sun, 28 May 2023 Prob (F-statistic):2.74e-19
Time:23:29:47 Log-Likelihood: -142.38
No. Observations: 187 AIC: 290.8
Df Residuals: 184 BIC: 300.4
Df Model: 2
Covariance Type:nonrobust
                coef    std err          t      P>|t|      [0.025      0.975]
Intercept 0.6064 0.049 12.404 0.000 0.510 0.703
Unemployment -0.8275 0.105 -7.890 0.000 -1.034 -0.621
Income 0.2038 0.042 4.822 0.000 0.120 0.287
Omnibus: 8.056 Durbin-Watson: 1.864
Prob(Omnibus): 0.018 Jarque-Bera (JB): 8.321
Skew: 0.401 Prob(JB): 0.0156
Kurtosis: 3.653 Cond. No. 3.85



Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Load the third dataset from CSV.
df = pd.read_csv('log_visit.csv')
# Quick line plot of the target column to eyeball its shape.
df.y.plot()
<Axes: >
# Logistic growth reads its carrying capacity from a `cap` column, so set a
# constant cap of 8.5 on every row before fitting.
df['cap'] = 8.5
m = Prophet(growth='logistic')
m.fit(df)
23:30:37 - cmdstanpy - INFO - Chain [1] start processing
23:30:37 - cmdstanpy - INFO - Chain [1] done processing

<prophet.forecaster.Prophet at 0x183773b9ed0>
# Extend the timeline by 1826 periods beyond the history.
future = m.make_future_dataframe(periods=1826)
# The prediction frame gets the same constant cap that was set on the
# training frame.
future['cap'] = 8.5
fcst = m.predict(future)
# Plot the forecast; the trailing semicolon suppresses the notebook's echo of
# the returned figure.
m.plot(fcst);
Previous
생존 분석