Prophet :: 시계열 분석 - mindscale
Skip to content

Prophet

import pandas as pd

# Read the workbook's default sheet; the preview below shows a `ds` date
# column and a `y` value column.
df = pd.read_excel('manning.xlsx')
df.head(5)
ds y
0 2007-12-10 9.590761
1 2007-12-11 8.519590
2 2007-12-12 8.183677
3 2007-12-13 8.072467
4 2007-12-14 7.893572
from prophet import Prophet

# Build a model with default settings and fit it on the observed history.
m = Prophet().fit(df)

# Extend the training dates by 365 further periods to forecast over.
future = m.make_future_dataframe(periods=365)
future.tail(5)
ds
3265 2017-01-15
3266 2017-01-16
3267 2017-01-17
3268 2017-01-18
3269 2017-01-19
# Predict over every date in `future`, then show the point forecast together
# with its lower and upper bounds.
forecast = m.predict(future)
forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
ds yhat yhat_lower yhat_upper
0 2007-12-10 8.837750 8.178753 9.425359
1 2007-12-11 8.586321 7.972422 9.216639
2 2007-12-12 8.382230 7.757505 8.987608
3 2007-12-13 8.360149 7.735065 8.952163
4 2007-12-14 8.348204 7.691369 9.002734
... ... ... ... ...
3265 2017-01-15 8.206605 7.418459 8.965539
3266 2017-01-16 8.531590 7.774069 9.273478
3267 2017-01-17 8.318985 7.542990 9.073414
3268 2017-01-18 8.151637 7.430158 8.865084
3269 2017-01-19 8.163528 7.455394 8.959976

3270 rows × 4 columns

from prophet.plot import add_changepoints_to_plot

# Forecast plot followed by its component breakdown; the trailing semicolons
# keep the notebook from echoing the returned figure objects.
m.plot(forecast);
m.plot_components(forecast);

# Draw the forecast once more and overlay the model's changepoints on its axes.
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)
# Refit with a single, explicitly supplied changepoint date.
m = Prophet(changepoints=['2014-01-01'])
m.fit(df)
forecast = m.predict(future)

# Plot the new forecast with the changepoint marked on it.
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)
# Read each sheet of event dates and tag its rows with the holiday name.
h1 = pd.read_excel('manning.xlsx', sheet_name='playoff').assign(holiday='playoff')
h2 = pd.read_excel('manning.xlsx', sheet_name='superbowls').assign(holiday='superbowl')

# Stack both event tables into the single frame passed to Prophet.
holidays = pd.concat([h1, h2])

m = Prophet(holidays=holidays)
m.fit(df)
forecast = m.predict(future)

m.plot_components(forecast);
23:26:11 - cmdstanpy - INFO - Chain [1] start processing
23:26:11 - cmdstanpy - INFO - Chain [1] done processing
# Keep every date on which either custom holiday contributes to the forecast.
# Compare against zero instead of using `> 0`, so a holiday whose estimated
# effect turns out negative is not silently filtered away.
result = forecast.query('playoff != 0 or superbowl != 0')
result[['ds', 'yhat']].head()
ds yhat
34 2008-01-13 9.983693
35 2008-01-14 10.995413
361 2009-01-03 9.056871
362 2009-01-04 10.080071
730 2010-01-16 9.925495
# Use the custom holiday table and add the built-in US holidays on top of it.
m = Prophet(holidays=holidays)
m.add_country_holidays(country_name='US')

m.fit(df)
forecast = m.predict(future)
23:26:44 - cmdstanpy - INFO - Chain [1] start processing
23:26:44 - cmdstanpy - INFO - Chain [1] done processing
# Select the dates on which the 'Christmas Day' column is non-zero.
result = forecast.loc[forecast['Christmas Day'] != 0]
result[['ds', 'Christmas Day', 'yhat']]
ds Christmas Day yhat
15 2007-12-25 -0.427969 8.206388
352 2008-12-25 -0.427969 7.557798
708 2009-12-25 -0.427969 8.166067
1062 2010-12-25 -0.427969 7.904445
1788 2012-12-25 -0.427969 8.540755
2152 2013-12-25 -0.427969 8.303612
2515 2014-12-25 -0.427969 7.942331
2878 2015-12-25 -0.427969 7.580366
3244 2016-12-25 -0.427969 7.299214
# Turn off the built-in weekly seasonality; two conditional weekly
# seasonalities are registered below instead.
m = Prophet(weekly_seasonality=False)
d = pd.to_datetime(df.ds)

# on_season: months after August or before February; off_season: the rest.
df['on_season'] = d.dt.month.lt(2) | d.dt.month.gt(8)
df['off_season'] = ~df['on_season']

m.add_seasonality(
    name='weekly_on_season',
    period=7,
    fourier_order=3,
    condition_name='on_season',
)

m.add_seasonality(
    name='weekly_off_season',
    period=7,
    fourier_order=3,
    condition_name='off_season',
)
<prophet.forecaster.Prophet at 0x18374540d30>
# The two conditional seasonalities on `m` look up `on_season` / `off_season`
# in whichever frame is passed to the model. `future` only carries `ds`, so
# give it the same flags (same month rule as the training frame) before
# predicting; otherwise `predict` fails on the missing condition columns.
future_d = pd.to_datetime(future.ds)
future['on_season'] = (future_d.dt.month < 2) | (future_d.dt.month > 8)
future['off_season'] = ~future['on_season']

forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast);
23:28:33 - cmdstanpy - INFO - Chain [1] start processing
23:28:33 - cmdstanpy - INFO - Chain [1] done processing
# Load the data and rename its six columns so that the first two become the
# `ds` / `y` pair used for fitting.
df = pd.read_excel('uschange.xlsx')
df.columns = ['ds', 'y', 'Income', 'Production', 'Savings', 'Unemployment']

# Register two of the remaining columns as additional regressors.
m = Prophet()
m.add_regressor('Unemployment')
m.add_regressor('Income')

# Fit, then predict in-sample on the same frame (it already holds the
# regressor columns).
m.fit(df)
forecast = m.predict(df)
23:28:57 - cmdstanpy - INFO - Chain [1] start processing
23:28:57 - cmdstanpy - INFO - Chain [1] done processing
from prophet.utilities import regressor_coefficients
# Tabulate the fitted coefficient of each extra regressor registered on `m`.
regressor_coefficients(m)
regressor regressor_mode center coef_lower coef coef_upper
0 Unemployment additive 0.007487 -0.859028 -0.859028 -0.859028
1 Income additive 0.717627 0.191213 0.191213 0.191213
from statsmodels.formula.api import ols

# Fit an ordinary least squares regression on the same two predictors, for
# comparison with the regressor coefficients above.
(
    ols(formula='y ~ Unemployment + Income', data=df)
    .fit()
    .summary()
)
OLS Regression Results
Dep. Variable: y R-squared: 0.372
Model: OLS Adj. R-squared: 0.365
Method: Least Squares F-statistic: 54.40
Date: Sun, 28 May 2023 Prob (F-statistic): 2.74e-19
Time: 23:29:47 Log-Likelihood: -142.38
No. Observations: 187 AIC: 290.8
Df Residuals: 184 BIC: 300.4
Df Model: 2
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.6064 0.049 12.404 0.000 0.510 0.703
Unemployment -0.8275 0.105 -7.890 0.000 -1.034 -0.621
Income 0.2038 0.042 4.822 0.000 0.120 0.287
Omnibus: 8.056 Durbin-Watson: 1.864
Prob(Omnibus): 0.018 Jarque-Bera (JB): 8.321
Skew: 0.401 Prob(JB): 0.0156
Kurtosis: 3.653 Cond. No. 3.85




Notes:

[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Load the series and take a quick look at the raw `y` values.
df = pd.read_csv('log_visit.csv')
df['y'].plot()
<Axes: >
# Logistic-growth model; the training frame carries a constant `cap` column.
m = Prophet(growth='logistic')
df['cap'] = 8.5
m.fit(df)
23:30:37 - cmdstanpy - INFO - Chain [1] start processing
23:30:37 - cmdstanpy - INFO - Chain [1] done processing
<prophet.forecaster.Prophet at 0x183773b9ed0>
# Extend the dates by 1826 periods and give the future frame the same
# constant `cap` value as the training data.
future = m.make_future_dataframe(periods=1826).assign(cap=8.5)

fcst = m.predict(future)
m.plot(fcst);