Prophet

import pandas as pd
# Load the daily series from the workbook's default (first) sheet and preview
# the first five rows; the frame carries a `ds` date column and a `y` value
# column.
df = pd.read_excel(io='manning.xlsx')
df.head(n=5)

	ds	y
0	2007-12-10	9.590761
1	2007-12-11	8.519590
2	2007-12-12	8.183677
3	2007-12-13	8.072467
4	2007-12-14	7.893572

from prophet import Prophet
# Baseline model: every Prophet setting is left at its default. fit() returns
# the model itself, so the fitted object is bound to `m` and then echoed.
m = Prophet().fit(df)
m

# Build the prediction frame: the training dates plus 365 further daily rows.
# `freq` and `include_history` are spelled out at their default values.
future = m.make_future_dataframe(periods=365, freq='D', include_history=True)
future.tail(n=5)

	ds
3265	2017-01-15
3266	2017-01-16
3267	2017-01-17
3268	2017-01-18
3269	2017-01-19

# Predict over every date in `future`, then show the point forecast together
# with its lower/upper bounds.
forecast = m.predict(df=future)
forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

	ds	yhat	yhat_lower	yhat_upper
0	2007-12-10	8.837750	8.178753	9.425359
1	2007-12-11	8.586321	7.972422	9.216639
2	2007-12-12	8.382230	7.757505	8.987608
3	2007-12-13	8.360149	7.735065	8.952163
4	2007-12-14	8.348204	7.691369	9.002734
...	...	...	...	...
3265	2017-01-15	8.206605	7.418459	8.965539
3266	2017-01-16	8.531590	7.774069	9.273478
3267	2017-01-17	8.318985	7.542990	9.073414
3268	2017-01-18	8.151637	7.430158	8.865084
3269	2017-01-19	8.163528	7.455394	8.959976

3270 rows × 4 columns

# Plot the forecast itself (trailing semicolon suppresses the figure repr).
m.plot(fcst=forecast);

# Plot the forecast broken down into its components.
m.plot_components(fcst=forecast);

from prophet.plot import add_changepoints_to_plot
# Draw the forecast, then overlay the model's changepoints on the figure's
# current axes.
fig = m.plot(fcst=forecast)
a = add_changepoints_to_plot(
    fig.gca(),
    m,
    forecast,
)

# Refit with a single, explicitly specified trend changepoint.
m = Prophet(changepoints=['2014-01-01'])
m.fit(df)
forecast = m.predict(future)

# Re-plot the forecast and mark the changepoint on it.
fig = m.plot(fcst=forecast)
a = add_changepoints_to_plot(
    fig.gca(),
    m,
    forecast,
)

# Each event type lives on its own sheet of the workbook; tag every row with
# the holiday name Prophet should use for it.
h1 = pd.read_excel('manning.xlsx', sheet_name='playoff').assign(holiday='playoff')
h2 = pd.read_excel('manning.xlsx', sheet_name='superbowls').assign(holiday='superbowl')

# Stack both event tables into the single holidays frame passed to Prophet.
holidays = pd.concat(objs=[h1, h2])

# Refit with the custom playoff/superbowl holidays and forecast the same
# horizon as before.
m = Prophet(holidays=holidays)
m.fit(df)
forecast = m.predict(future)

# The component plot now includes the holiday effect.
m.plot_components(fcst=forecast);

23:26:11 - cmdstanpy - INFO - Chain [1] start processing
23:26:11 - cmdstanpy - INFO - Chain [1] done processing

# Keep only the dates on which either holiday component is positive, and
# preview the forecast for the first five of them.
result = forecast.query(expr='playoff > 0 or superbowl > 0')
result.loc[:, ['ds', 'yhat']].head(n=5)

	ds	yhat
34	2008-01-13	9.983693
35	2008-01-14	10.995413
361	2009-01-03	9.056871
362	2009-01-04	10.080071
730	2010-01-16	9.925495

# Combine the custom holidays with Prophet's built-in US holiday calendar,
# then refit and forecast.
m = Prophet(holidays=holidays)
m.add_country_holidays(country_name='US')
m.fit(df)
forecast = m.predict(future)

23:26:44 - cmdstanpy - INFO - Chain [1] start processing
23:26:44 - cmdstanpy - INFO - Chain [1] done processing

# Select the dates where the "Christmas Day" holiday component is non-zero
# (backticks are needed in query() because the column name contains a space).
result = forecast.query(expr='`Christmas Day` != 0')
result.loc[:, ['ds', 'Christmas Day', 'yhat']]

	ds	Christmas Day	yhat
15	2007-12-25	-0.427969	8.206388
352	2008-12-25	-0.427969	7.557798
708	2009-12-25	-0.427969	8.166067
1062	2010-12-25	-0.427969	7.904445
1788	2012-12-25	-0.427969	8.540755
2152	2013-12-25	-0.427969	8.303612
2515	2014-12-25	-0.427969	7.942331
2878	2015-12-25	-0.427969	7.580366
3244	2016-12-25	-0.427969	7.299214

# Turn off the built-in weekly seasonality; it is replaced below by two
# conditional weekly seasonalities, one per part of the year.
m = Prophet(weekly_seasonality=False)

# On-season covers September through January; every other month is
# off-season. Each flag becomes a boolean column on the training frame.
d = pd.to_datetime(df['ds'])
on_season = (d.dt.month > 8) | (d.dt.month < 2)
df['on_season'] = on_season
df['off_season'] = ~on_season

# Both seasonalities share the same weekly period and Fourier order and differ
# only in their name and in the condition column that switches them on.
weekly = dict(period=7, fourier_order=3)

m.add_seasonality(name='weekly_on_season', condition_name='on_season',
                  **weekly)

m.add_seasonality(name='weekly_off_season', condition_name='off_season',
                  **weekly)

<prophet.forecaster.Prophet at 0x18374540d30>

# Prophet requires every conditional-seasonality column to be present on the
# frame passed to predict(); otherwise it raises ValueError ("Condition ...
# missing from dataframe"). `future` was built with only a `ds` column, so
# derive the same on/off-season flags for it that the training frame carries
# (on-season = September through January).
future_dates = pd.to_datetime(future['ds'])
future['on_season'] = (future_dates.dt.month < 2) | (future_dates.dt.month > 8)
future['off_season'] = ~future['on_season']

# Fit on the flagged training data, forecast over the flagged future frame,
# and plot the components (including both conditional weekly seasonalities).
forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast);

23:28:33 - cmdstanpy - INFO - Chain [1] start processing
23:28:33 - cmdstanpy - INFO - Chain [1] done processing

# Load the quarterly US-change data and rename its columns so the date and
# target follow Prophet's `ds` / `y` convention.
df = pd.read_excel(io='uschange.xlsx')
column_names = ['ds', 'y', 'Income', 'Production', 'Savings', 'Unemployment']
df.columns = column_names

# Register two of the remaining columns as extra regressors (order preserved).
m = Prophet()
for regressor_name in ('Unemployment', 'Income'):
    m.add_regressor(regressor_name)

# Fit and predict on the same frame: it already contains the regressor values.
m.fit(df)
forecast = m.predict(df)

23:28:57 - cmdstanpy - INFO - Chain [1] start processing
23:28:57 - cmdstanpy - INFO - Chain [1] done processing

from prophet.utilities import regressor_coefficients
# Tabulate the fitted coefficient of each extra regressor on the current model.
regressor_coefficients(m=m)

	regressor	regressor_mode	center	coef_lower	coef	coef_upper
0	Unemployment	additive	0.007487	-0.859028	-0.859028	-0.859028
1	Income	additive	0.717627	0.191213	0.191213	0.191213

from statsmodels.formula.api import ols
# Cross-check: regress y on the same two regressors with ordinary least
# squares and display the fit summary.
ols_fit = ols(formula='y ~ Unemployment + Income', data=df).fit()
ols_fit.summary()

OLS Regression Results
Dep. Variable:	y	R-squared:	0.372
Model:	OLS	Adj. R-squared:	0.365
Method:	Least Squares	F-statistic:	54.40
Date:	Sun, 28 May 2023	Prob (F-statistic):	2.74e-19
Time:	23:29:47	Log-Likelihood:	-142.38
No. Observations:	187	AIC:	290.8
Df Residuals:	184	BIC:	300.4
Df Model:	2
Covariance Type:	nonrobust

	coef	std err	t	P>|t|	[0.025	0.975]
Intercept	0.6064	0.049	12.404	0.000	0.510	0.703
Unemployment	-0.8275	0.105	-7.890	0.000	-1.034	-0.621
Income	0.2038	0.042	4.822	0.000	0.120	0.287

Omnibus:	8.056	Durbin-Watson:	1.864
Prob(Omnibus):	0.018	Jarque-Bera (JB):	8.321
Skew:	0.401	Prob(JB):	0.0156
Kurtosis:	3.653	Cond. No.	3.85

Notes:

[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Load the visits series and take a quick look at the raw `y` values.
df = pd.read_csv(filepath_or_buffer='log_visit.csv')
df['y'].plot()

<Axes: >

# Logistic growth is fitted against a carrying capacity, supplied as a
# constant `cap` column on the training frame.
df['cap'] = 8.5

# fit() returns the model itself, so bind the fitted model and echo it.
m = Prophet(growth='logistic').fit(df)
m

23:30:37 - cmdstanpy - INFO - Chain [1] start processing
23:30:37 - cmdstanpy - INFO - Chain [1] done processing

<prophet.forecaster.Prophet at 0x183773b9ed0>

# Extend the dates by 1826 periods and give the prediction frame the same
# constant capacity that was set on the training data.
future = m.make_future_dataframe(periods=1826).assign(cap=8.5)

# Forecast under the logistic trend and plot the result.
fcst = m.predict(df=future)
m.plot(fcst=fcst);