import pandas as pd
# Load the workbook 'uschange.xlsx' into a DataFrame; the later cells all
# read their data from `uc`.
uc = pd.read_excel('uschange.xlsx')
# Preview the first rows to check the columns that were read.
uc.head()
|   | Date | Consumption | Income | Production | Savings | Unemployment |
|---|---|---|---|---|---|---|
| 0 | 1970-01-01 | 0.615986 | 0.972261 | -2.452700 | 4.810312 | 0.9 |
| 1 | 1970-04-01 | 0.460376 | 1.169085 | -0.551525 | 7.287992 | 0.5 |
| 2 | 1970-07-01 | 0.876791 | 1.553271 | -0.358708 | 7.289013 | 0.5 |
| 3 | 1970-10-01 | -0.274245 | -0.255272 | -2.185455 | 0.985230 | 0.7 |
| 4 | 1971-01-01 | 1.897371 | 1.987154 | 1.909734 | 3.657771 | -0.1 |
from statsmodels.formula.api import ols
# Ordinary least squares of Consumption on Income via the formula interface
# (which adds the Intercept term shown in the summary); the summary table is
# the displayed result of this cell.
ols(
    formula='Consumption ~ Income',
    data=uc,
).fit().summary()
OLS Regression Results
| Dep. Variable: | Consumption | R-squared: | 0.159 |
|---|
| Model: | OLS | Adj. R-squared: | 0.154 |
|---|
| Method: | Least Squares | F-statistic: | 34.98 |
|---|
| Date: | Sat, 01 Jul 2023 | Prob (F-statistic): | 1.58e-08 |
|---|
| Time: | 19:29:57 | Log-Likelihood: | -169.62 |
|---|
| No. Observations: | 187 | AIC: | 343.2 |
|---|
| Df Residuals: | 185 | BIC: | 349.7 |
|---|
| Df Model: | 1 | | |
|---|
| Covariance Type: | nonrobust | | |
|---|
| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|
| Intercept | 0.5451 | 0.056 | 9.789 | 0.000 | 0.435 | 0.655 |
|---|
| Income | 0.2806 | 0.047 | 5.915 | 0.000 | 0.187 | 0.374 |
|---|
| Omnibus: | 16.528 | Durbin-Watson: | 1.696 |
|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 29.145 |
|---|
| Skew: | -0.454 | Prob(JB): | 4.69e-07 |
|---|
| Kurtosis: | 4.707 | Cond. No. | 2.08 |
|---|
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
from patsy import dmatrices
from statsmodels.regression.linear_model import OLS

# Same regression as the formula version, but with the response (`y`) and the
# design matrix (`x`, intercept column plus Income) built explicitly. Both
# names are reused by the time-series models further down.
y, x = dmatrices('Consumption ~ Income', data=uc)
OLS(endog=y, exog=x).fit().summary()
OLS Regression Results
| Dep. Variable: | Consumption | R-squared: | 0.159 |
|---|
| Model: | OLS | Adj. R-squared: | 0.154 |
|---|
| Method: | Least Squares | F-statistic: | 34.98 |
|---|
| Date: | Sat, 01 Jul 2023 | Prob (F-statistic): | 1.58e-08 |
|---|
| Time: | 19:30:13 | Log-Likelihood: | -169.62 |
|---|
| No. Observations: | 187 | AIC: | 343.2 |
|---|
| Df Residuals: | 185 | BIC: | 349.7 |
|---|
| Df Model: | 1 | | |
|---|
| Covariance Type: | nonrobust | | |
|---|
| | coef | std err | t | P>\|t\| | [0.025 | 0.975] |
|---|
| Intercept | 0.5451 | 0.056 | 9.789 | 0.000 | 0.435 | 0.655 |
|---|
| Income | 0.2806 | 0.047 | 5.915 | 0.000 | 0.187 | 0.374 |
|---|
| Omnibus: | 16.528 | Durbin-Watson: | 1.696 |
|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 29.145 |
|---|
| Skew: | -0.454 | Prob(JB): | 4.69e-07 |
|---|
| Kurtosis: | 4.707 | Cond. No. | 2.08 |
|---|
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
from statsmodels.tsa.api import SARIMAX
# Fit an AR(1) model (order p=1, d=0, q=0) to the Consumption series alone,
# with no exogenous regressors.
# NOTE(review): no `trend` argument is passed here, so the summary lists only
# ar.L1 and sigma2 (no intercept), whereas the MA(1) cell later in this file
# passes trend='c'. Confirm that omitting the constant is intentional.
m = SARIMAX(endog=y, order=(1, 0, 0)).fit()
# Display the estimation summary as the cell result.
m.summary()
SARIMAX Results
| Dep. Variable: | Consumption | No. Observations: | 187 |
|---|
| Model: | SARIMAX(1, 0, 0) | Log Likelihood | -196.683 |
|---|
| Date: | Sat, 01 Jul 2023 | AIC | 397.367 |
|---|
| Time: | 19:39:49 | BIC | 403.829 |
|---|
| Sample: | 0 | HQIC | 399.985 |
|---|
| | - 187 | | |
|---|
| Covariance Type: | opg | | |
|---|
| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|
| ar.L1 | 0.7152 | 0.051 | 14.133 | 0.000 | 0.616 | 0.814 |
|---|
| sigma2 | 0.4780 | 0.040 | 11.973 | 0.000 | 0.400 | 0.556 |
|---|
| Ljung-Box (L1) (Q): | 31.48 | Jarque-Bera (JB): | 24.93 |
|---|
| Prob(Q): | 0.00 | Prob(JB): | 0.00 |
|---|
| Heteroskedasticity (H): | 0.22 | Skew: | 0.15 |
|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 4.76 |
|---|
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
import matplotlib.pyplot as plt
# Draw the observed Consumption series, then overlay the fitted model's
# in-sample predictions on the same axes.
uc.Consumption.plot()
# Predict from the first to the last observation. The end index is derived
# from the data (len(y) - 1) instead of the hard-coded 186, so the cell stays
# correct if the number of rows in the workbook changes.
plt.plot(m.predict(start=0, end=len(y) - 1))
[<matplotlib.lines.Line2D at 0x2d376654ca0>]
# Refit as an MA(1) model (order p=0, d=0, q=1) with a constant term
# (trend='c', reported as `intercept` in the summary); rebinds `m`.
m = SARIMAX(endog=y, order=(0, 0, 1), trend='c').fit()
# Display the estimation summary as the cell result.
m.summary()
SARIMAX Results
| Dep. Variable: | Consumption | No. Observations: | 187 |
|---|
| Model: | SARIMAX(0, 0, 1) | Log Likelihood | -177.185 |
|---|
| Date: | Sat, 01 Jul 2023 | AIC | 360.371 |
|---|
| Time: | 19:39:50 | BIC | 370.064 |
|---|
| Sample: | 0 | HQIC | 364.298 |
|---|
| | - 187 | | |
|---|
| Covariance Type: | opg | | |
|---|
| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|
| intercept | 0.7463 | 0.063 | 11.844 | 0.000 | 0.623 | 0.870 |
|---|
| ma.L1 | 0.2666 | 0.069 | 3.878 | 0.000 | 0.132 | 0.401 |
|---|
| sigma2 | 0.3894 | 0.029 | 13.466 | 0.000 | 0.333 | 0.446 |
|---|
| Ljung-Box (L1) (Q): | 0.45 | Jarque-Bera (JB): | 52.67 |
|---|
| Prob(Q): | 0.50 | Prob(JB): | 0.00 |
|---|
| Heteroskedasticity (H): | 0.31 | Skew: | -0.59 |
|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 5.31 |
|---|
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Draw the observed Consumption series, then overlay the in-sample
# predictions of the model currently bound to `m`.
uc.Consumption.plot()
# Predict from the first to the last observation. The end index is derived
# from the data (len(y) - 1) instead of the hard-coded 186, so the cell stays
# correct if the number of rows in the workbook changes.
plt.plot(m.predict(start=0, end=len(y) - 1))
[<matplotlib.lines.Line2D at 0x2d3766d3e50>]
# Regression of Consumption on the design matrix `x` with AR(1) errors.
# `x` already carries the constant column built from the formula, which is
# why the summary reports `const` and `x1` (Income) without a `trend` argument.
m = SARIMAX(endog=y, exog=x, order=(1, 0, 0)).fit()
# Display the estimation summary as the cell result.
m.summary()
SARIMAX Results
| Dep. Variable: | y | No. Observations: | 187 |
|---|
| Model: | SARIMAX(1, 0, 0) | Log Likelihood | -166.064 |
|---|
| Date: | Sat, 01 Jul 2023 | AIC | 340.129 |
|---|
| Time: | 19:39:51 | BIC | 353.053 |
|---|
| Sample: | 0 | HQIC | 345.366 |
|---|
| | - 187 | | |
|---|
| Covariance Type: | opg | | |
|---|
| | coef | std err | z | P>\|z\| | [0.025 | 0.975] |
|---|
| const | 0.6093 | 0.062 | 9.898 | 0.000 | 0.489 | 0.730 |
|---|
| x1 | 0.1907 | 0.034 | 5.633 | 0.000 | 0.124 | 0.257 |
|---|
| ar.L1 | 0.2353 | 0.067 | 3.514 | 0.000 | 0.104 | 0.367 |
|---|
| sigma2 | 0.3457 | 0.028 | 12.308 | 0.000 | 0.291 | 0.401 |
|---|
| Ljung-Box (L1) (Q): | 0.72 | Jarque-Bera (JB): | 25.86 |
|---|
| Prob(Q): | 0.40 | Prob(JB): | 0.00 |
|---|
| Heteroskedasticity (H): | 0.42 | Skew: | -0.42 |
|---|
| Prob(H) (two-sided): | 0.00 | Kurtosis: | 4.62 |
|---|
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Draw the observed Consumption series, then overlay the in-sample
# predictions of the regression-with-AR(1)-errors model bound to `m`.
uc.Consumption.plot()
# Predict from the first to the last observation. The end index is derived
# from the data (len(y) - 1) instead of the hard-coded 186, so the cell stays
# correct if the number of rows in the workbook changes.
# NOTE(review): `exog=x` is kept exactly as before; for a purely in-sample
# range the model presumably reuses the regressors it was fitted with —
# confirm against the statsmodels docs before removing it.
plt.plot(m.predict(start=0, end=len(y) - 1, exog=x))
[<matplotlib.lines.Line2D at 0x2d3777a0220>]
# Echo the fitted results object; as a bare expression it only displays the
# object's default repr.
m
<statsmodels.tsa.statespace.sarimax.SARIMAXResultsWrapper at 0x2d373963fd0>