Introduction
Economic time series present unique forecasting challenges due to seasonality, structural breaks, and regime changes. This guide provides practical implementations optimized for economic indicators.
Model Selection Framework
Choose models based on key data characteristics:
def select_forecasting_model(characteristics):
if characteristics['frequency'] == 'high' and characteristics['volatility'] == 'high':
return 'GARCH' # Financial market data
elif characteristics['seasonality'] == 'strong':
if characteristics['exogenous_factors'] == 'important':
return 'SARIMAX' # Seasonal with external factors
else:
return 'SARIMA' # Purely seasonal
elif characteristics['structural_breaks'] == 'present':
return 'Regime_Switching' # For series with policy changes
elif characteristics['sample_size'] == 'large':
return 'Prophet' # Large datasets with multiple seasonality
else:
return 'ARIMA' # Default choice
ARIMA for GDP Forecasting
ARIMA models work well for quarterly GDP data with gradual trends:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima
# Load quarterly GDP data
gdp_data = pd.read_csv('quarterly_gdp.csv', parse_dates=['date'], index_col='date')
# Find optimal parameters
model = auto_arima(gdp_data['value'], seasonal=False,
max_p=4, max_q=4, d=1, stepwise=True)
# Fit ARIMA model with optimal parameters
arima_model = ARIMA(gdp_data['value'], order=(2, 1, 2))
results = arima_model.fit()
# Forecast next 4 quarters
forecast = results.forecast(steps=4)
SARIMAX for CPI with Exogenous Variables
Inflation forecasting benefits from incorporating external factors:
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Load CPI data and exogenous variables
cpi_data = pd.read_csv('monthly_cpi.csv', parse_dates=['date'], index_col='date')
exog_data = pd.read_csv('exogenous_factors.csv', parse_dates=['date'], index_col='date')
combined_data = cpi_data.join(exog_data, how='inner')
# Define target and exogenous variables
y_train = combined_data[:train_end]['cpi']
X_train = combined_data[:train_end][['energy_index', 'wage_index']]
# Fit SARIMAX model with yearly seasonality (s=12)
model = SARIMAX(
y_train,
exog=X_train,
order=(1, 1, 1), # p, d, q parameters
seasonal_order=(1, 1, 1, 12) # P, D, Q, s parameters
)
results = model.fit(disp=False)
Prophet for Retail Sales Forecasting
Prophet handles multiple seasonality patterns and special events:
from prophet import Prophet
import holidays
# Load retail sales data
retail_data = pd.read_csv('retail_sales.csv')
prophet_data = retail_data.rename(columns={'date': 'ds', 'sales': 'y'})
# Create holiday dataframe
us_holidays = holidays.US(years=range(2015, 2026))
holiday_list = [[name, date] for date, name in sorted(us_holidays.items())]
prophet_holidays = pd.DataFrame({
'holiday': [x[0] for x in holiday_list],
'ds': [x[1] for x in holiday_list]
})
# Add retail events like Black Friday
retail_events = pd.DataFrame({
'holiday': ['Black Friday'] * 3,
'ds': pd.to_datetime(['2023-11-24', '2024-11-29', '2025-11-28']),
'lower_window': -1,
'upper_window': 3
})
prophet_holidays = pd.concat([prophet_holidays, retail_events])
# Initialize and fit Prophet model
model = Prophet(
holidays=prophet_holidays,
yearly_seasonality=20,
weekly_seasonality=True
)
model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
model.fit(prophet_data)
GARCH Models for Volatility Forecasting
Financial market volatility exhibits clustering that GARCH models capture:
from arch import arch_model
import numpy as np
# Load daily market data and calculate returns
market_data = pd.read_csv('market_index.csv', parse_dates=['date'], index_col='date')
market_data['returns'] = 100 * market_data['close'].pct_change().dropna()
# Fit GARCH(1,1) model
model = arch_model(
market_data['returns'].dropna(),
vol='GARCH',
p=1, q=1,
mean='AR',
lags=1,
dist='studentst'
)
results = model.fit(disp='off')
# Forecast volatility for next 30 days
forecasts = results.forecast(horizon=30)
forecast_vol = np.sqrt(forecasts.variance.iloc[-1].values) * np.sqrt(252) # Annualized
Forecast Evaluation Framework
Evaluate economic forecasts with multiple metrics:
from sklearn.metrics import mean_squared_error, mean_absolute_error
def evaluate_forecast(actuals, forecasts):
y_true = np.array(actuals)
y_pred = np.array(forecasts)
# Error metrics
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
# Direction accuracy
directions_true = np.sign(np.diff(y_true))
directions_pred = np.sign(np.diff(y_pred[:-1]))
dir_accuracy = np.mean(directions_true == directions_pred) * 100
# Theil's U statistic (compares to naive forecast)
naive_mse = mean_squared_error(y_true[1:], y_true[:-1])
theil_u = np.sqrt(mean_squared_error(y_true, y_pred)) / np.sqrt(naive_mse)
return {
'rmse': rmse, 'mae': mae, 'mape': mape,
'dir_accuracy': dir_accuracy, 'theil_u': theil_u
}
Ensemble Forecasting
Combine multiple models for improved accuracy:
from sklearn.linear_model import LinearRegression
def create_forecast_ensemble(forecasts_dict, actual_values):
forecasts_df = pd.DataFrame(forecasts_dict)
train_size = len(actual_values) - len(forecasts_df)
if train_size > 0:
# Train meta-learner for optimal weights
X_train = forecasts_df.iloc[:train_size]
y_train = actual_values[:train_size]
metalearner = LinearRegression(positive=True)
metalearner.fit(X_train, y_train)
# Get normalized weights
weights = dict(zip(forecasts_dict.keys(), metalearner.coef_))
total = sum(weights.values())
weights = {k: v/total for k, v in weights.items()}
# Generate ensemble forecast
ensemble_forecast = sum(forecasts_df[model] * weight
for model, weight in weights.items())
else:
# Equal weighting if no training data
weight = 1.0 / len(forecasts_dict)
ensemble_forecast = sum(forecasts_df[model] * weight
for model in forecasts_dict.keys())
return ensemble_forecast
Handling Regime Changes
Detect structural breaks to improve forecast accuracy:
from arch.unitroot import StructBreak
def detect_and_forecast_with_regimes(time_series, forecast_horizon=12):
# Convert to numpy array if needed
if isinstance(time_series, pd.Series):
dates = time_series.index
values = time_series.values
else:
values = time_series
# Test for structural breaks
sb = StructBreak(values)
results = sb.breakpoint(max_breaks=3)
break_indices = results.breakpoints
# Use only the most recent regime for forecasting
if len(break_indices) > 0:
last_break = break_indices[-1]
current_regime = values[last_break:]
else:
current_regime = values
# Fit model to current regime only
model = ARIMA(current_regime, order=(2, 1, 2))
fit_results = model.fit()
# Generate forecast
forecast = fit_results.forecast(steps=forecast_horizon)
return forecast, break_indices
By selecting appropriate models, evaluating properly, using ensembles, and handling regime changes, you can generate more accurate predictions for economic time series data.