Time Series Forecasting for Economic Data: A Practical Guide

Introduction

Economic time series present unique forecasting challenges due to seasonality, structural breaks, and regime changes. This guide provides practical implementations optimized for economic indicators.

Model Selection Framework

Choose models based on key data characteristics:

def select_forecasting_model(characteristics):
    """Return the name of a forecasting model suited to a series.

    Parameters
    ----------
    characteristics : dict
        Expected keys: 'frequency', 'volatility', 'seasonality',
        'exogenous_factors', 'structural_breaks', 'sample_size'.
        Missing keys are treated as "not applicable" instead of raising
        KeyError, so partial descriptions fall through to the default.

    Returns
    -------
    str
        One of 'GARCH', 'SARIMAX', 'SARIMA', 'Regime_Switching',
        'Prophet', or 'ARIMA'.
    """
    get = characteristics.get
    # High-frequency, high-volatility series (e.g. daily market data):
    # volatility clustering calls for GARCH.
    if get('frequency') == 'high' and get('volatility') == 'high':
        return 'GARCH'
    # Strong seasonality: SARIMAX if external regressors matter,
    # plain SARIMA otherwise.
    if get('seasonality') == 'strong':
        if get('exogenous_factors') == 'important':
            return 'SARIMAX'
        return 'SARIMA'
    # Structural breaks (policy shifts, crises) invalidate a single
    # fixed-parameter model.
    if get('structural_breaks') == 'present':
        return 'Regime_Switching'
    # Large samples can support Prophet's flexible seasonal terms.
    if get('sample_size') == 'large':
        return 'Prophet'
    return 'ARIMA'  # Default choice for short, gradually trending series

ARIMA for GDP Forecasting

ARIMA models work well for quarterly GDP data with gradual trends:

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima

# Load quarterly GDP data
gdp_data = pd.read_csv('quarterly_gdp.csv', parse_dates=['date'], index_col='date')

# Search (p, q) up to 4 with first differencing; stepwise search keeps
# the number of candidate fits small.
model = auto_arima(gdp_data['value'], seasonal=False,
                   max_p=4, max_q=4, d=1, stepwise=True)

# Fit ARIMA with the order auto_arima actually selected. (The original
# hard-coded order=(2, 1, 2), silently discarding the search result.)
arima_model = ARIMA(gdp_data['value'], order=model.order)
results = arima_model.fit()

# Forecast next 4 quarters
forecast = results.forecast(steps=4)

SARIMAX for CPI with Exogenous Variables

Inflation forecasting benefits from incorporating external factors:

from statsmodels.tsa.statespace.sarimax import SARIMAX

# Load CPI data and exogenous variables; the inner join keeps only dates
# present in both files, so the target and regressors stay aligned.
cpi_data = pd.read_csv('monthly_cpi.csv', parse_dates=['date'], index_col='date')
exog_data = pd.read_csv('exogenous_factors.csv', parse_dates=['date'], index_col='date')
combined_data = cpi_data.join(exog_data, how='inner')

# Define target and exogenous variables.
# NOTE(review): `train_end` is assumed to be a date/label defined earlier
# in the analysis — confirm it is in scope before running this snippet.
y_train = combined_data[:train_end]['cpi']
X_train = combined_data[:train_end][['energy_index', 'wage_index']]

# Fit SARIMAX on monthly data with yearly seasonality (s=12)
model = SARIMAX(
    y_train,
    exog=X_train,
    order=(1, 1, 1),              # p, d, q parameters (non-seasonal)
    seasonal_order=(1, 1, 1, 12)  # P, D, Q, s parameters (seasonal period 12)
)
results = model.fit(disp=False)  # disp=False silences optimizer output

Prophet for Retail Sales Forecasting

Prophet handles multiple seasonality patterns and special events:

from prophet import Prophet
import holidays

# Load retail sales and rename columns into Prophet's required ds/y schema.
retail_data = pd.read_csv('retail_sales.csv')
prophet_data = retail_data.rename(columns={'date': 'ds', 'sales': 'y'})

# US holidays for the sample period, in chronological order.
us_holidays = holidays.US(years=range(2015, 2026))
sorted_items = sorted(us_holidays.items())  # (date, name) pairs
prophet_holidays = pd.DataFrame({
    'holiday': [name for _, name in sorted_items],
    'ds': [date for date, _ in sorted_items]
})

# Add retail events like Black Friday.
black_friday_dates = pd.to_datetime(['2023-11-24', '2024-11-29', '2025-11-28'])
retail_events = pd.DataFrame({
    'holiday': ['Black Friday'] * 3,
    'ds': black_friday_dates,
    'lower_window': -1,  # include the day before each event
    'upper_window': 3    # and the three days after
})
prophet_holidays = pd.concat([prophet_holidays, retail_events])

# Initialize and fit the Prophet model.
model = Prophet(
    holidays=prophet_holidays,
    yearly_seasonality=20,  # Fourier order of the yearly component
    weekly_seasonality=True
)
model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
model.fit(prophet_data)

GARCH Models for Volatility Forecasting

Financial market volatility exhibits clustering that GARCH models capture:

from arch import arch_model
import numpy as np

# Load daily market data and compute percentage returns.
market_data = pd.read_csv('market_index.csv', parse_dates=['date'], index_col='date')
# Do NOT dropna() here: assigning a shortened Series back into the frame
# realigns on the index and reinserts the NaN anyway. The leading NaN from
# pct_change is dropped once, below, when fitting.
market_data['returns'] = 100 * market_data['close'].pct_change()

# GARCH(1,1) with an AR(1) mean and Student-t errors (fat tails are
# typical of daily returns).
model = arch_model(
    market_data['returns'].dropna(),
    vol='GARCH',
    p=1, q=1,
    mean='AR',
    lags=1,
    dist='studentst'
)
results = model.fit(disp='off')

# Forecast conditional variance for the next 30 days; square root gives
# volatility, sqrt(252) annualizes from daily trading frequency.
forecasts = results.forecast(horizon=30)
forecast_vol = np.sqrt(forecasts.variance.iloc[-1].values) * np.sqrt(252)  # Annualized

Forecast Evaluation Framework

Evaluate economic forecasts with multiple metrics:

from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate_forecast(actuals, forecasts):
    """Score a forecast against realized values.

    Parameters
    ----------
    actuals, forecasts : array-like of float
        Aligned series of equal length (at least 2 points for the
        direction and Theil metrics to be meaningful).

    Returns
    -------
    dict
        'rmse', 'mae', 'mape' (percent; undefined when any actual is 0),
        'dir_accuracy' (percent of periods where the forecast moved in
        the same direction as the realized series), and 'theil_u'
        (forecast RMSE over naive random-walk RMSE; < 1 means the
        forecast beats the naive benchmark).
    """
    y_true = np.asarray(actuals, dtype=float)
    y_pred = np.asarray(forecasts, dtype=float)

    # Error metrics (plain numpy — identical to sklearn's
    # mean_squared_error / mean_absolute_error, without the dependency).
    errors = y_true - y_pred
    rmse = np.sqrt(np.mean(errors ** 2))
    mae = np.mean(np.abs(errors))
    # NOTE: MAPE divides by the actuals — infinite/undefined when any
    # actual value is exactly zero.
    mape = np.mean(np.abs(errors / y_true)) * 100

    # Direction accuracy: compare period-over-period movement signs.
    # (The original diffed y_pred[:-1], yielding an array one element
    # shorter than directions_true and breaking the comparison.)
    directions_true = np.sign(np.diff(y_true))
    directions_pred = np.sign(np.diff(y_pred))
    dir_accuracy = np.mean(directions_true == directions_pred) * 100

    # Theil's U statistic: forecast RMSE relative to the naive
    # "no change" (random-walk) forecast.
    naive_mse = np.mean((y_true[1:] - y_true[:-1]) ** 2)
    theil_u = rmse / np.sqrt(naive_mse)

    return {
        'rmse': rmse, 'mae': mae, 'mape': mape,
        'dir_accuracy': dir_accuracy, 'theil_u': theil_u
    }

Ensemble Forecasting

Combine multiple models for improved accuracy:

from sklearn.linear_model import LinearRegression

def create_forecast_ensemble(forecasts_dict, actual_values):
    """Blend several model forecasts into one series.

    Parameters
    ----------
    forecasts_dict : dict[str, sequence]
        Model name -> forecast values, all of equal length.
    actual_values : sequence
        Realized values. Observations beyond the forecast window
        (len(actual_values) - len(forecasts)) train a non-negative
        linear meta-learner that sets blend weights; with no such
        overlap, models are weighted equally.

    Returns
    -------
    pandas.Series
        The weighted combination of the input forecasts.
    """
    forecasts_df = pd.DataFrame(forecasts_dict)
    train_size = len(actual_values) - len(forecasts_df)

    if train_size > 0:
        # Fit non-negative weights on the overlap between forecasts
        # and realized values.
        X_train = forecasts_df.iloc[:train_size]
        y_train = actual_values[:train_size]
        metalearner = LinearRegression(positive=True)
        metalearner.fit(X_train, y_train)

        total = metalearner.coef_.sum()
        if total > 0:
            # Normalize coefficients so the weights sum to 1.
            weights = {
                model: coef / total
                for model, coef in zip(forecasts_dict.keys(), metalearner.coef_)
            }
        else:
            # All coefficients were clipped to zero by positive=True —
            # fall back to equal weights instead of dividing by zero.
            weights = {model: 1.0 / len(forecasts_dict) for model in forecasts_dict}

        ensemble_forecast = sum(forecasts_df[model] * weight
                                for model, weight in weights.items())
    else:
        # No training overlap: equal weighting across models.
        weight = 1.0 / len(forecasts_dict)
        ensemble_forecast = sum(forecasts_df[model] * weight
                                for model in forecasts_dict)

    return ensemble_forecast

Handling Regime Changes

Detect structural breaks to improve forecast accuracy:

from arch.unitroot import StructBreak

def detect_and_forecast_with_regimes(time_series, forecast_horizon=12):
    """Forecast a series using only its most recent structural regime.

    Parameters
    ----------
    time_series : pandas.Series or array-like
        The observed series.
    forecast_horizon : int, default 12
        Number of steps ahead to forecast.

    Returns
    -------
    (forecast, break_indices)
        ARIMA forecast fitted on the post-break segment, and the
        detected break indices (empty when no break is found).
    """
    # Work on raw values; a pandas Series is unwrapped (its index is not
    # needed for break detection or the ARIMA fit, so the original's
    # unused `dates` variable has been dropped).
    if isinstance(time_series, pd.Series):
        values = time_series.values
    else:
        values = time_series

    # Test for structural breaks (at most 3).
    # NOTE(review): StructBreak / .breakpoint() does not appear in the
    # documented arch.unitroot API — verify this import actually resolves
    # before relying on this snippet.
    sb = StructBreak(values)
    results = sb.breakpoint(max_breaks=3)
    break_indices = results.breakpoints

    # If any break was found, keep only the latest regime so stale
    # pre-break dynamics do not contaminate the forecast.
    if len(break_indices) > 0:
        last_break = break_indices[-1]
        current_regime = values[last_break:]
    else:
        current_regime = values

    # Fixed ARIMA(2, 1, 2) on the current regime only.
    model = ARIMA(current_regime, order=(2, 1, 2))
    fit_results = model.fit()

    # Generate the out-of-sample forecast.
    forecast = fit_results.forecast(steps=forecast_horizon)

    return forecast, break_indices

By selecting appropriate models, evaluating properly, using ensembles, and handling regime changes, you can generate more accurate predictions for economic time series data.

Recent Articles