feat: indicators + comments

This commit is contained in:
Giulio De Pasquale 2025-10-15 19:02:05 +01:00
parent 8fe934b971
commit 9fa0e61cbb

View File

@ -15,7 +15,7 @@ class TickerData:
@dataclass
class TimeSeriesFeatures:
class TimeSeriesTickerData:
ticker: str
target_date: datetime
current_day_data: TickerData
@ -23,63 +23,253 @@ class TimeSeriesFeatures:
@dataclass
class TradingFeatures:
class TradeFeatures:
"""
Comprehensive feature set for ML-based trading models.
This class combines raw price data, engineered time-series features,
and technical indicators across multiple categories to provide a
complete market picture for prediction models.
Feature Categories:
- Raw OHLCV data (current day)
- Lagged features (5-day lookback)
- Rolling window statistics (5d, 10d, 30d)
- VIX volatility index features
- Momentum indicators (trend direction and strength)
- Volatility indicators (price dispersion and risk)
- Trend indicators (trend presence and sustainability)
- Volume indicators (institutional participation)
- Support/Resistance levels (key price zones)
- Market regime indicators (market condition classification)
"""
ticker: str
target_date: datetime
# Current day features
current_open: float
current_high: float
current_low: float
current_close: float
current_volume: float
# ========================================================================
# CURRENT DAY FEATURES (Raw OHLCV Data)
# ========================================================================
# Basic price and volume data for the target trading day.
# Research shows raw price data often outperforms technical indicators
# in feature importance for ML models.
# Lagged price features (last 5 days)
close_lag_1: float
close_lag_2: float
close_lag_3: float
close_lag_4: float
close_lag_5: float
current_open: float # Opening price of the trading day
current_high: float # Highest price reached during the day
current_low: float # Lowest price reached during the day
current_close: float # Closing price of the trading day
current_volume: float # Total shares/contracts traded during the day
# Lagged volume features
volume_lag_1: float
volume_lag_2: float
volume_lag_3: float
volume_lag_4: float
volume_lag_5: float
# ========================================================================
# LAGGED PRICE FEATURES (Last 5 Trading Days)
# ========================================================================
# Historical closing prices from the previous 5 trading days.
# Captures short-term price memory and recent momentum patterns.
# Lag-1 (previous day) typically has highest predictive power.
# 5-day rolling window features
rolling_5d_mean: float
rolling_5d_std: float
rolling_5d_min: float
rolling_5d_max: float
rolling_5d_range: float
rolling_5d_volume_mean: float
rolling_5d_returns: float
close_lag_1: float # Closing price 1 trading day ago (t-1)
close_lag_2: float # Closing price 2 trading days ago (t-2)
close_lag_3: float # Closing price 3 trading days ago (t-3)
close_lag_4: float # Closing price 4 trading days ago (t-4)
close_lag_5: float # Closing price 5 trading days ago (t-5)
# 10-day rolling window features
rolling_10d_mean: float
rolling_10d_std: float
rolling_10d_min: float
rolling_10d_max: float
rolling_10d_range: float
rolling_10d_volume_mean: float
rolling_10d_returns: float
# ========================================================================
# LAGGED VOLUME FEATURES (Last 5 Trading Days)
# ========================================================================
# Historical volume from the previous 5 trading days.
# Volume patterns often precede price movements and indicate
# institutional participation or distribution.
# 30-day rolling window features
rolling_30d_mean: float
rolling_30d_std: float
rolling_30d_min: float
rolling_30d_max: float
rolling_30d_range: float
rolling_30d_volume_mean: float
rolling_30d_returns: float
volume_lag_1: float # Volume 1 trading day ago (t-1)
volume_lag_2: float # Volume 2 trading days ago (t-2)
volume_lag_3: float # Volume 3 trading days ago (t-3)
volume_lag_4: float # Volume 4 trading days ago (t-4)
volume_lag_5: float # Volume 5 trading days ago (t-5)
# VIX features (from separate VIX time series)
vix_current: float
vix_lag_1: float
vix_lag_5: float
vix_rolling_5d_mean: float
vix_rolling_10d_mean: float
vix_rolling_30d_mean: float
vix_rolling_30d_std: float
# ========================================================================
# 5-DAY ROLLING WINDOW FEATURES (Short-term trend)
# ========================================================================
# Statistical aggregates over the last 5 trading days (1 week).
# Captures short-term momentum, volatility, and recent price action.
rolling_5d_mean: float # Average closing price over 5 days
rolling_5d_std: float # Standard deviation (volatility measure)
rolling_5d_min: float # Minimum closing price in window
rolling_5d_max: float # Maximum closing price in window
rolling_5d_range: float # Price range (max high - min low)
rolling_5d_volume_mean: float # Average volume over 5 days
rolling_5d_returns: float # Total return over 5-day period
# ========================================================================
# 10-DAY ROLLING WINDOW FEATURES (Medium-term trend)
# ========================================================================
# Statistical aggregates over the last 10 trading days (2 weeks).
# Captures medium-term trends and smooths out short-term noise.
rolling_10d_mean: float # Average closing price over 10 days
rolling_10d_std: float # Standard deviation (volatility measure)
rolling_10d_min: float # Minimum closing price in window
rolling_10d_max: float # Maximum closing price in window
rolling_10d_range: float # Price range (max high - min low)
rolling_10d_volume_mean: float # Average volume over 10 days
rolling_10d_returns: float # Total return over 10-day period
# ========================================================================
# 30-DAY ROLLING WINDOW FEATURES (Long-term trend)
# ========================================================================
# Statistical aggregates over the last 30 trading days (~6 weeks).
# Captures longer-term trends and establishes baseline behavior.
rolling_30d_mean: float # Average closing price over 30 days
rolling_30d_std: float # Standard deviation (volatility measure)
rolling_30d_min: float # Minimum closing price in window
rolling_30d_max: float # Maximum closing price in window
rolling_30d_range: float # Price range (max high - min low)
rolling_30d_volume_mean: float # Average volume over 30 days
rolling_30d_returns: float # Total return over 30-day period
# ========================================================================
# VIX FEATURES (Market-wide volatility and fear gauge)
# ========================================================================
# CBOE Volatility Index (VIX) features. VIX measures market expectation
# of 30-day volatility from S&P 500 options. Often called the "fear index".
# High VIX (>30) indicates fear/uncertainty, low VIX (<15) indicates complacency.
# VIX often inversely correlates with market returns.
vix_current: float # Current VIX level
vix_lag_1: float # VIX level 1 day ago (baseline for the 1-day change)
vix_lag_5: float # VIX level 5 days ago (baseline for the weekly change)
vix_rolling_5d_mean: float # Average VIX over last 5 days (short-term fear)
vix_rolling_10d_mean: float # Average VIX over last 10 days (medium-term fear)
vix_rolling_30d_mean: float # Average VIX over last 30 days (baseline volatility)
vix_rolling_30d_std: float # VIX volatility (volatility of volatility)
# ========================================================================
# MOMENTUM INDICATORS (Trend Direction & Strength)
# ========================================================================
# Indicators that measure the rate of price change and identify
# overbought/oversold conditions. Essential for trend-following strategies.
# RSI (Relative Strength Index)
# Measures momentum on a 0-100 scale. Above 70 = overbought, below 30 = oversold.
# 14-period is standard, 20-period provides smoother, longer-term signal.
rsi_14: float # Standard 14-period RSI
rsi_20: float # Longer 20-period RSI for smoother signal
# MACD (Moving Average Convergence Divergence)
# Trend-following momentum indicator showing relationship between two EMAs.
# Particularly effective with GRU/LSTM neural networks for stock prediction.
# Crossovers and divergences signal potential trend changes.
macd_line: float # MACD line (12 EMA - 26 EMA)
macd_signal: float # Signal line (9-period EMA of MACD)
macd_histogram: float # Histogram (MACD - Signal), shows momentum strength
# Stochastic Oscillator
# Compares closing price to price range over period. 0-100 scale.
# Above 80 = overbought, below 20 = oversold. Captures short-term extremes.
# %K is fast line (14-period), %D is slow line (3-period SMA of %K).
stoch_k: float # Fast stochastic %K (14-period)
stoch_d: float # Slow stochastic %D (3-period SMA of %K)
# ========================================================================
# VOLATILITY INDICATORS (Price Dispersion & Risk)
# ========================================================================
# Indicators that measure how much price fluctuates. Critical for
# risk management and identifying potential breakout/breakdown scenarios.
# Bollinger Bands
# Volatility bands plotted at standard deviations from moving average.
# Performs exceptionally well with LSTM networks by reducing noise.
# Bands expand during high volatility, contract during low volatility.
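# Price near the upper band = high relative to recent prices, near the lower
# band = relatively low. A band touch alone is not a signal; sustained closes
# along a band ("walking the band") indicate a strong trend in that direction.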
bb_upper: float # Upper Bollinger Band (SMA + 2*std)
bb_middle: float # Middle band (20-period SMA)
bb_lower: float # Lower Bollinger Band (SMA - 2*std)
bb_width: float # Band width (upper - lower), measures volatility magnitude
bb_percent: (
float # %B indicator: (close - lower) / (upper - lower), position in bands
)
# ATR (Average True Range)
# Measures absolute volatility independent of price direction.
# Higher ATR = higher volatility, useful for stop-loss placement.
# 14-period is industry standard.
atr_14: float # 14-period Average True Range
# ========================================================================
# TREND INDICATORS (Trend Presence & Sustainability)
# ========================================================================
# Unlike momentum indicators, these measure whether a trend EXISTS
# and how strong it is, not just the direction.
# ADX (Average Directional Index)
# Measures trend strength on 0-100 scale, regardless of direction.
# ADX > 25 = strong trend worth trading, ADX < 20 = weak/no trend.
# +DI and -DI show bullish vs bearish pressure.
adx_14: float # 14-period ADX (trend strength)
di_plus: float # +DI (bullish directional indicator)
di_minus: float # -DI (bearish directional indicator)
# Parabolic SAR (Stop and Reverse)
# Provides dynamic support/resistance levels and trailing stop points.
# SAR below price = uptrend (long), SAR above price = downtrend (short).
# Dots "flip" when trend reverses.
sar: float # Current Parabolic SAR level
# ========================================================================
# VOLUME INDICATORS (Institutional Participation)
# ========================================================================
# Volume precedes price. These indicators track smart money flow
# and institutional accumulation/distribution patterns.
# OBV (On-Balance Volume)
# Cumulative volume flow indicator. Rising OBV = accumulation (bullish),
# falling OBV = distribution (bearish). OBV divergences often predict reversals.
# Reportedly, 60-70% of volatility contraction pattern breakouts succeed when
# accompanied by strong volume (figure unsourced - verify before relying on it).
obv: float # On-Balance Volume cumulative total
obv_sma_20: float # 20-day SMA of OBV (trend confirmation)
# Volume Rate of Change
# Measures percentage change in volume. Spikes indicate increased interest.
# High positive values confirm price moves, negative values suggest weakness.
volume_roc_5: float # 5-day volume rate of change (%)
# ========================================================================
# SUPPORT/RESISTANCE INDICATORS (Key Price Levels)
# ========================================================================
# Identify potential price floors (support) and ceilings (resistance)
# where price may reverse or consolidate.
# Fibonacci Retracement Levels
# Based on Fibonacci ratios, commonly used to identify retracement targets.
# Performed well in ML models for price movement prediction.
# 23.6% = shallow retracement, 38.2% = moderate, 61.8% = deep (golden ratio)
fib_236: float # 23.6% Fibonacci retracement level
fib_382: float # 38.2% Fibonacci retracement level
fib_618: float # 61.8% Fibonacci retracement level (golden ratio)
# Pivot Points
# Classic support/resistance levels calculated from the previous day's high, low and close.
# Widely used by floor traders and algorithmic systems.
# Price above pivot = bullish bias, below = bearish bias.
pivot_point: float # Standard pivot point (High + Low + Close) / 3
resistance_1: float # First resistance level (R1)
support_1: float # First support level (S1)
# ========================================================================
# MARKET REGIME INDICATORS (Market Condition Classification)
# ========================================================================
# Help identify what type of market environment we're in
# (trending, ranging, volatile, calm, etc.)
# CCI (Commodity Channel Index)
# Identifies cyclical trends and extreme market conditions.
# Above +100 = overbought/strong uptrend, below -100 = oversold/strong downtrend.
# Particularly good at capturing short-term price movements.
cci_20: float # 20-period Commodity Channel Index
# Williams %R
# Momentum oscillator on -100 to 0 scale (inverted from Stochastic).
# Above -20 = overbought, below -80 = oversold.
# Complements RSI with different sensitivity and faster signals.
williams_r_14: float # 14-period Williams %R