diff --git a/paperone/data.py b/paperone/data.py index 3d552f4..a1134ec 100644 --- a/paperone/data.py +++ b/paperone/data.py @@ -15,7 +15,7 @@ class TickerData: @dataclass -class TimeSeriesFeatures: +class TimeSeriesTickerData: ticker: str target_date: datetime current_day_data: TickerData @@ -23,63 +23,253 @@ class TimeSeriesFeatures: @dataclass -class TradingFeatures: +class TradeFeatures: + """ + Comprehensive feature set for ML-based trading models. + + This class combines raw price data, engineered time-series features, + and technical indicators across multiple categories to provide a + complete market picture for prediction models. + + Feature Categories: + - Raw OHLCV data (current day) + - Lagged features (5-day lookback) + - Rolling window statistics (5d, 10d, 30d) + - VIX volatility index features + - Momentum indicators (trend direction and strength) + - Volatility indicators (price dispersion and risk) + - Trend indicators (trend presence and sustainability) + - Volume indicators (institutional participation) + - Support/Resistance levels (key price zones) + - Market regime indicators (market condition classification) + """ + ticker: str target_date: datetime - # Current day features - current_open: float - current_high: float - current_low: float - current_close: float - current_volume: float + # ======================================================================== + # CURRENT DAY FEATURES (Raw OHLCV Data) + # ======================================================================== + # Basic price and volume data for the target trading day. + # Research shows raw price data often outperforms technical indicators + # in feature importance for ML models. 
- # Lagged price features (last 5 days) - close_lag_1: float - close_lag_2: float - close_lag_3: float - close_lag_4: float - close_lag_5: float + current_open: float # Opening price of the trading day + current_high: float # Highest price reached during the day + current_low: float # Lowest price reached during the day + current_close: float # Closing price of the trading day + current_volume: float # Total shares/contracts traded during the day - # Lagged volume features - volume_lag_1: float - volume_lag_2: float - volume_lag_3: float - volume_lag_4: float - volume_lag_5: float + # ======================================================================== + # LAGGED PRICE FEATURES (Last 5 Trading Days) + # ======================================================================== + # Historical closing prices from the previous 5 trading days. + # Captures short-term price memory and recent momentum patterns. + # Lag-1 (previous day) typically has highest predictive power. - # 5-day rolling window features - rolling_5d_mean: float - rolling_5d_std: float - rolling_5d_min: float - rolling_5d_max: float - rolling_5d_range: float - rolling_5d_volume_mean: float - rolling_5d_returns: float + close_lag_1: float # Closing price 1 trading day ago (t-1) + close_lag_2: float # Closing price 2 trading days ago (t-2) + close_lag_3: float # Closing price 3 trading days ago (t-3) + close_lag_4: float # Closing price 4 trading days ago (t-4) + close_lag_5: float # Closing price 5 trading days ago (t-5) - # 10-day rolling window features - rolling_10d_mean: float - rolling_10d_std: float - rolling_10d_min: float - rolling_10d_max: float - rolling_10d_range: float - rolling_10d_volume_mean: float - rolling_10d_returns: float + # ======================================================================== + # LAGGED VOLUME FEATURES (Last 5 Trading Days) + # ======================================================================== + # Historical volume from the previous 5 trading days. 
+ # Volume patterns often precede price movements and indicate + # institutional accumulation or distribution. - # 30-day rolling window features - rolling_30d_mean: float - rolling_30d_std: float - rolling_30d_min: float - rolling_30d_max: float - rolling_30d_range: float - rolling_30d_volume_mean: float - rolling_30d_returns: float + volume_lag_1: float # Volume 1 trading day ago (t-1) + volume_lag_2: float # Volume 2 trading days ago (t-2) + volume_lag_3: float # Volume 3 trading days ago (t-3) + volume_lag_4: float # Volume 4 trading days ago (t-4) + volume_lag_5: float # Volume 5 trading days ago (t-5) - # VIX features (from separate VIX time series) - vix_current: float - vix_lag_1: float - vix_lag_5: float - vix_rolling_5d_mean: float - vix_rolling_10d_mean: float - vix_rolling_30d_mean: float - vix_rolling_30d_std: float + # ======================================================================== + # 5-DAY ROLLING WINDOW FEATURES (Short-term trend) + # ======================================================================== + # Statistical aggregates over the last 5 trading days (1 week). + # Captures short-term momentum, volatility, and recent price action. + + rolling_5d_mean: float # Average closing price over 5 days + rolling_5d_std: float # Standard deviation (volatility measure) + rolling_5d_min: float # Minimum closing price in window + rolling_5d_max: float # Maximum closing price in window + rolling_5d_range: float # Price range (max high - min low) + rolling_5d_volume_mean: float # Average volume over 5 days + rolling_5d_returns: float # Total return over 5-day period + + # ======================================================================== + # 10-DAY ROLLING WINDOW FEATURES (Medium-term trend) + # ======================================================================== + # Statistical aggregates over the last 10 trading days (2 weeks). + # Captures medium-term trends and smooths out short-term noise.
+ + rolling_10d_mean: float # Average closing price over 10 days + rolling_10d_std: float # Standard deviation (volatility measure) + rolling_10d_min: float # Minimum closing price in window + rolling_10d_max: float # Maximum closing price in window + rolling_10d_range: float # Price range (max high - min low) + rolling_10d_volume_mean: float # Average volume over 10 days + rolling_10d_returns: float # Total return over 10-day period + + # ======================================================================== + # 30-DAY ROLLING WINDOW FEATURES (Long-term trend) + # ======================================================================== + # Statistical aggregates over the last 30 trading days (~6 weeks). + # Captures longer-term trends and establishes baseline behavior. + + rolling_30d_mean: float # Average closing price over 30 days + rolling_30d_std: float # Standard deviation (volatility measure) + rolling_30d_min: float # Minimum closing price in window + rolling_30d_max: float # Maximum closing price in window + rolling_30d_range: float # Price range (max high - min low) + rolling_30d_volume_mean: float # Average volume over 30 days + rolling_30d_returns: float # Total return over 30-day period + + # ======================================================================== + # VIX FEATURES (Market-wide volatility and fear gauge) + # ======================================================================== + # CBOE Volatility Index (VIX) features. VIX measures market expectation + # of 30-day volatility from S&P 500 options. Often called the "fear index". + # High VIX (>30) indicates fear/uncertainty, low VIX (<15) indicates complacency. + # VIX often inversely correlates with market returns.
+ + vix_current: float # Current VIX level + vix_lag_1: float # VIX level 1 day ago (recent change) + vix_lag_5: float # VIX level 5 days ago (weekly change) + vix_rolling_5d_mean: float # Average VIX over last 5 days (short-term fear) + vix_rolling_10d_mean: float # Average VIX over last 10 days (medium-term fear) + vix_rolling_30d_mean: float # Average VIX over last 30 days (baseline volatility) + vix_rolling_30d_std: float # VIX volatility (volatility of volatility) + + # ======================================================================== + # MOMENTUM INDICATORS (Trend Direction & Strength) + # ======================================================================== + # Indicators that measure the rate of price change and identify + # overbought/oversold conditions. Essential for trend-following strategies. + + # RSI (Relative Strength Index) + # Measures momentum on a 0-100 scale. Above 70 = overbought, below 30 = oversold. + # 14-period is standard, 20-period provides smoother, longer-term signal. + rsi_14: float # Standard 14-period RSI + rsi_20: float # Longer 20-period RSI for smoother signal + + # MACD (Moving Average Convergence Divergence) + # Trend-following momentum indicator showing relationship between two EMAs. + # Particularly effective with GRU/LSTM neural networks for stock prediction. + # Crossovers and divergences signal potential trend changes. + macd_line: float # MACD line (12 EMA - 26 EMA) + macd_signal: float # Signal line (9-period EMA of MACD) + macd_histogram: float # Histogram (MACD - Signal), shows momentum strength + + # Stochastic Oscillator + # Compares closing price to price range over period. 0-100 scale. + # Above 80 = overbought, below 20 = oversold. Captures short-term extremes. + # %K is fast line (14-period), %D is slow line (3-period SMA of %K). 
+ stoch_k: float # Fast stochastic %K (14-period) + stoch_d: float # Slow stochastic %D (3-period SMA of %K) + + # ======================================================================== + # VOLATILITY INDICATORS (Price Dispersion & Risk) + # ======================================================================== + # Indicators that measure how much price fluctuates. Critical for + # risk management and identifying potential breakout/breakdown scenarios. + + # Bollinger Bands + # Volatility bands plotted at standard deviations from moving average. + # Performs exceptionally well with LSTM networks by reducing noise. + # Bands expand during high volatility, contract during low volatility. + # Price at upper band = strong uptrend, at lower band = strong downtrend. + bb_upper: float # Upper Bollinger Band (SMA + 2*std) + bb_middle: float # Middle band (20-period SMA) + bb_lower: float # Lower Bollinger Band (SMA - 2*std) + bb_width: float # Band width (upper - lower), measures volatility magnitude + bb_percent: ( + float # %B indicator: (close - lower) / (upper - lower), position in bands + ) + + # ATR (Average True Range) + # Measures absolute volatility independent of price direction. + # Higher ATR = higher volatility, useful for stop-loss placement. + # 14-period is industry standard. + atr_14: float # 14-period Average True Range + + # ======================================================================== + # TREND INDICATORS (Trend Presence & Sustainability) + # ======================================================================== + # Unlike momentum indicators, these measure whether a trend EXISTS + # and how strong it is, not just the direction. + + # ADX (Average Directional Index) + # Measures trend strength on 0-100 scale, regardless of direction. + # ADX > 25 = strong trend worth trading, ADX < 20 = weak/no trend. + # +DI and -DI show bullish vs bearish pressure. 
+ adx_14: float # 14-period ADX (trend strength) + di_plus: float # +DI (bullish directional indicator) + di_minus: float # -DI (bearish directional indicator) + + # Parabolic SAR (Stop and Reverse) + # Provides dynamic support/resistance levels and trailing stop points. + # SAR below price = uptrend (long), SAR above price = downtrend (short). + # Dots "flip" when trend reverses. + sar: float # Current Parabolic SAR level + + # ======================================================================== + # VOLUME INDICATORS (Institutional Participation) + # ======================================================================== + # Volume precedes price. These indicators track smart money flow + # and institutional accumulation/distribution patterns. + + # OBV (On-Balance Volume) + # Cumulative volume flow indicator. Rising OBV = accumulation (bullish), + # falling OBV = distribution (bearish). OBV divergences often predict reversals. + # 60-70% of volatility contraction pattern breakouts succeed with strong volume. + obv: float # On-Balance Volume cumulative total + obv_sma_20: float # 20-day SMA of OBV (trend confirmation) + + # Volume Rate of Change + # Measures percentage change in volume. Spikes indicate increased interest. + # High positive values confirm price moves, negative values suggest weakness. + volume_roc_5: float # 5-day volume rate of change (%) + + # ======================================================================== + # SUPPORT/RESISTANCE INDICATORS (Key Price Levels) + # ======================================================================== + # Identify potential price floors (support) and ceilings (resistance) + # where price may reverse or consolidate. + + # Fibonacci Retracement Levels + # Based on Fibonacci ratios, commonly used to identify retracement targets. + # Performed well in ML models for price movement prediction. 
+ # 23.6% = shallow retracement, 38.2% = moderate, 61.8% = deep (golden ratio) + fib_236: float # 23.6% Fibonacci retracement level + fib_382: float # 38.2% Fibonacci retracement level + fib_618: float # 61.8% Fibonacci retracement level (golden ratio) + + # Pivot Points + # Classic support/resistance levels calculated from previous day's OHLC. + # Widely used by floor traders and algorithmic systems. + # Price above pivot = bullish bias, below = bearish bias. + pivot_point: float # Standard pivot point (High + Low + Close) / 3 + resistance_1: float # First resistance level (R1) + support_1: float # First support level (S1) + + # ======================================================================== + # MARKET REGIME INDICATORS (Market Condition Classification) + # ======================================================================== + # Help identify what type of market environment we're in + # (trending, ranging, volatile, calm, etc.) + + # CCI (Commodity Channel Index) + # Identifies cyclical trends and extreme market conditions. + # Above +100 = overbought/strong uptrend, below -100 = oversold/strong downtrend. + # Particularly good at capturing short-term price movements. + cci_20: float # 20-period Commodity Channel Index + + # Williams %R + # Momentum oscillator on -100 to 0 scale (inverted from Stochastic). + # Above -20 = overbought, below -80 = oversold. + # Complements RSI with different sensitivity and faster signals. + williams_r_14: float # 14-period Williams %R