feat: indicators + comments
This commit is contained in:
parent
8fe934b971
commit
9fa0e61cbb
294
paperone/data.py
294
paperone/data.py
@ -15,7 +15,7 @@ class TickerData:
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimeSeriesFeatures:
|
||||
class TimeSeriesTickerData:
|
||||
ticker: str
|
||||
target_date: datetime
|
||||
current_day_data: TickerData
|
||||
@ -23,63 +23,253 @@ class TimeSeriesFeatures:
|
||||
|
||||
|
||||
@dataclass
|
||||
class TradingFeatures:
|
||||
class TradeFeatures:
|
||||
"""
|
||||
Comprehensive feature set for ML-based trading models.
|
||||
|
||||
This class combines raw price data, engineered time-series features,
|
||||
and technical indicators across multiple categories to provide a
|
||||
complete market picture for prediction models.
|
||||
|
||||
Feature Categories:
|
||||
- Raw OHLCV data (current day)
|
||||
- Lagged features (5-day lookback)
|
||||
- Rolling window statistics (5d, 10d, 30d)
|
||||
- VIX volatility index features
|
||||
- Momentum indicators (trend direction and strength)
|
||||
- Volatility indicators (price dispersion and risk)
|
||||
- Trend indicators (trend presence and sustainability)
|
||||
- Volume indicators (institutional participation)
|
||||
- Support/Resistance levels (key price zones)
|
||||
- Market regime indicators (market condition classification)
|
||||
"""
|
||||
|
||||
ticker: str
|
||||
target_date: datetime
|
||||
|
||||
# Current day features
|
||||
current_open: float
|
||||
current_high: float
|
||||
current_low: float
|
||||
current_close: float
|
||||
current_volume: float
|
||||
# ========================================================================
|
||||
# CURRENT DAY FEATURES (Raw OHLCV Data)
|
||||
# ========================================================================
|
||||
# Basic price and volume data for the target trading day.
|
||||
# Research shows raw price data often outperforms technical indicators
|
||||
# in feature importance for ML models.
|
||||
|
||||
# Lagged price features (last 5 days)
|
||||
close_lag_1: float
|
||||
close_lag_2: float
|
||||
close_lag_3: float
|
||||
close_lag_4: float
|
||||
close_lag_5: float
|
||||
current_open: float # Opening price of the trading day
|
||||
current_high: float # Highest price reached during the day
|
||||
current_low: float # Lowest price reached during the day
|
||||
current_close: float # Closing price of the trading day
|
||||
current_volume: float # Total shares/contracts traded during the day
|
||||
|
||||
# Lagged volume features
|
||||
volume_lag_1: float
|
||||
volume_lag_2: float
|
||||
volume_lag_3: float
|
||||
volume_lag_4: float
|
||||
volume_lag_5: float
|
||||
# ========================================================================
|
||||
# LAGGED PRICE FEATURES (Last 5 Trading Days)
|
||||
# ========================================================================
|
||||
# Historical closing prices from the previous 5 trading days.
|
||||
# Captures short-term price memory and recent momentum patterns.
|
||||
# Lag-1 (previous day) typically has highest predictive power.
|
||||
|
||||
# 5-day rolling window features
|
||||
rolling_5d_mean: float
|
||||
rolling_5d_std: float
|
||||
rolling_5d_min: float
|
||||
rolling_5d_max: float
|
||||
rolling_5d_range: float
|
||||
rolling_5d_volume_mean: float
|
||||
rolling_5d_returns: float
|
||||
close_lag_1: float # Closing price 1 trading day ago (t-1)
|
||||
close_lag_2: float # Closing price 2 trading days ago (t-2)
|
||||
close_lag_3: float # Closing price 3 trading days ago (t-3)
|
||||
close_lag_4: float # Closing price 4 trading days ago (t-4)
|
||||
close_lag_5: float # Closing price 5 trading days ago (t-5)
|
||||
|
||||
# 10-day rolling window features
|
||||
rolling_10d_mean: float
|
||||
rolling_10d_std: float
|
||||
rolling_10d_min: float
|
||||
rolling_10d_max: float
|
||||
rolling_10d_range: float
|
||||
rolling_10d_volume_mean: float
|
||||
rolling_10d_returns: float
|
||||
# ========================================================================
|
||||
# LAGGED VOLUME FEATURES (Last 5 Trading Days)
|
||||
# ========================================================================
|
||||
# Historical volume from the previous 5 trading days.
|
||||
# Volume patterns often precede price movements and indicate
|
||||
# institutional accumulation or distribution.
|
||||
|
||||
# 30-day rolling window features
|
||||
rolling_30d_mean: float
|
||||
rolling_30d_std: float
|
||||
rolling_30d_min: float
|
||||
rolling_30d_max: float
|
||||
rolling_30d_range: float
|
||||
rolling_30d_volume_mean: float
|
||||
rolling_30d_returns: float
|
||||
volume_lag_1: float # Volume 1 trading day ago (t-1)
|
||||
volume_lag_2: float # Volume 2 trading days ago (t-2)
|
||||
volume_lag_3: float # Volume 3 trading days ago (t-3)
|
||||
volume_lag_4: float # Volume 4 trading days ago (t-4)
|
||||
volume_lag_5: float # Volume 5 trading days ago (t-5)
|
||||
|
||||
# VIX features (from separate VIX time series)
|
||||
vix_current: float
|
||||
vix_lag_1: float
|
||||
vix_lag_5: float
|
||||
vix_rolling_5d_mean: float
|
||||
vix_rolling_10d_mean: float
|
||||
vix_rolling_30d_mean: float
|
||||
vix_rolling_30d_std: float
|
||||
# ========================================================================
|
||||
# 5-DAY ROLLING WINDOW FEATURES (Short-term trend)
|
||||
# ========================================================================
|
||||
# Statistical aggregates over the last 5 trading days (1 week).
|
||||
# Captures short-term momentum, volatility, and recent price action.
|
||||
|
||||
rolling_5d_mean: float # Average closing price over 5 days
|
||||
rolling_5d_std: float # Standard deviation (volatility measure)
|
||||
rolling_5d_min: float # Minimum closing price in window
|
||||
rolling_5d_max: float # Maximum closing price in window
|
||||
rolling_5d_range: float # Price range (max high - min low)
|
||||
rolling_5d_volume_mean: float # Average volume over 5 days
|
||||
rolling_5d_returns: float # Total return over 5-day period
|
||||
|
||||
# ========================================================================
|
||||
# 10-DAY ROLLING WINDOW FEATURES (Medium-term trend)
|
||||
# ========================================================================
|
||||
# Statistical aggregates over the last 10 trading days (2 weeks).
|
||||
# Captures medium-term trends and smooths out short-term noise.
|
||||
|
||||
rolling_10d_mean: float # Average closing price over 10 days
|
||||
rolling_10d_std: float # Standard deviation (volatility measure)
|
||||
rolling_10d_min: float # Minimum closing price in window
|
||||
rolling_10d_max: float # Maximum closing price in window
|
||||
rolling_10d_range: float # Price range (max high - min low)
|
||||
rolling_10d_volume_mean: float # Average volume over 10 days
|
||||
rolling_10d_returns: float # Total return over 10-day period
|
||||
|
||||
# ========================================================================
|
||||
# 30-DAY ROLLING WINDOW FEATURES (Long-term trend)
|
||||
# ========================================================================
|
||||
# Statistical aggregates over the last 30 trading days (~6 calendar weeks).
|
||||
# Captures longer-term trends and establishes baseline behavior.
|
||||
|
||||
rolling_30d_mean: float # Average closing price over 30 days
|
||||
rolling_30d_std: float # Standard deviation (volatility measure)
|
||||
rolling_30d_min: float # Minimum closing price in window
|
||||
rolling_30d_max: float # Maximum closing price in window
|
||||
rolling_30d_range: float # Price range (max high - min low)
|
||||
rolling_30d_volume_mean: float # Average volume over 30 days
|
||||
rolling_30d_returns: float # Total return over 30-day period
|
||||
|
||||
# ========================================================================
|
||||
# VIX FEATURES (Market-wide volatility and fear gauge)
|
||||
# ========================================================================
|
||||
# CBOE Volatility Index (VIX) features. VIX measures market expectation
|
||||
# of 30-day volatility from S&P 500 options. Often called the "fear index".
|
||||
# High VIX (>30) indicates fear/uncertainty, low VIX (<15) indicates complacency.
|
||||
# VIX often inversely correlates with market returns.
|
||||
|
||||
vix_current: float # Current VIX level
|
||||
vix_lag_1: float # VIX level 1 day ago (recent change)
|
||||
vix_lag_5: float # VIX level 5 days ago (weekly change)
|
||||
vix_rolling_5d_mean: float # Average VIX over last 5 days (short-term fear)
|
||||
vix_rolling_10d_mean: float # Average VIX over last 10 days (medium-term fear)
|
||||
vix_rolling_30d_mean: float # Average VIX over last 30 days (baseline volatility)
|
||||
vix_rolling_30d_std: float # VIX volatility (volatility of volatility)
|
||||
|
||||
# ========================================================================
|
||||
# MOMENTUM INDICATORS (Trend Direction & Strength)
|
||||
# ========================================================================
|
||||
# Indicators that measure the rate of price change and identify
|
||||
# overbought/oversold conditions. Essential for trend-following strategies.
|
||||
|
||||
# RSI (Relative Strength Index)
|
||||
# Measures momentum on a 0-100 scale. Above 70 = overbought, below 30 = oversold.
|
||||
# 14-period is standard, 20-period provides smoother, longer-term signal.
|
||||
rsi_14: float # Standard 14-period RSI
|
||||
rsi_20: float # Longer 20-period RSI for smoother signal
|
||||
|
||||
# MACD (Moving Average Convergence Divergence)
|
||||
# Trend-following momentum indicator showing relationship between two EMAs.
|
||||
# Particularly effective with GRU/LSTM neural networks for stock prediction.
|
||||
# Crossovers and divergences signal potential trend changes.
|
||||
macd_line: float # MACD line (12 EMA - 26 EMA)
|
||||
macd_signal: float # Signal line (9-period EMA of MACD)
|
||||
macd_histogram: float # Histogram (MACD - Signal), shows momentum strength
|
||||
|
||||
# Stochastic Oscillator
|
||||
# Compares closing price to price range over period. 0-100 scale.
|
||||
# Above 80 = overbought, below 20 = oversold. Captures short-term extremes.
|
||||
# %K is the fast line (14-period); %D is the slow line (3-period SMA of %K).
|
||||
stoch_k: float # Fast stochastic %K (14-period)
|
||||
stoch_d: float # Slow stochastic %D (3-period SMA of %K)
|
||||
|
||||
# ========================================================================
|
||||
# VOLATILITY INDICATORS (Price Dispersion & Risk)
|
||||
# ========================================================================
|
||||
# Indicators that measure how much price fluctuates. Critical for
|
||||
# risk management and identifying potential breakout/breakdown scenarios.
|
||||
|
||||
# Bollinger Bands
|
||||
# Volatility bands plotted at standard deviations from moving average.
|
||||
# Performs exceptionally well with LSTM networks by reducing noise.
|
||||
# Bands expand during high volatility, contract during low volatility.
|
||||
# Price at upper band = strong uptrend, at lower band = strong downtrend.
|
||||
bb_upper: float # Upper Bollinger Band (SMA + 2*std)
|
||||
bb_middle: float # Middle band (20-period SMA)
|
||||
bb_lower: float # Lower Bollinger Band (SMA - 2*std)
|
||||
bb_width: float # Band width (upper - lower), measures volatility magnitude
|
||||
bb_percent: (
|
||||
float # %B indicator: (close - lower) / (upper - lower), position in bands
|
||||
)
|
||||
|
||||
# ATR (Average True Range)
|
||||
# Measures absolute volatility independent of price direction.
|
||||
# Higher ATR = higher volatility, useful for stop-loss placement.
|
||||
# 14-period is industry standard.
|
||||
atr_14: float # 14-period Average True Range
|
||||
|
||||
# ========================================================================
|
||||
# TREND INDICATORS (Trend Presence & Sustainability)
|
||||
# ========================================================================
|
||||
# Unlike momentum indicators, these measure whether a trend EXISTS
|
||||
# and how strong it is, not just the direction.
|
||||
|
||||
# ADX (Average Directional Index)
|
||||
# Measures trend strength on 0-100 scale, regardless of direction.
|
||||
# ADX > 25 = strong trend worth trading, ADX < 20 = weak/no trend.
|
||||
# +DI and -DI show bullish vs bearish pressure.
|
||||
adx_14: float # 14-period ADX (trend strength)
|
||||
di_plus: float # +DI (bullish directional indicator)
|
||||
di_minus: float # -DI (bearish directional indicator)
|
||||
|
||||
# Parabolic SAR (Stop and Reverse)
|
||||
# Provides dynamic support/resistance levels and trailing stop points.
|
||||
# SAR below price = uptrend (long), SAR above price = downtrend (short).
|
||||
# Dots "flip" when trend reverses.
|
||||
sar: float # Current Parabolic SAR level
|
||||
|
||||
# ========================================================================
|
||||
# VOLUME INDICATORS (Institutional Participation)
|
||||
# ========================================================================
|
||||
# Volume precedes price. These indicators track smart money flow
|
||||
# and institutional accumulation/distribution patterns.
|
||||
|
||||
# OBV (On-Balance Volume)
|
||||
# Cumulative volume flow indicator. Rising OBV = accumulation (bullish),
|
||||
# falling OBV = distribution (bearish). OBV divergences often predict reversals.
|
||||
# 60-70% of volatility contraction pattern breakouts succeed with strong volume.
|
||||
obv: float # On-Balance Volume cumulative total
|
||||
obv_sma_20: float # 20-day SMA of OBV (trend confirmation)
|
||||
|
||||
# Volume Rate of Change
|
||||
# Measures percentage change in volume. Spikes indicate increased interest.
|
||||
# High positive values confirm price moves, negative values suggest weakness.
|
||||
volume_roc_5: float # 5-day volume rate of change (%)
|
||||
|
||||
# ========================================================================
|
||||
# SUPPORT/RESISTANCE INDICATORS (Key Price Levels)
|
||||
# ========================================================================
|
||||
# Identify potential price floors (support) and ceilings (resistance)
|
||||
# where price may reverse or consolidate.
|
||||
|
||||
# Fibonacci Retracement Levels
|
||||
# Based on Fibonacci ratios, commonly used to identify retracement targets.
|
||||
# Performed well in ML models for price movement prediction.
|
||||
# 23.6% = shallow retracement, 38.2% = moderate, 61.8% = deep (golden ratio)
|
||||
fib_236: float # 23.6% Fibonacci retracement level
|
||||
fib_382: float # 38.2% Fibonacci retracement level
|
||||
fib_618: float # 61.8% Fibonacci retracement level (golden ratio)
|
||||
|
||||
# Pivot Points
|
||||
# Classic support/resistance levels calculated from previous day's OHLC.
|
||||
# Widely used by floor traders and algorithmic systems.
|
||||
# Price above pivot = bullish bias, below = bearish bias.
|
||||
pivot_point: float # Standard pivot point (High + Low + Close) / 3
|
||||
resistance_1: float # First resistance level (R1)
|
||||
support_1: float # First support level (S1)
|
||||
|
||||
# ========================================================================
|
||||
# MARKET REGIME INDICATORS (Market Condition Classification)
|
||||
# ========================================================================
|
||||
# Help identify what type of market environment we're in
|
||||
# (trending, ranging, volatile, calm, etc.)
|
||||
|
||||
# CCI (Commodity Channel Index)
|
||||
# Identifies cyclical trends and extreme market conditions.
|
||||
# Above +100 = overbought/strong uptrend, below -100 = oversold/strong downtrend.
|
||||
# Particularly good at capturing short-term price movements.
|
||||
cci_20: float # 20-period Commodity Channel Index
|
||||
|
||||
# Williams %R
|
||||
# Momentum oscillator on -100 to 0 scale (inverted from Stochastic).
|
||||
# Above -20 = overbought, below -80 = oversold.
|
||||
# Complements RSI with different sensitivity and faster signals.
|
||||
williams_r_14: float # 14-period Williams %R
|
||||
|
Loading…
x
Reference in New Issue
Block a user