feat: indicators + comments
This commit is contained in:
parent
8fe934b971
commit
9fa0e61cbb
294
paperone/data.py
294
paperone/data.py
@ -15,7 +15,7 @@ class TickerData:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TimeSeriesFeatures:
|
class TimeSeriesTickerData:
|
||||||
ticker: str
|
ticker: str
|
||||||
target_date: datetime
|
target_date: datetime
|
||||||
current_day_data: TickerData
|
current_day_data: TickerData
|
||||||
@ -23,63 +23,253 @@ class TimeSeriesFeatures:
|
|||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TradingFeatures:
|
class TradeFeatures:
|
||||||
|
"""
|
||||||
|
Comprehensive feature set for ML-based trading models.
|
||||||
|
|
||||||
|
This class combines raw price data, engineered time-series features,
|
||||||
|
and technical indicators across multiple categories to provide a
|
||||||
|
complete market picture for prediction models.
|
||||||
|
|
||||||
|
Feature Categories:
|
||||||
|
- Raw OHLCV data (current day)
|
||||||
|
- Lagged features (5-day lookback)
|
||||||
|
- Rolling window statistics (5d, 10d, 30d)
|
||||||
|
- VIX volatility index features
|
||||||
|
- Momentum indicators (trend direction and strength)
|
||||||
|
- Volatility indicators (price dispersion and risk)
|
||||||
|
- Trend indicators (trend presence and sustainability)
|
||||||
|
- Volume indicators (institutional participation)
|
||||||
|
- Support/Resistance levels (key price zones)
|
||||||
|
- Market regime indicators (market condition classification)
|
||||||
|
"""
|
||||||
|
|
||||||
ticker: str
|
ticker: str
|
||||||
target_date: datetime
|
target_date: datetime
|
||||||
|
|
||||||
# Current day features
|
# ========================================================================
|
||||||
current_open: float
|
# CURRENT DAY FEATURES (Raw OHLCV Data)
|
||||||
current_high: float
|
# ========================================================================
|
||||||
current_low: float
|
# Basic price and volume data for the target trading day.
|
||||||
current_close: float
|
# Research shows raw price data often outperforms technical indicators
|
||||||
current_volume: float
|
# in feature importance for ML models.
|
||||||
|
|
||||||
# Lagged price features (last 5 days)
|
current_open: float # Opening price of the trading day
|
||||||
close_lag_1: float
|
current_high: float # Highest price reached during the day
|
||||||
close_lag_2: float
|
current_low: float # Lowest price reached during the day
|
||||||
close_lag_3: float
|
current_close: float # Closing price of the trading day
|
||||||
close_lag_4: float
|
current_volume: float # Total shares/contracts traded during the day
|
||||||
close_lag_5: float
|
|
||||||
|
|
||||||
# Lagged volume features
|
# ========================================================================
|
||||||
volume_lag_1: float
|
# LAGGED PRICE FEATURES (Last 5 Trading Days)
|
||||||
volume_lag_2: float
|
# ========================================================================
|
||||||
volume_lag_3: float
|
# Historical closing prices from the previous 5 trading days.
|
||||||
volume_lag_4: float
|
# Captures short-term price memory and recent momentum patterns.
|
||||||
volume_lag_5: float
|
# Lag-1 (previous day) typically has highest predictive power.
|
||||||
|
|
||||||
# 5-day rolling window features
|
close_lag_1: float # Closing price 1 trading day ago (t-1)
|
||||||
rolling_5d_mean: float
|
close_lag_2: float # Closing price 2 trading days ago (t-2)
|
||||||
rolling_5d_std: float
|
close_lag_3: float # Closing price 3 trading days ago (t-3)
|
||||||
rolling_5d_min: float
|
close_lag_4: float # Closing price 4 trading days ago (t-4)
|
||||||
rolling_5d_max: float
|
close_lag_5: float # Closing price 5 trading days ago (t-5)
|
||||||
rolling_5d_range: float
|
|
||||||
rolling_5d_volume_mean: float
|
|
||||||
rolling_5d_returns: float
|
|
||||||
|
|
||||||
# 10-day rolling window features
|
# ========================================================================
|
||||||
rolling_10d_mean: float
|
# LAGGED VOLUME FEATURES (Last 5 Trading Days)
|
||||||
rolling_10d_std: float
|
# ========================================================================
|
||||||
rolling_10d_min: float
|
# Historical volume from the previous 5 trading days.
|
||||||
rolling_10d_max: float
|
# Volume patterns often precede price movements and indicate
|
||||||
rolling_10d_range: float
|
# institutional participation or distribution.
|
||||||
rolling_10d_volume_mean: float
|
|
||||||
rolling_10d_returns: float
|
|
||||||
|
|
||||||
# 30-day rolling window features
|
volume_lag_1: float # Volume 1 trading day ago (t-1)
|
||||||
rolling_30d_mean: float
|
volume_lag_2: float # Volume 2 trading days ago (t-2)
|
||||||
rolling_30d_std: float
|
volume_lag_3: float # Volume 3 trading days ago (t-3)
|
||||||
rolling_30d_min: float
|
volume_lag_4: float # Volume 4 trading days ago (t-4)
|
||||||
rolling_30d_max: float
|
volume_lag_5: float # Volume 5 trading days ago (t-5)
|
||||||
rolling_30d_range: float
|
|
||||||
rolling_30d_volume_mean: float
|
|
||||||
rolling_30d_returns: float
|
|
||||||
|
|
||||||
# VIX features (from separate VIX time series)
|
# ========================================================================
|
||||||
vix_current: float
|
# 5-DAY ROLLING WINDOW FEATURES (Short-term trend)
|
||||||
vix_lag_1: float
|
# ========================================================================
|
||||||
vix_lag_5: float
|
# Statistical aggregates over the last 5 trading days (1 week).
|
||||||
vix_rolling_5d_mean: float
|
# Captures short-term momentum, volatility, and recent price action.
|
||||||
vix_rolling_10d_mean: float
|
|
||||||
vix_rolling_30d_mean: float
|
rolling_5d_mean: float # Average closing price over 5 days
|
||||||
vix_rolling_30d_std: float
|
rolling_5d_std: float # Standard deviation (volatility measure)
|
||||||
|
rolling_5d_min: float # Minimum closing price in window
|
||||||
|
rolling_5d_max: float # Maximum closing price in window
|
||||||
|
rolling_5d_range: float # Price range (max high - min low)
|
||||||
|
rolling_5d_volume_mean: float # Average volume over 5 days
|
||||||
|
rolling_5d_returns: float # Total return over 5-day period
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# 10-DAY ROLLING WINDOW FEATURES (Medium-term trend)
|
||||||
|
# ========================================================================
|
||||||
|
# Statistical aggregates over the last 10 trading days (2 weeks).
|
||||||
|
# Captures medium-term trends and smooths out short-term noise.
|
||||||
|
|
||||||
|
rolling_10d_mean: float # Average closing price over 10 days
|
||||||
|
rolling_10d_std: float # Standard deviation (volatility measure)
|
||||||
|
rolling_10d_min: float # Minimum closing price in window
|
||||||
|
rolling_10d_max: float # Maximum closing price in window
|
||||||
|
rolling_10d_range: float # Price range (max high - min low)
|
||||||
|
rolling_10d_volume_mean: float # Average volume over 10 days
|
||||||
|
rolling_10d_returns: float # Total return over 10-day period
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# 30-DAY ROLLING WINDOW FEATURES (Long-term trend)
|
||||||
|
# ========================================================================
|
||||||
|
# Statistical aggregates over the last 30 trading days (~6 weeks).
|
||||||
|
# Captures longer-term trends and establishes baseline behavior.
|
||||||
|
|
||||||
|
rolling_30d_mean: float # Average closing price over 30 days
|
||||||
|
rolling_30d_std: float # Standard deviation (volatility measure)
|
||||||
|
rolling_30d_min: float # Minimum closing price in window
|
||||||
|
rolling_30d_max: float # Maximum closing price in window
|
||||||
|
rolling_30d_range: float # Price range (max high - min low)
|
||||||
|
rolling_30d_volume_mean: float # Average volume over 30 days
|
||||||
|
rolling_30d_returns: float # Total return over 30-day period
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# VIX FEATURES (Market-wide volatility and fear gauge)
|
||||||
|
# ========================================================================
|
||||||
|
# CBOE Volatility Index (VIX) features. VIX measures market expectation
|
||||||
|
# of 30-day volatility from S&P 500 options. Often called the "fear index".
|
||||||
|
# High VIX (>30) indicates fear/uncertainty, low VIX (<15) indicates complacency.
|
||||||
|
# VIX often inversely correlates with market returns.
|
||||||
|
|
||||||
|
vix_current: float # Current VIX level
|
||||||
|
vix_lag_1: float # VIX level 1 day ago (recent change)
|
||||||
|
vix_lag_5: float # VIX level 5 days ago (weekly change)
|
||||||
|
vix_rolling_5d_mean: float # Average VIX over last 5 days (short-term fear)
|
||||||
|
vix_rolling_10d_mean: float # Average VIX over last 10 days (medium-term fear)
|
||||||
|
vix_rolling_30d_mean: float # Average VIX over last 30 days (baseline volatility)
|
||||||
|
vix_rolling_30d_std: float # VIX volatility (volatility of volatility)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# MOMENTUM INDICATORS (Trend Direction & Strength)
|
||||||
|
# ========================================================================
|
||||||
|
# Indicators that measure the rate of price change and identify
|
||||||
|
# overbought/oversold conditions. Essential for trend-following strategies.
|
||||||
|
|
||||||
|
# RSI (Relative Strength Index)
|
||||||
|
# Measures momentum on a 0-100 scale. Above 70 = overbought, below 30 = oversold.
|
||||||
|
# 14-period is standard, 20-period provides smoother, longer-term signal.
|
||||||
|
rsi_14: float # Standard 14-period RSI
|
||||||
|
rsi_20: float # Longer 20-period RSI for smoother signal
|
||||||
|
|
||||||
|
# MACD (Moving Average Convergence Divergence)
|
||||||
|
# Trend-following momentum indicator showing relationship between two EMAs.
|
||||||
|
# Particularly effective with GRU/LSTM neural networks for stock prediction.
|
||||||
|
# Crossovers and divergences signal potential trend changes.
|
||||||
|
macd_line: float # MACD line (12 EMA - 26 EMA)
|
||||||
|
macd_signal: float # Signal line (9-period EMA of MACD)
|
||||||
|
macd_histogram: float # Histogram (MACD - Signal), shows momentum strength
|
||||||
|
|
||||||
|
# Stochastic Oscillator
|
||||||
|
# Compares closing price to price range over period. 0-100 scale.
|
||||||
|
# Above 80 = overbought, below 20 = oversold. Captures short-term extremes.
|
||||||
|
# %K is fast line (14-period), %D is slow line (3-period SMA of %K).
|
||||||
|
stoch_k: float # Fast stochastic %K (14-period)
|
||||||
|
stoch_d: float # Slow stochastic %D (3-period SMA of %K)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# VOLATILITY INDICATORS (Price Dispersion & Risk)
|
||||||
|
# ========================================================================
|
||||||
|
# Indicators that measure how much price fluctuates. Critical for
|
||||||
|
# risk management and identifying potential breakout/breakdown scenarios.
|
||||||
|
|
||||||
|
# Bollinger Bands
|
||||||
|
# Volatility bands plotted at standard deviations from moving average.
|
||||||
|
# Performs exceptionally well with LSTM networks by reducing noise.
|
||||||
|
# Bands expand during high volatility, contract during low volatility.
|
||||||
|
# Price at upper band = strong uptrend, at lower band = strong downtrend.
|
||||||
|
bb_upper: float # Upper Bollinger Band (SMA + 2*std)
|
||||||
|
bb_middle: float # Middle band (20-period SMA)
|
||||||
|
bb_lower: float # Lower Bollinger Band (SMA - 2*std)
|
||||||
|
bb_width: float # Band width (upper - lower), measures volatility magnitude
|
||||||
|
bb_percent: (
|
||||||
|
float # %B indicator: (close - lower) / (upper - lower), position in bands
|
||||||
|
)
|
||||||
|
|
||||||
|
# ATR (Average True Range)
|
||||||
|
# Measures absolute volatility independent of price direction.
|
||||||
|
# Higher ATR = higher volatility, useful for stop-loss placement.
|
||||||
|
# 14-period is industry standard.
|
||||||
|
atr_14: float # 14-period Average True Range
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# TREND INDICATORS (Trend Presence & Sustainability)
|
||||||
|
# ========================================================================
|
||||||
|
# Unlike momentum indicators, these measure whether a trend EXISTS
|
||||||
|
# and how strong it is, not just the direction.
|
||||||
|
|
||||||
|
# ADX (Average Directional Index)
|
||||||
|
# Measures trend strength on 0-100 scale, regardless of direction.
|
||||||
|
# ADX > 25 = strong trend worth trading, ADX < 20 = weak/no trend.
|
||||||
|
# +DI and -DI show bullish vs bearish pressure.
|
||||||
|
adx_14: float # 14-period ADX (trend strength)
|
||||||
|
di_plus: float # +DI (bullish directional indicator)
|
||||||
|
di_minus: float # -DI (bearish directional indicator)
|
||||||
|
|
||||||
|
# Parabolic SAR (Stop and Reverse)
|
||||||
|
# Provides dynamic support/resistance levels and trailing stop points.
|
||||||
|
# SAR below price = uptrend (long), SAR above price = downtrend (short).
|
||||||
|
# Dots "flip" when trend reverses.
|
||||||
|
sar: float # Current Parabolic SAR level
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# VOLUME INDICATORS (Institutional Participation)
|
||||||
|
# ========================================================================
|
||||||
|
# Volume precedes price. These indicators track smart money flow
|
||||||
|
# and institutional accumulation/distribution patterns.
|
||||||
|
|
||||||
|
# OBV (On-Balance Volume)
|
||||||
|
# Cumulative volume flow indicator. Rising OBV = accumulation (bullish),
|
||||||
|
# falling OBV = distribution (bearish). OBV divergences often predict reversals.
|
||||||
|
# 60-70% of volatility contraction pattern breakouts succeed with strong volume.
|
||||||
|
obv: float # On-Balance Volume cumulative total
|
||||||
|
obv_sma_20: float # 20-day SMA of OBV (trend confirmation)
|
||||||
|
|
||||||
|
# Volume Rate of Change
|
||||||
|
# Measures percentage change in volume. Spikes indicate increased interest.
|
||||||
|
# High positive values confirm price moves, negative values suggest weakness.
|
||||||
|
volume_roc_5: float # 5-day volume rate of change (%)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# SUPPORT/RESISTANCE INDICATORS (Key Price Levels)
|
||||||
|
# ========================================================================
|
||||||
|
# Identify potential price floors (support) and ceilings (resistance)
|
||||||
|
# where price may reverse or consolidate.
|
||||||
|
|
||||||
|
# Fibonacci Retracement Levels
|
||||||
|
# Based on Fibonacci ratios, commonly used to identify retracement targets.
|
||||||
|
# Performed well in ML models for price movement prediction.
|
||||||
|
# 23.6% = shallow retracement, 38.2% = moderate, 61.8% = deep (golden ratio)
|
||||||
|
fib_236: float # 23.6% Fibonacci retracement level
|
||||||
|
fib_382: float # 38.2% Fibonacci retracement level
|
||||||
|
fib_618: float # 61.8% Fibonacci retracement level (golden ratio)
|
||||||
|
|
||||||
|
# Pivot Points
|
||||||
|
# Classic support/resistance levels calculated from previous day's OHLC.
|
||||||
|
# Widely used by floor traders and algorithmic systems.
|
||||||
|
# Price above pivot = bullish bias, below = bearish bias.
|
||||||
|
pivot_point: float # Standard pivot point (High + Low + Close) / 3
|
||||||
|
resistance_1: float # First resistance level (R1)
|
||||||
|
support_1: float # First support level (S1)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# MARKET REGIME INDICATORS (Market Condition Classification)
|
||||||
|
# ========================================================================
|
||||||
|
# Help identify what type of market environment we're in
|
||||||
|
# (trending, ranging, volatile, calm, etc.)
|
||||||
|
|
||||||
|
# CCI (Commodity Channel Index)
|
||||||
|
# Identifies cyclical trends and extreme market conditions.
|
||||||
|
# Above +100 = overbought/strong uptrend, below -100 = oversold/strong downtrend.
|
||||||
|
# Particularly good at capturing short-term price movements.
|
||||||
|
cci_20: float # 20-period Commodity Channel Index
|
||||||
|
|
||||||
|
# Williams %R
|
||||||
|
# Momentum oscillator on -100 to 0 scale (inverted from Stochastic).
|
||||||
|
# Above -20 = overbought, below -80 = oversold.
|
||||||
|
# Complements RSI with different sensitivity and faster signals.
|
||||||
|
williams_r_14: float # 14-period Williams %R
|
||||||
|
Loading…
x
Reference in New Issue
Block a user