From 9f2cce1d240458a16b96d3fb1dd72e2870a9e62d Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:34:10 -0400 Subject: [PATCH 01/32] add new parameters filter_rolling_window, filter_relative_volume_clip_pct --- configs/template.json | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/configs/template.json b/configs/template.json index c9d30da2a..88062f907 100644 --- a/configs/template.json +++ b/configs/template.json @@ -20,6 +20,8 @@ "entry_trailing_grid_ratio": -0.28, "entry_trailing_retracement_pct": 0.0024735, "entry_trailing_threshold_pct": -0.062799, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.776, "total_wallet_exposure_limit": 0.97499, "unstuck_close_pct": 0.049666, @@ -43,6 +45,8 @@ "entry_trailing_grid_ratio": -0.3633, "entry_trailing_retracement_pct": 0.06044, "entry_trailing_threshold_pct": -0.084207, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 7.6679, "total_wallet_exposure_limit": 0.0, "unstuck_close_pct": 0.052781, @@ -61,10 +65,8 @@ "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "minimum_coin_age_days": 30.0, - "ohlcv_rolling_window": 60, "pnls_max_lookback_days": 30.0, "price_distance_threshold": 0.002, - "relative_volume_filter_clip_pct": 0.5, "time_in_force": "good_till_cancelled", "user": "bybit_01"}, "optimize": {"bounds": {"long_close_grid_markup_range": [0.0, 0.03], @@ -84,6 +86,8 @@ "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], + "long_filter_rolling_window": [10.0, 1440.0], + "long_filter_relative_volume_clip_pct": [0.0, 1.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 5.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -107,6 +111,8 @@ "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], + "short_filter_rolling_window": [10.0, 1440.0], + "short_filter_relative_volume_clip_pct": [0.0, 1.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 5.0], "short_unstuck_close_pct": [0.001, 0.1], From 13fae9934449acb8786b21a5b05d067ed8336e54 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:35:28 -0400 Subject: [PATCH 02/32] update for new filter params --- notebooks/notes_backtest.ipynb | 38 +++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/notebooks/notes_backtest.ipynb b/notebooks/notes_backtest.ipynb index 57f7b4056..8163a7697 100644 --- a/notebooks/notes_backtest.ipynb +++ b/notebooks/notes_backtest.ipynb @@ -28,13 +28,26 @@ "cell_type": "code", "execution_count": null, "id": "97f846a3-874c-48f4-93d8-829edc0b4bce", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "config = load_config('configs/template.json')\n", "{k: config[k] for k in ['backtest', 'bot', 'live']}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "60155aa0-a1d3-4763-b314-5e015643b527", + "metadata": {}, + "outputs": [], + "source": [ + "#config['backtest']['symbols'] = config['backtest']['symbols'][::10]\n", + "config['backtest']['start_date'] = '2024-03-01'\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,18 +56,19 @@ "outputs": [], "source": [ "symbols = config['backtest']['symbols']\n", - "symbols, hlcvs, mss, results_path = await 
prepare_hlcvs_mss(config)" ] }, { "cell_type": "code", "execution_count": null, "id": "2f473002-2ba5-41e3-9f06-ff56aaea2318", "metadata": {}, "outputs": [], "source": [ "#config['bot']['long']['n_positions'] = 3\n", "#config['bot']['long']['filter_rolling_window'] = 1440\n", "#config['bot']['long']['filter_relative_volume_clip_pct'] = 0.5" ] }, { "cell_type": "code", "execution_count": null, @@ -64,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "fills, equities, analysis = run_backtest(hlcs, preferred_coins, mss, config)" + "fills, equities, analysis = run_backtest(hlcvs, mss, config)" ] }, { "cell_type": "code", "execution_count": null, - "id": "b3ce4490-63ad-4e1f-8477-d188cc058fa4", + "id": "b0a86c90-e834-4016-abfa-8158e2d227cb", "metadata": {}, "outputs": [], "source": [ @@ -79,7 +93,7 @@ "print(f'elapsed {utc_ms() - sts}')\n", "sts = utc_ms()\n", "equities = pd.Series(equities)\n", - "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['symbols'], hlcs, fdf, equities)\n", + "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['symbols'], hlcvs, fdf, equities)\n", "for k in analysis_py:\n", " if k not in analysis:\n", " analysis[k] = analysis_py[k]\n", @@ -88,6 +102,14 @@ "balance_and_equity.plot()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4be85e1-6a27-4864-8e5f-cd78d23abf52", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, From 060e29480ddbca93284bed847012a2ebe533cc67 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:35:51 -0400 Subject: [PATCH 03/32] compute preferred coins on the fly --- passivbot-rust/src/backtest.rs | 264 ++++++++++++++++++++------------ passivbot-rust/src/constants.rs | 1 + passivbot-rust/src/lib.rs | 2 - passivbot-rust/src/python.rs | 54 ++----- passivbot-rust/src/types.rs | 2 + 5 files changed, 180 insertions(+), 143 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 44f46108e..e14c0c55c 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -1,7 +1,7 @@ use crate::closes::{ calc_closes_long, calc_closes_short, calc_next_close_long, calc_next_close_short, }; -use crate::constants::{CLOSE, HIGH, LONG, LOW, NO_POS, SHORT}; +use crate::constants::{CLOSE, HIGH, LONG, LOW, NO_POS, SHORT, VOLUME}; use crate::entries::{ calc_entries_long, calc_entries_short, calc_min_entry_qty, calc_next_entry_long, calc_next_entry_short, }; use crate::types::{ Analysis, BacktestParams, BotParams, BotParamsPair, EMABands, ExchangeParams, Fill, Order, OrderBook, OrderType, Position, Positions, StateParams, TrailingPriceBundle, }; use crate::utils::{ calc_pprice_diff_int, calc_wallet_exposure, cost_to_qty, qty_to_cost, round_, round_dn, round_up, }; -use ndarray::s; -use ndarray::{Array1, Array2, Array3, Array4}; +use ndarray::{s, Array1, Array2, Array3, Array4, Axis}; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; @@ -122,9 +121,14 @@ pub struct TradingEnabled { short: bool, } +pub struct PreferredCoins { + long: Vec<usize>, + short: Vec<usize>, +} + pub struct Backtest { - hlcs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 3) - preferred_coins: Array2<i32>, // 2D array: (n_timesteps, n_markets) + hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 4) + preferred_coins: PreferredCoins, bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, @@ -147,33 +151,46 @@ pub struct Backtest { delist_timestamps: HashMap<usize, usize>, did_fill_long: 
HashSet<usize>, did_fill_short: HashSet<usize>, + rolling_volumes: Vec<Vec<f64>>, } impl Backtest { pub fn new( - hlcs: Array3<f64>, - preferred_coins: Array2<i32>, + hlcvs: Array3<f64>, bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: &BacktestParams, ) -> Self { - let n_markets = hlcs.shape()[1]; + let n_timesteps = hlcvs.shape()[0]; + let n_markets = hlcvs.shape()[1]; + let max_window = bot_params_pair + .long + .filter_rolling_window + .max(bot_params_pair.short.filter_rolling_window); + + // Initialize rolling_volumes with zeros + let rolling_volumes = vec![vec![0.0; n_markets]; n_timesteps]; + let initial_emas = (0..n_markets) .map(|i| { - let close_price = hlcs[[0, i, CLOSE]]; + let close_price = hlcvs[[0, i, CLOSE]]; EMAs { long: [close_price; 3], short: [close_price; 3], } }) .collect(); + let preferred_coins = PreferredCoins { + long: Vec::<usize>::new(), + short: Vec::<usize>::new(), + }; let mut equities = Vec::<f64>::new(); equities.push(backtest_params.starting_balance); let mut bot_params_pair_cloned = bot_params_pair.clone(); bot_params_pair_cloned.long.n_positions = n_markets.min(bot_params_pair.long.n_positions); bot_params_pair_cloned.short.n_positions = n_markets.min(bot_params_pair.short.n_positions); - Backtest { - hlcs, + let mut backtest = Backtest { + hlcvs, preferred_coins, bot_params_pair: bot_params_pair_cloned, exchange_params_list, @@ -207,12 +224,109 @@ impl Backtest { delist_timestamps: HashMap::new(), did_fill_long: HashSet::new(), did_fill_short: HashSet::new(), + rolling_volumes, + }; + backtest.initialize_rolling_volumes(max_window); + backtest + } + + fn initialize_rolling_volumes(&mut self, max_window: usize) { + let n_markets = self.hlcvs.shape()[1]; + let n_timesteps = self.hlcvs.shape()[0]; + + for k in 0..n_timesteps { + let start = k.saturating_sub(max_window - 1); + for i in 0..n_markets { + // Update rolling volume + self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); + } } } + fn update_rolling_volumes(&mut self, k: usize) { + let n_markets = self.hlcvs.shape()[1]; + let max_window = self + .bot_params_pair + .long + .filter_rolling_window + .max(self.bot_params_pair.short.filter_rolling_window); + + if k >= max_window { + let old_k = k - max_window; + for i in 0..n_markets { + self.rolling_volumes[k][i] = self.rolling_volumes[k - 1][i] + + self.hlcvs[[k, i, VOLUME]] + - self.hlcvs[[old_k, i, VOLUME]]; + } + } else { + // For the first max_window steps, we need to recalculate the full sum + let start = 0; + for i in 0..n_markets { + self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); + } + } + } + + fn calc_noisiness(&self, k: usize, idx: usize, window: usize) -> f64 { + let start = k.saturating_sub(window - 1); + let slice = self.hlcvs.slice(s![start..=k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + nrr_sum / (k - start + 1) as f64 + } + + fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + let bot_params = match pside { + LONG => &self.bot_params_pair.long, + SHORT => &self.bot_params_pair.short, + _ => panic!("Invalid pside"), + }; + + let n_coins = self.hlcvs.shape()[1]; + + // Use pre-computed rolling volumes + let mut volume_sums: Vec<(usize, f64)> = self.rolling_volumes[k] + .iter() + .enumerate() + .map(|(idx, &sum)| (idx, sum)) + .collect(); + + // Sort by volume in descending order + volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Filter by volume + let n_eligible = 
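+        // illustrative example: with n_coins = 100 and
+        // filter_relative_volume_clip_pct = 0.95, only the top 5 coins by
+        // rolling volume remain eligible, floored at n_positions: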
bot_params.n_positions.max( (n_coins as f64 * (1.0 - bot_params.filter_relative_volume_clip_pct)).round() as usize, ); let filtered_indices: Vec<usize> = volume_sums .iter() .take(n_eligible) .map(|&(idx, _)| idx) .collect(); // Calculate noisiness on-the-fly for filtered coins let mut noisiness: Vec<(usize, f64)> = filtered_indices .into_iter() .map(|idx| { ( idx, self.calc_noisiness(k, idx, bot_params.filter_rolling_window), ) }) .collect(); // Sort by noisiness in descending order noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Return indices sorted by noisiness noisiness.into_iter().map(|(idx, _)| idx).collect() } pub fn run(&mut self) -> (Vec<Fill>, Vec<f64>) { let check_points: Vec<usize> = (0..7).map(|i| i * 60 * 24).collect(); - let n_timesteps = self.hlcs.shape()[0]; + let n_timesteps = self.hlcvs.shape()[0]; for idx in 0..self.n_markets { self.trailing_prices .long .insert(idx, TrailingPriceBundle::default()); @@ -224,18 +338,18 @@ impl Backtest { // check if the coin was delisted at any point if n_timesteps > *check_points.last().unwrap() { - let last_hlc_close = self.hlcs[[n_timesteps - 1, idx, CLOSE]]; + let last_hlc_close = self.hlcvs[[n_timesteps - 1, idx, CLOSE]]; if check_points.iter().all(|&point| { - self.hlcs[[n_timesteps - 1 - point, idx, HIGH]] == last_hlc_close - && self.hlcs[[n_timesteps - 1 - point, idx, LOW]] == last_hlc_close - && self.hlcs[[n_timesteps - 1 - point, idx, CLOSE]] == last_hlc_close + self.hlcvs[[n_timesteps - 1 - point, idx, HIGH]] == last_hlc_close + && self.hlcvs[[n_timesteps - 1 - point, idx, LOW]] == last_hlc_close + && self.hlcvs[[n_timesteps - 1 - point, idx, CLOSE]] == last_hlc_close }) { // was delisted. Find timestamp of delisting let mut i = n_timesteps - check_points.last().unwrap(); while i > 0 - && self.hlcs[[i, idx, HIGH]] == last_hlc_close - && self.hlcs[[i, idx, LOW]] == last_hlc_close - && self.hlcs[[i, idx, CLOSE]] == last_hlc_close + && self.hlcvs[[i, idx, HIGH]] == last_hlc_close + && self.hlcvs[[i, idx, LOW]] == last_hlc_close + && self.hlcvs[[i, idx, CLOSE]] == last_hlc_close { i -= 1; } @@ -246,6 +360,7 @@ impl Backtest { } } for k in 1..(n_timesteps - 1) { + self.update_rolling_volumes(k); self.check_for_fills(k); self.update_emas(k); self.update_open_orders(k); @@ -255,7 +370,7 @@ impl Backtest { } fn create_state_params(&self, k: usize, idx: usize, pside: usize) -> StateParams { - let close_price = self.hlcs[[k, idx, CLOSE]]; + let close_price = self.hlcvs[[k, idx, CLOSE]]; StateParams { balance: self.balance, order_book: OrderBook { @@ -278,7 +393,7 @@ impl Backtest { let mut equity = self.balance; // Calculate unrealized PnL for long positions for (&idx, position) in &self.positions.long { - let current_price = self.hlcs[[k, idx, CLOSE]]; + let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_long( position.price, current_price, @@ -289,7 +404,7 @@ impl Backtest { } // Calculate unrealized PnL for short positions for (&idx, position) in &self.positions.short { - let current_price = self.hlcs[[k, idx, CLOSE]]; + let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_short( position.price, current_price, @@ -302,6 +417,9 @@ impl Backtest { } fn update_actives(&mut self, k: usize, pside: usize) -> Vec<usize> { + // Calculate preferred coins first + let preferred_coins = self.calc_preferred_coins(k, pside); + let (actives, positions, n_positions) = match pside { LONG => ( &mut self.actives.long, &self.positions.long, self.bot_params_pair.long.n_positions, ), SHORT => ( &mut self.actives.short, &self.positions.short, self.bot_params_pair.short.n_positions, ), _ => panic!("Invalid pside"), }; let mut 
actives_without_pos = Vec::with_capacity(n_positions); actives.clear(); + + // First, add all markets with existing positions for &market_idx in positions.keys() { actives.insert(market_idx); } - // Add additional markets based on preferred_coins - for &market_idx in self.preferred_coins.row(k).iter() { - let market_idx = market_idx as usize; + + // Then, add additional markets based on preferred_coins + for &market_idx in &preferred_coins { if actives.len() < n_positions { if actives.insert(market_idx) { - // Only add to actives_without_pos if it's a new insertion actives_without_pos.push(market_idx); } } else { break; } } + actives_without_pos } @@ -705,21 +826,21 @@ impl Backtest { } else { self.trailing_prices.short.entry(idx).or_default() }; - if self.hlcs[[k, idx, LOW]] < trailing_price_bundle.min_since_open { - trailing_price_bundle.min_since_open = self.hlcs[[k, idx, LOW]]; - trailing_price_bundle.max_since_min = self.hlcs[[k, idx, CLOSE]]; + if self.hlcvs[[k, idx, LOW]] < trailing_price_bundle.min_since_open { + trailing_price_bundle.min_since_open = self.hlcvs[[k, idx, LOW]]; + trailing_price_bundle.max_since_min = self.hlcvs[[k, idx, CLOSE]]; } else { trailing_price_bundle.max_since_min = trailing_price_bundle .max_since_min - .max(self.hlcs[[k, idx, HIGH]]); + .max(self.hlcvs[[k, idx, HIGH]]); } - if self.hlcs[[k, idx, HIGH]] > trailing_price_bundle.max_since_open { - trailing_price_bundle.max_since_open = self.hlcs[[k, idx, HIGH]]; - trailing_price_bundle.min_since_max = self.hlcs[[k, idx, CLOSE]]; + if self.hlcvs[[k, idx, HIGH]] > trailing_price_bundle.max_since_open { + trailing_price_bundle.max_since_open = self.hlcvs[[k, idx, HIGH]]; + trailing_price_bundle.min_since_max = self.hlcvs[[k, idx, CLOSE]]; } else { trailing_price_bundle.min_since_max = trailing_price_bundle .min_since_max - .min(self.hlcs[[k, idx, LOW]]); + .min(self.hlcvs[[k, idx, LOW]]); } } @@ -767,7 +888,7 @@ impl Backtest { qty: -self.positions.long[&idx].size, price: round_( f64::min( - self.hlcs[[k, idx, HIGH]] - self.exchange_params_list[idx].price_step, + self.hlcvs[[k, idx, HIGH]] - self.exchange_params_list[idx].price_step, self.positions.long[&idx].price, ), self.exchange_params_list[idx].price_step, @@ -839,7 +960,7 @@ impl Backtest { qty: self.positions.short[&idx].size.abs(), price: round_( f64::max( - self.hlcs[[k, idx, LOW]] + self.exchange_params_list[idx].price_step, + self.hlcvs[[k, idx, LOW]] + self.exchange_params_list[idx].price_step, self.positions.short[&idx].price, ), self.exchange_params_list[idx].price_step, @@ -900,9 +1021,9 @@ impl Backtest { fn order_filled(&self, k: usize, idx: usize, order: &Order) -> bool { // check if will fill in next candle if order.qty > 0.0 { - self.hlcs[[k, idx, LOW]] < order.price + self.hlcvs[[k, idx, LOW]] < order.price } else if order.qty < 0.0 { - self.hlcs[[k, idx, HIGH]] > order.price + self.hlcvs[[k, idx, HIGH]] > order.price } else { false } @@ -933,7 +1054,7 @@ impl Backtest { > self.bot_params_pair.long.unstuck_threshold { let pprice_diff = - calc_pprice_diff_int(LONG, position.price, self.hlcs[[k, idx, CLOSE]]); + calc_pprice_diff_int(LONG, position.price, self.hlcvs[[k, idx, CLOSE]]); stuck_positions.push((idx, LONG, pprice_diff)); } } @@ -960,8 +1081,11 @@ impl Backtest { if wallet_exposure / self.bot_params_pair.short.wallet_exposure_limit > self.bot_params_pair.short.unstuck_threshold { - let pprice_diff = - calc_pprice_diff_int(SHORT, position.price, self.hlcs[[k, idx, CLOSE]]); + let pprice_diff = calc_pprice_diff_int( + SHORT, + 
position.price, + self.hlcvs[[k, idx, CLOSE]], + ); stuck_positions.push((idx, SHORT, pprice_diff)); } } @@ -976,7 +1100,7 @@ impl Backtest { match pside { LONG => { let close_price = f64::max( - self.hlcs[[k, idx, CLOSE]], + self.hlcvs[[k, idx, CLOSE]], round_up( self.emas[idx].compute_bands(LONG).upper * (1.0 + self.bot_params_pair.long.unstuck_ema_dist), @@ -1018,7 +1142,7 @@ } SHORT => { let close_price = f64::min( - self.hlcs[[k, idx, CLOSE]], + self.hlcvs[[k, idx, CLOSE]], round_dn( self.emas[idx].compute_bands(SHORT).lower * (1.0 - self.bot_params_pair.short.unstuck_ema_dist), @@ -1240,7 +1364,7 @@ #[inline] fn update_emas(&mut self, k: usize) { for i in 0..self.n_markets { - let close_price = self.hlcs[[k, i, CLOSE]]; + let close_price = self.hlcvs[[k, i, CLOSE]]; let long_alphas = &self.ema_alphas.long.alphas; let long_alphas_inv = &self.ema_alphas.long.alphas_inv; @@ -1410,55 +1534,3 @@ fn calc_drawdowns(equity_series: &[f64]) -> Vec<f64> { .map(|(&ret, &max)| (ret - max) / max) .collect() } - -pub fn calc_noisiness(hlcs: &Array3<f64>, window: usize) -> Array2<f64> { - let (n_minutes, n_coins, _) = hlcs.dim(); - - // Calculate Normalized Relative Range (NRR) - let nrrs = - (&hlcs.slice(s![.., .., 0]) - &hlcs.slice(s![.., .., 1])) / &hlcs.slice(s![.., .., 2]); - - let mut noisiness = Array2::<f64>::zeros((n_minutes, n_coins)); - let mut sums = vec![0.0; n_coins]; - - for i in 1..n_minutes { - let idx_start = i.saturating_sub(window); - - for j in 0..n_coins { - sums[j] += nrrs[[i - 1, j]]; - - if idx_start > 0 { - sums[j] -= nrrs[[idx_start - 1, j]]; - noisiness[[i, j]] = sums[j] / window as f64; - } else { - noisiness[[i, j]] = sums[j] / i as f64; - } - } - } - noisiness -} - -pub fn calc_volumes(hlcvs: &Array3<f64>, window: usize) -> Array2<f64> { - let (n_minutes, n_coins, _) = hlcvs.dim(); - - // Calculate volume in quote currency (close * volume) - let quote_volumes = &hlcvs.slice(s![.., .., 2]) * &hlcvs.slice(s![.., .., 3]); - - let mut rolling_volumes = Array2::<f64>::zeros((n_minutes, n_coins)); - let mut sums = vec![0.0; n_coins]; - - for i in 0..n_minutes { - let idx_start = i.saturating_sub(window); - for j in 0..n_coins { - sums[j] += quote_volumes[[i, j]]; - if i >= window { - sums[j] -= quote_volumes[[idx_start, j]]; - rolling_volumes[[i, j]] = sums[j]; - } else { - rolling_volumes[[i, j]] = sums[j]; - } - } - } - - rolling_volumes -} diff --git a/passivbot-rust/src/constants.rs b/passivbot-rust/src/constants.rs index fc495db0a..b64b12186 100644 --- a/passivbot-rust/src/constants.rs +++ b/passivbot-rust/src/constants.rs @@ -1,6 +1,7 @@ pub const HIGH: usize = 0; pub const LOW: usize = 1; pub const CLOSE: usize = 2; +pub const VOLUME: usize = 3; pub const LONG: usize = 3; pub const SHORT: usize = 4; diff --git a/passivbot-rust/src/lib.rs b/passivbot-rust/src/lib.rs index ca533bce0..04ec1fe0d 100644 --- a/passivbot-rust/src/lib.rs +++ b/passivbot-rust/src/lib.rs @@ -41,7 +41,5 @@ fn passivbot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(calc_closes_long_py, m)?)?; m.add_function(wrap_pyfunction!(calc_closes_short_py, m)?)?; m.add_function(wrap_pyfunction!(run_backtest, m)?)?; - m.add_function(wrap_pyfunction!(calc_noisiness_py, m)?)?; - m.add_function(wrap_pyfunction!(calc_volumes_py, m)?)?; Ok(()) } diff --git a/passivbot-rust/src/python.rs b/passivbot-rust/src/python.rs index 1b3db2871..dc4983e7f 100644 --- a/passivbot-rust/src/python.rs +++ b/passivbot-rust/src/python.rs @@ -1,4 +1,4 @@ -use crate::backtest::{analyze_backtest, 
calc_noisiness, calc_volumes, Backtest}; +use crate::backtest::{analyze_backtest, Backtest}; use crate::closes::{ calc_closes_long, calc_closes_short, calc_grid_close_long, calc_next_close_long, calc_next_close_short, calc_trailing_close_long, @@ -21,54 +21,14 @@ use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use pyo3::wrap_pyfunction; -#[pyfunction] -pub fn calc_volumes_py(hlcvs: PyReadonlyArray3<f64>, window: usize) -> PyResult<Py<PyArray2<f64>>> { - // Convert PyReadonlyArray3 to owned Array3 - let hlcvs_rust: Array3<f64> = hlcvs.as_array().to_owned(); - - // Call the existing calc_volumes function - let volumes = calc_volumes(&hlcvs_rust, window); - - // Convert the result back to a PyArray - Python::with_gil(|py| Ok(volumes.into_pyarray(py).to_owned())) -} - -#[pyfunction] -pub fn calc_noisiness_py( - hlcs: PyReadonlyArray3<f64>, - window: usize, -) -> PyResult<Py<PyArray2<f64>>> { - // Convert PyReadonlyArray3 to owned Array3 - let hlcs_rust: Array3<f64> = hlcs.as_array().to_owned(); - - // Call the existing calc_noisiness function - let noisiness = calc_noisiness(&hlcs_rust, window); - - // Convert the result back to a PyArray - Python::with_gil(|py| Ok(noisiness.into_pyarray(py).to_owned())) -} - #[pyfunction] pub fn run_backtest( - hlcs: PyReadonlyArray3<f64>, - preferred_coins: &PyAny, + hlcvs: PyReadonlyArray3<f64>, bot_params_pair_dict: &PyDict, exchange_params_list: &PyAny, backtest_params_dict: &PyDict, ) -> PyResult<(Py<PyArray2<PyObject>>, Py<PyArray1<f64>>, Py<PyDict>)> { - let hlcs_rust = hlcs.as_array(); - - let preferred_coins_rust: Array2<i32> = - if let Ok(arr) = preferred_coins.downcast::<PyArray2<i32>>() { - unsafe { arr.as_array().to_owned() } - } else if let Ok(arr) = preferred_coins.downcast::<PyArray2<i64>>() { - let preferred_coins_i64: ArrayBase<_, _> = unsafe { arr.as_array() }; - preferred_coins_i64.mapv(|x| x as i32) - } else { - return Err(PyValueError::new_err( - "Unsupported data type for preferred_coins", - )); - }; + let hlcvs_rust = hlcvs.as_array(); let bot_params_pair = bot_params_pair_from_dict(bot_params_pair_dict)?; let exchange_params = { @@ -95,8 +55,7 @@ pub fn run_backtest( let backtest_params = backtest_params_from_dict(backtest_params_dict)?; let mut backtest = Backtest::new( - hlcs_rust.to_owned(), - preferred_coins_rust, + hlcvs_rust.to_owned(), bot_params_pair, exchange_params, &backtest_params, @@ -186,6 +145,11 @@ fn bot_params_from_dict(dict: &PyDict) -> PyResult<BotParams> { entry_trailing_retracement_pct: extract_value(dict, "entry_trailing_retracement_pct")?, entry_trailing_grid_ratio: extract_value(dict, "entry_trailing_grid_ratio")?, entry_trailing_threshold_pct: extract_value(dict, "entry_trailing_threshold_pct")?, + filter_rolling_window: { + let filter_rolling_window_float: f64 = extract_value(dict, "filter_rolling_window")?; + filter_rolling_window_float.round() as usize + }, + filter_relative_volume_clip_pct: extract_value(dict, "filter_relative_volume_clip_pct")?, ema_span_0: extract_value(dict, "ema_span_0")?, ema_span_1: extract_value(dict, "ema_span_1")?, n_positions: { diff --git a/passivbot-rust/src/types.rs b/passivbot-rust/src/types.rs index c6cd13245..98714ba4b 100644 --- a/passivbot-rust/src/types.rs +++ b/passivbot-rust/src/types.rs @@ -110,6 +110,8 @@ pub struct BotParams { pub entry_trailing_retracement_pct: f64, pub entry_trailing_grid_ratio: f64, pub entry_trailing_threshold_pct: f64, + pub filter_rolling_window: usize, + pub filter_relative_volume_clip_pct: f64, pub ema_span_0: f64, pub ema_span_1: f64, pub n_positions: usize, From 66379438900cf1c7fd1f99abc434230324099b8e Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 
14:36:13 -0400 Subject: [PATCH 04/32] preferred coins are computed inside rust backtester --- src/backtest.py | 72 ++++++++----------------------------------------- 1 file changed, 11 insertions(+), 61 deletions(-) diff --git a/src/backtest.py b/src/backtest.py index cc6e38bfe..dcfdb5d7e 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -54,7 +54,7 @@ def process_forager_fills(fills): return fdf -def analyze_fills_forager(symbols, hlcs, fdf, equities): +def analyze_fills_forager(symbols, hlcvs, fdf, equities): analysis = {} pnls = {} for pside in ["long", "short"]: @@ -163,22 +163,20 @@ def prep_backtest_args(config, mss, exchange_params=None, backtest_params=None): return bot_params, exchange_params, backtest_params -def run_backtest(hlcs, preferred_coins, mss, config: dict): +def run_backtest(hlcvs, mss, config: dict): bot_params, exchange_params, backtest_params = prep_backtest_args(config, mss) print(f"Starting backtest...") sts = utc_ms() - fills, equities, analysis = pbr.run_backtest( - hlcs, preferred_coins, bot_params, exchange_params, backtest_params - ) + fills, equities, analysis = pbr.run_backtest(hlcvs, bot_params, exchange_params, backtest_params) print(f"seconds elapsed for backtest: {(utc_ms() - sts) / 1000:.4f}") return fills, equities, analysis -def post_process(config, hlcs, fills, equities, analysis, results_path): +def post_process(config, hlcvs, fills, equities, analysis, results_path): sts = utc_ms() fdf = process_forager_fills(fills) equities = pd.Series(equities) - analysis_py, bal_eq = analyze_fills_forager(config["backtest"]["symbols"], hlcs, fdf, equities) + analysis_py, bal_eq = analyze_fills_forager(config["backtest"]["symbols"], hlcvs, fdf, equities) for k in analysis_py: if k not in analysis: analysis[k] = analysis_py[k] @@ -191,10 +189,10 @@ def post_process(config, hlcs, fills, equities, analysis, results_path): config["analysis"] = analysis dump_config(config, f"{results_path}config.json") fdf.to_csv(f"{results_path}fills.csv") - plot_forager(results_path, config["backtest"]["symbols"], fdf, bal_eq, hlcs) + plot_forager(results_path, config["backtest"]["symbols"], fdf, bal_eq, hlcvs) -def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): +def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcvs): plots_dir = make_get_filepath(oj(results_path, "fills_plots", "")) plt.clf() bal_eq.plot() @@ -203,10 +201,10 @@ def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): for i, symbol in enumerate(symbols): try: print(f"Plotting fills for {symbol}") - hlcs_df = pd.DataFrame(hlcs[:, i, :], columns=["high", "low", "close"]) + hlcvs_df = pd.DataFrame(hlcvs[:, i, :3], columns=["high", "low", "close"]) fdfc = fdf[fdf.symbol == symbol] plt.clf() - plot_fills_forager(fdfc, hlcs_df) + plot_fills_forager(fdfc, hlcvs_df) plt.title(f"Fills {symbol}") plt.xlabel = "time" plt.ylabel = "price" @@ -215,50 +213,6 @@ def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): print(f"Error plotting {symbol} {e}") -def calc_preferred_coins(hlcvs, config): - w_size = config["live"]["ohlcv_rolling_window"] - n_coins = hlcvs.shape[1] - - # Calculate noisiness indices - noisiness_indices = np.argsort(-pbr.calc_noisiness_py(hlcvs[:, :, :3], w_size)) - - # Calculate volume-based eligibility - if config["live"]["relative_volume_filter_clip_pct"] > 0.0: - n_eligibles = int(round(n_coins * (1 - config["live"]["relative_volume_filter_clip_pct"]))) - - for pside in ["long", "short"]: - if ( - 
config["bot"][pside]["n_positions"] > 0.0 - and config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): - n_eligibles = max(n_eligibles, int(round(config["bot"][pside]["n_positions"]))) - - if n_eligibles < n_coins: - # Calculate rolling volumes and get volume-based ranking - rolling_volumes = pbr.calc_volumes_py(hlcvs, w_size) - volume_ranking = np.argsort(-rolling_volumes, axis=1) - - # Create a mask for eligible coins based on volume (vectorized) - rows = np.arange(hlcvs.shape[0])[:, None] - cols = volume_ranking[:, :n_eligibles] - eligibility_mask = np.zeros((hlcvs.shape[0], n_coins), dtype=bool) - eligibility_mask[rows, cols] = True - - # Filter noisiness_indices based on eligibility - filtered_noisiness_indices = np.array( - [ - indices[mask] - for indices, mask in zip( - noisiness_indices, eligibility_mask[rows, noisiness_indices] - ) - ] - ) - - return filtered_noisiness_indices - - return noisiness_indices - - async def main(): manage_rust_compilation() logging.basicConfig( @@ -274,8 +228,6 @@ async def main(): "approved_coins", "ignored_coins", "minimum_coin_age_days", - "ohlcv_rolling_window", - "relative_volume_filter_clip_pct", } for key in sorted(template_config["live"]): if key not in keep_live_keys: @@ -287,10 +239,8 @@ async def main(): config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) config["backtest"]["symbols"] = symbols - preferred_coins = calc_preferred_coins(hlcvs, config) - hlcs = hlcvs[:, :, :3] - fills, equities, analysis = run_backtest(hlcs, preferred_coins, mss, config) - post_process(config, hlcs, fills, equities, analysis, results_path) + fills, equities, analysis = run_backtest(hlcvs, mss, config) + post_process(config, hlcvs, fills, equities, analysis, results_path) if __name__ == "__main__": From 9750f3b1a132f9814f0cac272a0d831ddbf848a7 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:23 -0400 Subject: [PATCH 05/32] volume is in quote --- src/downloader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/downloader.py b/src/downloader.py index c217ae0a2..5c9863bb8 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1520,6 +1520,9 @@ def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): # Extract the required data (high, low, close, volume) coin_data = ohlcv[:, 1:] + # Use quote volume as volume + coin_data[:, 3] = coin_data[:, 2] * coin_data[:, 3] + # Place the data in the unified array unified_array[start_idx:end_idx, i, :] = coin_data From 571b2c7f190808e8d61908448c85efd11914b48f Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:41 -0400 Subject: [PATCH 06/32] cleanup bybit fetch pnls --- src/exchanges/bybit.py | 129 ++++++++++++----------------------------- 1 file changed, 36 insertions(+), 93 deletions(-) diff --git a/src/exchanges/bybit.py b/src/exchanges/bybit.py index 927c2376c..5d82f254e 100644 --- a/src/exchanges/bybit.py +++ b/src/exchanges/bybit.py @@ -293,7 +293,9 @@ async def fetch_pnls_sub( start_time: int = None, end_time: int = None, ): - if start_time is not None: + if start_time is None: + pnls = await self.fetch_pnl(start_time=start_time, end_time=end_time) + else: week = 1000 * 60 * 60 * 24 * 7 pnls = [] if end_time is None: @@ -310,9 +312,7 @@ async def fetch_pnls_sub( if sts <= start_time: break i += 1 - logging.info(f"fetching pnls for more than a week {ts_to_date_utc(sts)}") - else: - pnls = await self.fetch_pnl(start_time=start_time, end_time=end_time) + logging.info(f"fetched pnls for more than a week 
{ts_to_date_utc(sts)}") return sorted(pnls, key=lambda x: x["timestamp"]) async def fetch_pnl( @@ -332,38 +332,44 @@ async def fetch_pnl( params["startTime"] = int(start_time) if end_time is not None: params["endTime"] = int(end_time) - fetched = await self.cca.private_get_v5_position_closed_pnl(params) - fetched["result"]["list"] = sorted( - floatify(fetched["result"]["list"]), key=lambda x: x["updatedTime"] - ) + fetched = (await self.cca.private_get_v5_position_closed_pnl(params))["result"] while True: - if fetched["result"]["list"] == []: + fetched["list"] = sorted( + floatify(fetched["list"]), key=lambda x: float(x["updatedTime"]) + ) + for i in range(len(fetched["list"])): + fetched["list"][i]["timestamp"] = float(fetched["list"][i]["updatedTime"]) + fetched["list"][i]["symbol"] = self.get_symbol_id_inv( + fetched["list"][i]["symbol"] + ) + fetched["list"][i]["pnl"] = float(fetched["list"][i]["closedPnl"]) + fetched["list"][i]["side"] = fetched["list"][i]["side"].lower() + fetched["list"][i]["position_side"] = ( + "long" if fetched["list"][i]["side"] == "sell" else "short" + ) + if fetched["list"] == []: break if ( - fetched["result"]["list"][0]["orderId"] in ids_seen - and fetched["result"]["list"][-1]["orderId"] in ids_seen + fetched["list"][0]["orderId"] in ids_seen + and fetched["list"][-1]["orderId"] in ids_seen ): break - all_pnls.extend(fetched["result"]["list"]) - for elm in fetched["result"]["list"]: + all_pnls.extend(fetched["list"]) + for elm in fetched["list"]: ids_seen.add(elm["orderId"]) if start_time is None: break - if fetched["result"]["list"][0]["updatedTime"] <= start_time: + if fetched["list"][0]["updatedTime"] <= start_time: break - if not fetched["result"]["nextPageCursor"]: + if not fetched["nextPageCursor"]: + break + if len(fetched["list"]) < limit: break logging.info( - f"fetching pnls {ts_to_date_utc(fetched['result']['list'][-1]['updatedTime'])}" - ) - params["cursor"] = fetched["result"]["nextPageCursor"] - fetched = await self.cca.private_get_v5_position_closed_pnl(params) - fetched["result"]["list"] = sorted( - floatify(fetched["result"]["list"]), key=lambda x: x["updatedTime"] + f"fetched pnls from {ts_to_date_utc(fetched['list'][-1]['updatedTime'])} n pnls: {len(fetched['list'])}" ) - for i in range(len(all_pnls)): - all_pnls[i]["pnl"] = all_pnls[i]["closedPnl"] - all_pnls[i]["timestamp"] = all_pnls[i]["updatedTime"] + params["cursor"] = fetched["nextPageCursor"] + fetched = (await self.cca.private_get_v5_position_closed_pnl(params))["result"] return sorted(all_pnls, key=lambda x: x["updatedTime"]) except Exception as e: logging.error(f"error fetching pnls {e}") @@ -371,54 +377,7 @@ async def fetch_pnl( traceback.print_exc() return [] - async def fetch_fills_sub_sub(self, start_time=None, end_time=None): - assert start_time is not None - params = {"limit": 100} - all_fetched = [] - week = 1000 * 60 * 60 * 24 * 7 - fetch_windows = [ - (i, min(i + week, end_time)) for i in range(int(start_time), int(end_time), int(week)) - ] - results = await asyncio.gather( - *[ - self.cca.fetch_my_trades(params={"paginate": True, "endTime": int(ets)}) - for sts, ets in fetch_windows - ] - ) - result = sorted(flatten(results), key=lambda x: x["timestamp"]) - return result - if start_time and end_time and end_time - start_time > week: - start_end_times = [start_time] - result = await self.cca.fetch_my_trades( - since=int(start_time) if start_time else start_time, params=params - ) - return sorted(result, key=lambda x: x["timestamp"]) - - async def fetch_fills_sub(self, 
start_time=None, end_time=None): - if start_time is None: - result = await self.cca.fetch_my_trades() - return sorted(result, key=lambda x: x["timestamp"]) - if end_time is None: - end_time = int(self.get_exchange_time() + 1000 * 60 * 60 * 24) - all_fetched_fills = [] - for _ in range(100): - fills = await self.cca.fetch_my_trades( - params={"paginate": True, "endTime": int(end_time)} - ) - if not fills: - break - all_fetched_fills += fills - if fills[0]["timestamp"] <= start_time: - break - logging.info( - f"fetched fills: {fills[0]['datetime']} {fills[-1]['datetime']} {len(fills)}" - ) - end_time = fills[0]["timestamp"] - else: - logging.error(f"more than 100 calls to ccxt fetch_my_trades") - return sorted(all_fetched_fills, key=lambda x: x["timestamp"]) - - async def fetch_fills2_sub_sub(self, start_time, end_time, limit=None): + async def fetch_fills(self, start_time, end_time, limit=None): if start_time is None: result = await self.cca.fetch_my_trades() return sorted(result, key=lambda x: x["timestamp"]) @@ -436,39 +395,23 @@ async def fetch_fills2_sub_sub(self, start_time, end_time, limit=None): if fills[0]["timestamp"] <= start_time: break logging.info( - f"fetched fills: {fills[0]['datetime']} {fills[-1]['datetime']} {len(fills)}" + f"fetched fills from {fills[0]['datetime']} to {fills[-1]['datetime']} n fills: {len(fills)}" ) end_time = fills[0]["timestamp"] + limit = 1000 else: logging.error(f"more than 100 calls to ccxt fetch_my_trades") return sorted(all_fetched_fills, key=lambda x: x["timestamp"]) - async def fetch_fills2_sub(self, start_time, end_time): - if start_time is None: - result = await self.cca.fetch_my_trades() - return sorted(result, key=lambda x: x["timestamp"]) - if end_time is None: - end_time = int(self.get_exchange_time() + 1000 * 60 * 60 * 24) - params = {"limit": 100} - all_fetched = [] - week = 1000 * 60 * 60 * 24 * 7 - fetch_windows = [ - (i, min(i + week, end_time)) for i in range(int(start_time), int(end_time), int(week)) - ] - results = await asyncio.gather( - *[self.fetch_fills2_sub_sub(sts, ets) for sts, ets in fetch_windows] - ) - result = sorted(flatten(results), key=lambda x: x["timestamp"]) - return result - async def fetch_pnls(self, start_time=None, end_time=None, limit=None): # fetch fills first, then pnls (bybit has them in separate endpoints) if start_time: if self.get_exchange_time() - start_time < 1000 * 60 * 60 * 4 and limit == 100: + # set start time to None (fetch latest) if start time is recent start_time = None - fills = await self.fetch_fills2_sub_sub(start_time=start_time, end_time=end_time, limit=limit) + fills = await self.fetch_fills(start_time=start_time, end_time=end_time, limit=limit) if start_time: - fills = [x for x in fills if x["timestamp"] >= start_time - 1000 * 60 * 60 * 4] + fills = [x for x in fills if x["timestamp"] >= start_time - 1000 * 60 * 60] if not fills: return [] start_time = fills[0]["timestamp"] From 95faed09cefe150c842c63a2234e3eb010645fae Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:59 -0400 Subject: [PATCH 07/32] update for new filter params --- src/optimize.py | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 4ce20c01f..0fae3a202 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -9,7 +9,6 @@ from backtest import ( prepare_hlcvs_mss, prep_backtest_args, - calc_preferred_coins, ) from pure_funcs import ( get_template_live_config, @@ -146,26 +145,15 @@ def 
config_to_individual(config): class Evaluator: - def __init__(self, hlcs, preferred_coins, config, mss): - self.hlcs = hlcs - self.shared_hlcs = shared_memory.SharedMemory(create=True, size=self.hlcs.nbytes) - self.shared_hlcs_np = np.ndarray( - self.hlcs.shape, dtype=self.hlcs.dtype, buffer=self.shared_hlcs.buf + def __init__(self, hlcvs, config, mss): + self.hlcvs = hlcvs + self.shared_hlcvs = shared_memory.SharedMemory(create=True, size=self.hlcvs.nbytes) + self.shared_hlcvs_np = np.ndarray( + self.hlcvs.shape, dtype=self.hlcvs.dtype, buffer=self.shared_hlcvs.buf ) - np.copyto(self.shared_hlcs_np, self.hlcs) - del self.hlcs + np.copyto(self.shared_hlcvs_np, self.hlcvs) + del self.hlcvs - self.preferred_coins = preferred_coins - self.shared_preferred_coins = shared_memory.SharedMemory( - create=True, size=self.preferred_coins.nbytes - ) - self.shared_preferred_coins_np = np.ndarray( - self.preferred_coins.shape, - dtype=self.preferred_coins.dtype, - buffer=self.shared_preferred_coins.buf, - ) - np.copyto(self.shared_preferred_coins_np, self.preferred_coins) - del self.preferred_coins self.config = config _, self.exchange_params, self.backtest_params = prep_backtest_args(config, mss) @@ -176,8 +164,7 @@ def evaluate(self, individual): config, [], exchange_params=self.exchange_params, backtest_params=self.backtest_params ) fills, equities, analysis = pbr.run_backtest( - self.shared_hlcs_np, - self.shared_preferred_coins_np, + self.shared_hlcvs_np, bot_params, self.exchange_params, self.backtest_params, @@ -208,10 +195,8 @@ def calc_fitness(self, analysis): def cleanup(self): # Close and unlink the shared memory - self.shared_hlcs.close() - self.shared_hlcs.unlink() - self.shared_preferred_coins.close() - self.shared_preferred_coins.unlink() + self.shared_hlcvs.close() + self.shared_hlcvs.unlink() def add_extra_options(parser): @@ -290,8 +275,6 @@ async def main(): config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) config["backtest"]["symbols"] = symbols - preferred_coins = calc_preferred_coins(hlcvs, config) - hlcs = hlcvs[:, :, :3] date_fname = ts_to_date_utc(utc_ms())[:19].replace(":", "_") coins = [symbol_to_coin(s) for s in config["backtest"]["symbols"]] coins_fname = "_".join(coins) if len(coins) <= 6 else f"{len(coins)}_coins" @@ -300,7 +283,7 @@ async def main(): f"optimize_results/{date_fname}_{coins_fname}_{hash_snippet}_all_results.txt" ) try: - evaluator = Evaluator(hlcs, preferred_coins, config, mss) + evaluator = Evaluator(hlcvs, config, mss) creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0)) # Minimize both objectives creator.create("Individual", list, fitness=creator.FitnessMulti) From e87a13c38acf649dee11a0fb3fe5d6254360c575 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:12 -0400 Subject: [PATCH 08/32] update for new filter params --- src/procedures.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/procedures.py b/src/procedures.py index a62849ba8..16a860f6a 100644 --- a/src/procedures.py +++ b/src/procedures.py @@ -134,17 +134,37 @@ def format_config(config: dict, verbose=True) -> dict: result = deepcopy(config["config"]) else: raise Exception(f"failed to format config") - for k0, v0, v1 in [("close_trailing_qty_pct", 1.0, [0.05, 1.0])]: + for k0, v0, v1 in [ + ("close_trailing_qty_pct", 1.0, [0.05, 1.0]), + ( + "filter_rolling_window", + ( + result["live"]["ohlcv_rolling_window"] + if "ohlcv_rolling_window" in result["live"] + else 
60.0 + ), + [10.0, 1440.0], + ), + ( + "filter_relative_volume_clip_pct", + ( + result["live"]["relative_volume_filter_clip_pct"] + if "relative_volume_filter_clip_pct" in result["live"] + else 0.5 + ), + [0.0, 1.0], + ), + ]: for pside in ["long", "short"]: if k0 not in result["bot"][pside]: result["bot"][pside][k0] = v0 if verbose: - print(f"adding missing parameter {k0}: {v0}") + print(f"adding missing backtest parameter {pside} {k0}: {v0}") opt_key = f"{pside}_{k0}" if opt_key not in result["optimize"]["bounds"]: result["optimize"]["bounds"][opt_key] = v1 if verbose: - print(f"adding missing parameter {opt_key}: {v1}") + print(f"adding missing optimize parameter {pside} {opt_key}: {v1}") for k0, src, dst in [ ("live", "minimum_market_age_days", "minimum_coin_age_days"), ("live", "noisiness_rolling_mean_window_size", "ohlcv_rolling_window"), @@ -156,7 +176,6 @@ def format_config(config: dict, verbose=True) -> dict: del result[k0][src] for k0, k1, v in [ ("live", "time_in_force", "good_till_cancelled"), - ("live", "ohlcv_rolling_window", 60), ("optimize", "scoring", ["mdg", "sharpe_ratio"]), ]: if k1 not in result[k0]: From 70a5608f9bd7c6ed6c264a2886be57d88ccde08d Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:27 -0400 Subject: [PATCH 09/32] new filter params --- src/pure_funcs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pure_funcs.py b/src/pure_funcs.py index 7d14af16c..5560d7855 100644 --- a/src/pure_funcs.py +++ b/src/pure_funcs.py @@ -523,6 +523,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "entry_trailing_grid_ratio": 0.5, "entry_trailing_retracement_pct": 0.01, "entry_trailing_threshold_pct": 0.05, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.0, "total_wallet_exposure_limit": 1.7, "unstuck_close_pct": 0.001, @@ -548,6 +550,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "entry_trailing_grid_ratio": 0.5, "entry_trailing_retracement_pct": 0.01, "entry_trailing_threshold_pct": 0.05, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.0, "total_wallet_exposure_limit": 1.7, "unstuck_close_pct": 0.001, @@ -569,10 +573,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "minimum_coin_age_days": 7.0, - "ohlcv_rolling_window": 60, "pnls_max_lookback_days": 30.0, "price_distance_threshold": 0.002, - "relative_volume_filter_clip_pct": 0.1, "time_in_force": "good_till_cancelled", "user": "bybit_01", }, From 9b003ded5da493be3c64332587b3934ea13f4e0e Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:55 -0400 Subject: [PATCH 10/32] up version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f9c9842e6..1cb15828c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ :warning: **Used at one's own risk** :warning: -v7.0.7 +v7.1.0 ## Overview From 4c38f1a8e198f32af9d732556da409b9fb9cdea8 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 15:18:01 -0400 Subject: [PATCH 11/32] cleanup rust backtest --- passivbot-rust/src/backtest.rs | 177 +++++++++++++-------------------- 1 file changed, 71 insertions(+), 106 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index e14c0c55c..48b85dd92 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -121,19 +121,18 @@ pub struct TradingEnabled 
{ short: bool, } -pub struct PreferredCoins { - long: Vec<usize>, - short: Vec<usize>, +pub struct RollingVolumeSum { + long: HashMap<usize, f64>, + short: HashMap<usize, f64>, } pub struct Backtest { - hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 4) - preferred_coins: PreferredCoins, + hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, balance: f64, - n_markets: usize, + n_coins: usize, ema_alphas: EmaAlphas, emas: Vec<EMAs>, positions: Positions, @@ -151,7 +150,9 @@ pub struct Backtest { delist_timestamps: HashMap<usize, usize>, did_fill_long: HashSet<usize>, did_fill_short: HashSet<usize>, - rolling_volumes: Vec<Vec<f64>>, + n_eligible_long: usize, + n_eligible_short: usize, + rolling_volume_sum: RollingVolumeSum, } impl Backtest { @@ -162,16 +163,16 @@ impl Backtest { backtest_params: &BacktestParams, ) -> Self { let n_timesteps = hlcvs.shape()[0]; - let n_markets = hlcvs.shape()[1]; - let max_window = bot_params_pair - .long - .filter_rolling_window - .max(bot_params_pair.short.filter_rolling_window); - - // Initialize rolling_volumes with zeros - let rolling_volumes = vec![vec![0.0; n_markets]; n_timesteps]; - - let initial_emas = (0..n_markets) + let n_coins = hlcvs.shape()[1]; + let n_eligible_long = bot_params_pair.long.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() + as usize, + ); + let n_eligible_short = bot_params_pair.short.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() + as usize, + ); + let initial_emas = (0..n_coins) .map(|i| { let close_price = hlcvs[[0, i, CLOSE]]; EMAs { @@ -180,23 +181,18 @@ impl Backtest { } }) .collect(); - let preferred_coins = PreferredCoins { - long: Vec::<usize>::new(), - short: Vec::<usize>::new(), - }; let mut equities = Vec::<f64>::new(); equities.push(backtest_params.starting_balance); let mut bot_params_pair_cloned = bot_params_pair.clone(); - bot_params_pair_cloned.long.n_positions = n_markets.min(bot_params_pair.long.n_positions); - bot_params_pair_cloned.short.n_positions = n_markets.min(bot_params_pair.short.n_positions); - let mut backtest = Backtest { + bot_params_pair_cloned.long.n_positions = n_coins.min(bot_params_pair.long.n_positions); + bot_params_pair_cloned.short.n_positions = n_coins.min(bot_params_pair.short.n_positions); + Backtest { hlcvs, - preferred_coins, bot_params_pair: bot_params_pair_cloned, exchange_params_list, backtest_params: backtest_params.clone(), balance: backtest_params.starting_balance, - n_markets, + n_coins, ema_alphas: calc_ema_alphas(&bot_params_pair), emas: initial_emas, positions: Positions::default(), @@ -224,60 +220,16 @@ impl Backtest { delist_timestamps: HashMap::new(), did_fill_long: HashSet::new(), did_fill_short: HashSet::new(), - rolling_volumes, - }; - backtest.initialize_rolling_volumes(max_window); - backtest - } - - fn initialize_rolling_volumes(&mut self, max_window: usize) { - let n_markets = self.hlcvs.shape()[1]; - let n_timesteps = self.hlcvs.shape()[0]; - - for k in 0..n_timesteps { - let start = k.saturating_sub(max_window - 1); - for i in 0..n_markets { - // Update rolling volume - self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); - } - } - } - - fn update_rolling_volumes(&mut self, k: usize) { - let n_markets = self.hlcvs.shape()[1]; - let max_window = self - .bot_params_pair - .long - .filter_rolling_window - .max(self.bot_params_pair.short.filter_rolling_window); - - if k >= max_window { - let old_k = k - max_window; - for i in 
0..n_markets { - self.rolling_volumes[k][i] = self.rolling_volumes[k - 1][i] - + self.hlcvs[[k, i, VOLUME]] - - self.hlcvs[[old_k, i, VOLUME]]; - } - } else { - // For the first max_window steps, we need to recalculate the full sum - let start = 0; - for i in 0..n_markets { - self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); - } + n_eligible_long, + n_eligible_short, + rolling_volume_sum: RollingVolumeSum { + long: HashMap::new(), + short: HashMap::new(), + }, } } - fn calc_noisiness(&self, k: usize, idx: usize, window: usize) -> f64 { - let start = k.saturating_sub(window - 1); - let slice = self.hlcvs.slice(s![start..=k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - nrr_sum / (k - start + 1) as f64 - } - - fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { - let bot_params = match pside { - LONG => &self.bot_params_pair.long, - SHORT => &self.bot_params_pair.short, + pub fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + let (bot_params, n_eligible) = match pside { + LONG => (&self.bot_params_pair.long, self.n_eligible_long), + SHORT => (&self.bot_params_pair.short, self.n_eligible_short), _ => panic!("Invalid pside"), }; let n_coins = self.hlcvs.shape()[1]; + let start_idx = k.saturating_sub(bot_params.filter_rolling_window); - // Use pre-computed rolling volumes - let mut volume_sums: Vec<(usize, f64)> = self.rolling_volumes[k] - .iter() - .enumerate() - .map(|(idx, &sum)| (idx, sum)) + // Calculate volume sums + let mut volume_sums: Vec<(usize, f64)> = (0..n_coins) + .map(|idx| { + let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); + (idx, sum) + }) .collect(); // Sort by volume in descending order volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Filter by volume - let n_eligible = bot_params.n_positions.max( - (n_coins as f64 * (1.0 - bot_params.filter_relative_volume_clip_pct)).round() as usize, - ); let filtered_indices: Vec<usize> = volume_sums - .iter() + .into_iter() .take(n_eligible) - .map(|&(idx, _)| idx) + .map(|(idx, _)| idx) .collect(); - // Calculate noisiness on-the-fly for filtered coins + // Calculate noisiness let mut noisiness: Vec<(usize, f64)> = filtered_indices .into_iter() .map(|idx| { - ( - idx, - self.calc_noisiness(k, idx, bot_params.filter_rolling_window), - ) + let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + let mean_nrr = nrr_sum / (k - start_idx) as f64; + (idx, mean_nrr) }) .collect(); @@ -285,7 +285,7 @@ impl Backtest { let check_points: Vec<usize> = (0..7).map(|i| i * 60 * 24).collect(); let n_timesteps = self.hlcvs.shape()[0]; - for idx in 0..self.n_markets { + for idx in 0..self.n_coins { self.trailing_prices .long .insert(idx, TrailingPriceBundle::default()); @@ -317,6 +317,5 @@ impl Backtest { } } for k in 1..(n_timesteps - 1) { - self.update_rolling_volumes(k); self.check_for_fills(k); self.update_emas(k); self.update_open_orders(k); @@ -373,28 +373,41 @@ impl Backtest { } fn update_actives(&mut self, k: usize, pside: usize) -> Vec<usize> { - // Calculate preferred coins first - let preferred_coins = self.calc_preferred_coins(k, pside); - - let (actives, positions, n_positions) = match pside { - LONG => ( - &mut self.actives.long, - &self.positions.long, - self.bot_params_pair.long.n_positions, - ), + // Calculate all the information we need before borrowing + let (positions, n_positions) = match pside { + LONG => (&self.positions.long, self.bot_params_pair.long.n_positions), SHORT => ( - &mut self.actives.short, &self.positions.short, self.bot_params_pair.short.n_positions, ), _ => panic!("Invalid pside"), }; - let mut actives_without_pos = 
Vec::with_capacity(n_positions); + + let current_positions: Vec<usize> = positions.keys().cloned().collect(); + let mut preferred_coins = Vec::new(); + + // Only calculate preferred coins if there are open slots + if current_positions.len() < n_positions { + preferred_coins = self.calc_preferred_coins(k, pside); + } + + // Now we can mutably borrow self.actives + let actives = match pside { + LONG => &mut self.actives.long, + SHORT => &mut self.actives.short, + _ => unreachable!(), + }; + actives.clear(); - // First, add all markets with existing positions - for &market_idx in positions.keys() { + // Add all markets with existing positions + for &market_idx in &current_positions { actives.insert(market_idx); } - // Then, add additional markets based on preferred_coins + let mut actives_without_pos = Vec::new(); + + // Add additional markets based on preferred_coins for &market_idx in &preferred_coins { if actives.len() < n_positions { if actives.insert(market_idx) { actives_without_pos.push(market_idx); } } else { break; } } + actives_without_pos } From 5fcd43e872d7da2ff2b5db3c50b64125555b25b5 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 18:25:28 -0400 Subject: [PATCH 12/32] bug fix, refactor, etc --- passivbot-rust/src/backtest.rs | 225 +++++++++++++++++++++----------- passivbot-rust/src/closes.rs | 1 - passivbot-rust/src/constants.rs | 6 +- 3 files changed, 149 insertions(+), 83 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 48b85dd92..78ec7125e 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -69,12 +69,6 @@ impl EMAs { } } -#[derive(Debug, Default)] -pub struct OpenOrders { - pub long: HashMap<usize, OpenOrderBundle>, - pub short: HashMap<usize, OpenOrderBundle>, -} - #[derive(Debug, Default)] pub struct OpenOrdersNew { pub long: HashMap<usize, OpenOrderBundleNew>, pub short: HashMap<usize, OpenOrderBundleNew>, } #[derive(Debug, Default)] pub struct OpenOrderBundleNew { pub entries: Vec<Order>, pub closes: Vec<Order>, } -#[derive(Debug, Default)] -pub struct OpenOrderBundle { - pub entry: Order, - pub close: Order, -} - #[derive(Default, Debug)] pub struct Actives { long: HashSet<usize>, short: HashSet<usize>, } @@ -112,6 +106,50 @@ pub struct TradingEnabled { pub struct RollingVolumeSum { long: HashMap<usize, f64>, short: HashMap<usize, f64>, + prev_k_long: usize, + prev_k_short: usize, +} + +impl RollingVolumeSum { + fn new() -> Self { + RollingVolumeSum { + long: HashMap::new(), + short: HashMap::new(), + prev_k_long: 0, + prev_k_short: 0, + } + } + + fn update(&mut self, hlcvs: &Array3<f64>, k: usize, pside: usize, window: usize) { + let (volume_sums, prev_k) = match pside { + LONG => (&mut self.long, &mut self.prev_k_long), + SHORT => (&mut self.short, &mut self.prev_k_short), + _ => panic!("Invalid pside"), + }; + + let start_idx = k.saturating_sub(window); + let prev_start_idx = prev_k.saturating_sub(window); + + for idx in 0..hlcvs.shape()[1] { + let mut sum = *volume_sums.entry(idx).or_insert(0.0); + + // Remove volumes outside the new window + if *prev_k > start_idx { + for i in prev_start_idx..start_idx { + sum -= hlcvs[[i, idx, VOLUME]]; + } + } + + // Add new volumes + for i in (*prev_k).max(start_idx)..k { + sum += hlcvs[[i, idx, VOLUME]]; + } + + volume_sums.insert(idx, sum); + } + + *prev_k = k; + } } pub struct Backtest { hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, balance: f64, n_coins: usize, ema_alphas: EmaAlphas, emas: Vec<EMAs>, positions: Positions, - open_orders: OpenOrders, // keys are symbol indices - open_orders_new: OpenOrdersNew, + open_orders: OpenOrdersNew, 
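+    // keys are symbol indices; one bundle of entry and close orders per coin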
trailing_prices: TrailingPrices, actives: Actives, pnl_cumsum_running: f64, @@ -227,8 +252,7 @@ impl Backtest { ema_alphas: calc_ema_alphas(&bot_params_pair), emas: initial_emas, positions: Positions::default(), - open_orders: OpenOrders::default(), - open_orders_new: OpenOrdersNew::default(), + open_orders: OpenOrdersNew::default(), trailing_prices: TrailingPrices::default(), actives: Actives::default(), pnl_cumsum_running: 0.0, @@ -252,70 +276,16 @@ impl Backtest { did_fill_short: HashSet::new(), n_eligible_long, n_eligible_short, - rolling_volume_sum: RollingVolumeSum { - long: HashMap::new(), - short: HashMap::new(), - }, + rolling_volume_sum: RollingVolumeSum::new(), } } - pub fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + pub fn calc_preferred_coins(&mut self, k: usize, pside: usize) -> Vec<usize> { let (bot_params, n_eligible) = match pside { LONG => (&self.bot_params_pair.long, self.n_eligible_long), SHORT => (&self.bot_params_pair.short, self.n_eligible_short), _ => panic!("Invalid pside"), }; + let window = bot_params.filter_rolling_window; + let start_idx = k.saturating_sub(window); + + // Calculate volume sums for all coins + let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) + .map(|idx| { + let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); + (idx, sum) + }) + .collect(); + + // Sort by volume in descending order + volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Take top n_eligible indices + let filtered_indices: Vec<usize> = volume_sums + .into_iter() + .take(n_eligible) + .map(|(idx, _)| idx) + .collect(); + + // Calculate noisiness + let mut noisiness: Vec<(usize, f64)> = filtered_indices + .into_iter() + .map(|idx| { + let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + let mean_nrr = nrr_sum / (k - start_idx) as f64; + (idx, mean_nrr) + }) + .collect(); + + // Sort by noisiness in descending order + noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Return indices sorted by noisiness + noisiness.into_iter().map(|(idx, _)| idx).collect() + } + + pub fn calc_preferred_coins_old(&self, k: usize, pside: usize) -> Vec<usize> { + let (bot_params, n_eligible) = match pside { + LONG => (&self.bot_params_pair.long, self.n_eligible_long), + SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + _ => panic!("Invalid pside"), + }; - let n_coins = self.hlcvs.shape()[1]; let start_idx = k.saturating_sub(bot_params.filter_rolling_window); // Calculate volume sums - let mut volume_sums: Vec<(usize, f64)> = (0..n_coins) + let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) .map(|idx| { let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); (idx, sum) }) .collect(); // Sort by volume in descending order volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Filter by volume let filtered_indices: Vec<usize> = volume_sums .into_iter() .take(n_eligible) .map(|(idx, _)| idx) .collect(); @@ -498,14 +498,14 @@ impl Backtest { self.did_fill_short.clear(); if self.trading_enabled.long { let mut open_orders_keys_long: Vec<usize> = - self.open_orders_new.long.keys().cloned().collect(); + self.open_orders.long.keys().cloned().collect(); 
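            // sort keys so fills are processed in a deterministic symbol order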
open_orders_keys_long.sort(); for idx in open_orders_keys_long { // Process close fills long - if !self.open_orders_new.long[&idx].closes.is_empty() { + if !self.open_orders.long[&idx].closes.is_empty() { let mut closes_to_process = Vec::new(); { - for close_order in &self.open_orders_new.long[&idx].closes { + for close_order in &self.open_orders.long[&idx].closes { if self.order_filled(k, idx, close_order) { closes_to_process.push(close_order.clone()); } @@ -450,10 +522,10 @@ impl Backtest { } } // Process entry fills long - if !self.open_orders_new.long[&idx].entries.is_empty() { + if !self.open_orders.long[&idx].entries.is_empty() { let mut entries_to_process = Vec::new(); { - for entry_order in &self.open_orders_new.long[&idx].entries { + for entry_order in &self.open_orders.long[&idx].entries { if self.order_filled(k, idx, entry_order) { entries_to_process.push(entry_order.clone()); } @@ -469,14 +541,14 @@ impl Backtest { } if self.trading_enabled.short { let mut open_orders_keys_short: Vec = - self.open_orders_new.short.keys().cloned().collect(); + self.open_orders.short.keys().cloned().collect(); open_orders_keys_short.sort(); for idx in open_orders_keys_short { // Process close fills short - if !self.open_orders_new.short[&idx].closes.is_empty() { + if !self.open_orders.short[&idx].closes.is_empty() { let mut closes_to_process = Vec::new(); { - for close_order in &self.open_orders_new.short[&idx].closes { + for close_order in &self.open_orders.short[&idx].closes { if self.order_filled(k, idx, close_order) { closes_to_process.push(close_order.clone()); } @@ -491,10 +563,10 @@ impl Backtest { } } // Process entry fills short - if !self.open_orders_new.short[&idx].entries.is_empty() { + if !self.open_orders.short[&idx].entries.is_empty() { let mut entries_to_process = Vec::new(); { - for entry_order in &self.open_orders_new.short[&idx].entries { + for entry_order in &self.open_orders.short[&idx].entries { if self.order_filled(k, idx, entry_order) { entries_to_process.push(entry_order.clone()); } @@ -849,7 +921,7 @@ impl Backtest { // check if coin is delisted; if so, close pos as unstuck close if let Some(&delist_timestamp) = self.delist_timestamps.get(&idx) { if k >= delist_timestamp && self.positions.long.contains_key(&idx) { - self.open_orders_new.long.get_mut(&idx).unwrap().closes = [Order { + self.open_orders.long.get_mut(&idx).unwrap().closes = [Order { qty: -self.positions.long[&idx].size, price: round_( f64::min( @@ -861,7 +933,7 @@ impl Backtest { order_type: OrderType::CloseUnstuckLong, }] .to_vec(); - self.open_orders_new.long.entry(idx).or_default().entries = Vec::new(); + self.open_orders.long.entry(idx).or_default().entries = Vec::new(); return; } } @@ -876,7 +948,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_entry_order) && self.has_next_grid_order(&next_entry_order, LONG) { - self.open_orders_new.long.entry(idx).or_default().entries = calc_entries_long( + self.open_orders.long.entry(idx).or_default().entries = calc_entries_long( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.long, @@ -884,7 +956,7 @@ impl Backtest { &self.trailing_prices.long[&idx], ); } else { - self.open_orders_new.long.entry(idx).or_default().entries = [next_entry_order].to_vec(); + self.open_orders.long.entry(idx).or_default().entries = [next_entry_order].to_vec(); } let next_close_order = calc_next_close_long( &self.exchange_params_list[idx], @@ -897,7 +969,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_close_order) && 
self.has_next_grid_order(&next_close_order, LONG) { - self.open_orders_new.long.entry(idx).or_default().closes = calc_closes_long( + self.open_orders.long.entry(idx).or_default().closes = calc_closes_long( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.long, @@ -905,7 +977,7 @@ impl Backtest { &self.trailing_prices.long[&idx], ); } else { - self.open_orders_new.long.entry(idx).or_default().closes = [next_close_order].to_vec(); + self.open_orders.long.entry(idx).or_default().closes = [next_close_order].to_vec(); } } @@ -921,7 +993,7 @@ impl Backtest { // check if coin is delisted; if so, close pos as unstuck close if let Some(&delist_timestamp) = self.delist_timestamps.get(&idx) { if k >= delist_timestamp && self.positions.short.contains_key(&idx) { - self.open_orders_new.short.get_mut(&idx).unwrap().closes = [Order { + self.open_orders.short.get_mut(&idx).unwrap().closes = [Order { qty: self.positions.short[&idx].size.abs(), price: round_( f64::max( @@ -933,7 +1005,7 @@ impl Backtest { order_type: OrderType::CloseUnstuckLong, }] .to_vec(); - self.open_orders_new.short.entry(idx).or_default().entries = Vec::new(); + self.open_orders.short.entry(idx).or_default().entries = Vec::new(); return; } } @@ -948,7 +1020,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_entry_order) && self.has_next_grid_order(&next_entry_order, SHORT) { - self.open_orders_new.short.entry(idx).or_default().entries = calc_entries_short( + self.open_orders.short.entry(idx).or_default().entries = calc_entries_short( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.short, @@ -956,8 +1028,7 @@ impl Backtest { &self.trailing_prices.short[&idx], ); } else { - self.open_orders_new.short.entry(idx).or_default().entries = - [next_entry_order].to_vec(); + self.open_orders.short.entry(idx).or_default().entries = [next_entry_order].to_vec(); } let next_close_order = calc_next_close_short( @@ -971,7 +1042,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_close_order) && self.has_next_grid_order(&next_close_order, SHORT) { - self.open_orders_new.short.entry(idx).or_default().closes = calc_closes_short( + self.open_orders.short.entry(idx).or_default().closes = calc_closes_short( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.short, @@ -979,7 +1050,7 @@ impl Backtest { &self.trailing_prices.short[&idx], ); } else { - self.open_orders_new.short.entry(idx).or_default().closes = [next_close_order].to_vec() + self.open_orders.short.entry(idx).or_default().closes = [next_close_order].to_vec() } } @@ -1072,9 +1143,9 @@ impl Backtest { self.exchange_params_list[idx].price_step, ), ); - if self.open_orders_new.long[&idx].closes.is_empty() - || self.open_orders_new.long[&idx].closes[0].qty == 0.0 - || close_price < self.open_orders_new.long[&idx].closes[0].price + if self.open_orders.long[&idx].closes.is_empty() + || self.open_orders.long[&idx].closes[0].qty == 0.0 + || close_price < self.open_orders.long[&idx].closes[0].price { let close_qty = -f64::min( self.positions.long[&idx].size, @@ -1114,9 +1185,9 @@ impl Backtest { self.exchange_params_list[idx].price_step, ), ); - if self.open_orders_new.short[&idx].closes.is_empty() - || self.open_orders_new.short[&idx].closes[0].qty == 0.0 - || close_price > self.open_orders_new.short[&idx].closes[0].price + if self.open_orders.short[&idx].closes.is_empty() + || self.open_orders.short[&idx].closes[0].qty == 0.0 + || close_price > self.open_orders.short[&idx].closes[0].price { let close_qty = f64::min( 
self.positions.short[&idx].size.abs(),
@@ -1196,17 +1267,17 @@ impl Backtest {
             }
         }
         let (unstucking_idx, unstucking_pside, unstucking_close) = self.calc_unstucking_close(k);
-        if unstucking_idx != NO_POS {
+        if unstucking_pside != NO_POS {
             match unstucking_pside {
                 LONG => {
-                    self.open_orders_new
+                    self.open_orders
                         .long
                         .get_mut(&unstucking_idx)
                         .unwrap()
                         .closes = [unstucking_close].to_vec();
                 }
                 SHORT => {
-                    self.open_orders_new
+                    self.open_orders
                         .short
                         .get_mut(&unstucking_idx)
                         .unwrap()
@@ -1235,7 +1306,7 @@ impl Backtest {
         let mut actives_without_pos = Vec::<usize>::new();
         if positions_long_indices.len() < self.bot_params_pair.long.n_positions {
             actives_without_pos = self.update_actives(k, LONG);
-            self.open_orders_new
+            self.open_orders
                 .long
                 .retain(|&idx, _| self.actives.long.contains(&idx));
         }
-
+
         for idx in active_long_indices {
             if actives_without_pos.contains(&idx)
-                || self.open_orders_new.long.get(&idx).map_or(false, |orders| {
+                || self.open_orders.long.get(&idx).map_or(false, |orders| {
                     orders.closes.iter().any(|order| {
                         order.order_type == OrderType::CloseUnstuckLong
                             || order.order_type == OrderType::CloseTrailingLong
@@ -1271,26 +1342,22 @@ impl Backtest {
         let mut actives_without_pos = Vec::<usize>::new();
         if positions_short_indices.len() < self.bot_params_pair.short.n_positions {
             actives_without_pos = self.update_actives(k, SHORT);
-            self.open_orders_new
+            self.open_orders
                 .short
                 .retain(|&idx, _| self.actives.short.contains(&idx));
         }
         let active_short_indices: Vec<usize> = self.actives.short.iter().cloned().collect();
         for idx in active_short_indices {
             if actives_without_pos.contains(&idx)
-                || self
-                    .open_orders_new
-                    .short
-                    .get(&idx)
-                    .map_or(false, |orders| {
-                        orders.closes.iter().any(|order| {
-                            order.order_type == OrderType::CloseUnstuckShort
-                                || order.order_type == OrderType::CloseTrailingShort
-                        }) || orders.entries.iter().any(|order| {
-                            order.order_type == OrderType::EntryTrailingNormalShort
-                                || order.order_type == OrderType::EntryTrailingCroppedShort
-                        })
+                || self.open_orders.short.get(&idx).map_or(false, |orders| {
+                    orders.closes.iter().any(|order| {
+                        order.order_type == OrderType::CloseUnstuckShort
+                            || order.order_type == OrderType::CloseTrailingShort
+                    }) || orders.entries.iter().any(|order| {
+                        order.order_type == OrderType::EntryTrailingNormalShort
+                            || order.order_type == OrderType::EntryTrailingCroppedShort
                     })
+                })
             {
                 self.update_open_orders_short_single(k, idx);
             }
@@ -1300,15 +1367,15 @@ impl Backtest {
         if !self.is_stuck.long.is_empty() || !self.is_stuck.short.is_empty() {
             let (unstucking_idx, unstucking_pside, unstucking_close) =
                 self.calc_unstucking_close(k);
-            if unstucking_idx != NO_POS {
+            if unstucking_pside != NO_POS {
                 match unstucking_pside {
                     LONG => {
-                        if let Some(orders) = self.open_orders_new.long.get_mut(&unstucking_idx) {
+                        if let Some(orders) = self.open_orders.long.get_mut(&unstucking_idx) {
                            orders.closes = vec![unstucking_close];
                        }
                    }
                    SHORT => {
-                        if let Some(orders) = self.open_orders_new.short.get_mut(&unstucking_idx) {
+                        if let Some(orders) = self.open_orders.short.get_mut(&unstucking_idx) {
                            orders.closes = vec![unstucking_close];
                        }
                    }

diff --git a/passivbot-rust/src/closes.rs b/passivbot-rust/src/closes.rs
index 7d0fbe418..8a53958fa 100644
--- a/passivbot-rust/src/closes.rs
+++ b/passivbot-rust/src/closes.rs
@@ -1,4 +1,3 @@
-use crate::constants::{CLOSE, LONG, NO_POS, SHORT};
 use crate::entries::calc_min_entry_qty;
 use crate::types::{
     BotParams, BotParamsPair, EMABands, ExchangeParams, Order, OrderType, Position,
Positions, diff --git a/passivbot-rust/src/constants.rs b/passivbot-rust/src/constants.rs index b64b12186..b6cb1e407 100644 --- a/passivbot-rust/src/constants.rs +++ b/passivbot-rust/src/constants.rs @@ -3,6 +3,6 @@ pub const LOW: usize = 1; pub const CLOSE: usize = 2; pub const VOLUME: usize = 3; -pub const LONG: usize = 3; -pub const SHORT: usize = 4; -pub const NO_POS: usize = 5; +pub const LONG: usize = 0; +pub const SHORT: usize = 1; +pub const NO_POS: usize = 2; From 1df94277773da212a1d8153d5d36464d233eb430 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 18:25:46 -0400 Subject: [PATCH 13/32] sort symbol list --- src/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/downloader.py b/src/downloader.py index 5c9863bb8..0c8686745 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1360,10 +1360,10 @@ async def prepare_hlcvs(config: dict): np.diff(data[:, 0]) == interval_ms ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcs hlcvsd[symbol] = data - symbols = list(hlcvsd.keys()) + symbols = sorted(hlcvsd.keys()) if len(symbols) > 1: print(f"Unifying data for {len(symbols)} coins into single numpy array...") - timestamps, unified_data = unify_hlcv_data(hlcvsd.values()) + timestamps, unified_data = unify_hlcv_data([hlcvsd[s] for s in symbols]) return symbols, timestamps, unified_data From 834f187feccd985c961ecddee1b3dbe968991a90 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Thu, 26 Sep 2024 12:15:34 -0400 Subject: [PATCH 14/32] optimize calc_preferred_coins --- passivbot-rust/src/backtest.rs | 179 +++++++++++---------------------- 1 file changed, 58 insertions(+), 121 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 78ec7125e..e8b8a02a3 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -110,54 +110,12 @@ pub struct TradingEnabled { } pub struct RollingVolumeSum { - long: HashMap, - short: HashMap, + long: Vec, + short: Vec, prev_k_long: usize, prev_k_short: usize, } -impl RollingVolumeSum { - fn new() -> Self { - RollingVolumeSum { - long: HashMap::new(), - short: HashMap::new(), - prev_k_long: 0, - prev_k_short: 0, - } - } - - fn update(&mut self, hlcvs: &Array3, k: usize, pside: usize, window: usize) { - let (volume_sums, prev_k) = match pside { - LONG => (&mut self.long, &mut self.prev_k_long), - SHORT => (&mut self.short, &mut self.prev_k_short), - _ => panic!("Invalid pside"), - }; - - let start_idx = k.saturating_sub(window); - let prev_start_idx = prev_k.saturating_sub(window); - - for idx in 0..hlcvs.shape()[1] { - let mut sum = *volume_sums.entry(idx).or_insert(0.0); - - // Remove volumes outside the new window - if *prev_k > start_idx { - for i in prev_start_idx..start_idx { - sum -= hlcvs[[i, idx, VOLUME]]; - } - } - - // Add new volumes - for i in (*prev_k).max(start_idx)..k { - sum += hlcvs[[i, idx, VOLUME]]; - } - - volume_sums.insert(idx, sum); - } - - *prev_k = k; - } -} - pub struct Backtest { hlcvs: Array3, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, @@ -184,6 +142,7 @@ pub struct Backtest { n_eligible_long: usize, n_eligible_short: usize, rolling_volume_sum: RollingVolumeSum, + volume_indices_buffer: Option>, } impl Backtest { @@ -252,7 +211,13 @@ impl Backtest { did_fill_short: HashSet::new(), n_eligible_long, n_eligible_short, - rolling_volume_sum: RollingVolumeSum::new(), + rolling_volume_sum: RollingVolumeSum { + long: vec![0.0; n_coins], + short: vec![0.0; n_coins], + prev_k_long: 
0, + prev_k_short: 0, + }, + volume_indices_buffer: Some(vec![(0.0, 0); n_coins]), // Initialize here } } @@ -264,93 +229,65 @@ impl Backtest { }; let window = bot_params.filter_rolling_window; - let start_idx = k.saturating_sub(window); - - // Calculate volume sums for all coins - let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) - .map(|idx| { - let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); - (idx, sum) - }) - .collect(); - - // Sort by volume in descending order - volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); - - // Take top n_eligible indices - let filtered_indices: Vec = volume_sums - .into_iter() - .take(n_eligible) - .map(|(idx, _)| idx) - .collect(); + let start_k = k.saturating_sub(window); - // Calculate noisiness - let mut noisiness: Vec<(usize, f64)> = filtered_indices - .into_iter() - .map(|idx| { - let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - let mean_nrr = nrr_sum / (k - start_idx) as f64; - (idx, mean_nrr) - }) - .collect(); - - // Sort by noisiness in descending order - noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); - - // Return indices sorted by noisiness - noisiness.into_iter().map(|(idx, _)| idx).collect() - } - - pub fn calc_preferred_coins_old(&self, k: usize, pside: usize) -> Vec { - let (bot_params, n_eligible) = match pside { - LONG => (&self.bot_params_pair.long, self.n_eligible_long), - SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + let (rolling_volume_sum, prev_k) = match pside { + LONG => ( + &mut self.rolling_volume_sum.long, + &mut self.rolling_volume_sum.prev_k_long, + ), + SHORT => ( + &mut self.rolling_volume_sum.short, + &mut self.rolling_volume_sum.prev_k_short, + ), _ => panic!("Invalid pside"), }; - let start_idx = k.saturating_sub(bot_params.filter_rolling_window); - - // Calculate volume sums - let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) - .map(|idx| { - let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); - (idx, sum) - }) - .collect(); + // Use the pre-allocated buffer for volume indices + let volume_indices = self.volume_indices_buffer.as_mut().unwrap(); - // Sort by volume in descending order - volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + // Update rolling volume sums + if k > window && k - *prev_k < window { + // Rolling calculation + for idx in 0..self.n_coins { + rolling_volume_sum[idx] -= self + .hlcvs + .slice(s![*prev_k - window..start_k, idx, VOLUME]) + .sum(); + rolling_volume_sum[idx] += self.hlcvs.slice(s![*prev_k..k, idx, VOLUME]).sum(); + volume_indices[idx] = (rolling_volume_sum[idx], idx); + } + } else { + // Full calculation + for idx in 0..self.n_coins { + rolling_volume_sum[idx] = self.hlcvs.slice(s![start_k..k, idx, VOLUME]).sum(); + volume_indices[idx] = (rolling_volume_sum[idx], idx); + } + } + *prev_k = k; - // Filter by volume - let filtered_indices: Vec = volume_sums - .into_iter() - .take(n_eligible) - .map(|(idx, _)| idx) - .collect(); + // Partial sort to get top n_eligible coins by volume + volume_indices.select_nth_unstable_by(n_eligible, |a, b| { + b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal) + }); - // Calculate noisiness - let mut noisiness: Vec<(usize, f64)> = filtered_indices - .into_iter() - .map(|idx| { - let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - 
.map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - let mean_nrr = nrr_sum / (k - start_idx) as f64; - (idx, mean_nrr) - }) - .collect(); + // Calculate noisiness for top n_eligible coins + let mut noisinesses = vec![(0.0f64, 0usize); n_eligible]; + for (i, &(_, idx)) in volume_indices.iter().take(n_eligible).enumerate() { + let noisiness: f64 = self + .hlcvs + .slice(s![start_k..k, idx, ..]) + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + noisinesses[i] = (noisiness, idx); + } // Sort by noisiness in descending order - noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + noisinesses.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); // Return indices sorted by noisiness - noisiness.into_iter().map(|(idx, _)| idx).collect() + noisinesses.into_iter().map(|(_, idx)| idx).collect() } pub fn run(&mut self) -> (Vec, Vec) { From 784317adbe6ad2b1179fd45edfb734cd31355623 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Thu, 26 Sep 2024 17:27:50 -0400 Subject: [PATCH 15/32] further optimizations of calc_preferred_coins --- passivbot-rust/src/backtest.rs | 54 ++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index e8b8a02a3..fe8ad44b8 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -154,14 +154,6 @@ impl Backtest { ) -> Self { let n_timesteps = hlcvs.shape()[0]; let n_coins = hlcvs.shape()[1]; - let n_eligible_long = bot_params_pair.long.n_positions.max( - (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() - as usize, - ); - let n_eligible_short = bot_params_pair.short.n_positions.max( - (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() - as usize, - ); let initial_emas = (0..n_coins) .map(|i| { let close_price = hlcvs[[0, i, CLOSE]]; @@ -176,6 +168,14 @@ impl Backtest { let mut bot_params_pair_cloned = bot_params_pair.clone(); bot_params_pair_cloned.long.n_positions = n_coins.min(bot_params_pair.long.n_positions); bot_params_pair_cloned.short.n_positions = n_coins.min(bot_params_pair.short.n_positions); + let n_eligible_long = bot_params_pair_cloned.long.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() + as usize, + ); + let n_eligible_short = bot_params_pair_cloned.short.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() + as usize, + ); Backtest { hlcvs, bot_params_pair: bot_params_pair_cloned, @@ -222,9 +222,26 @@ impl Backtest { } pub fn calc_preferred_coins(&mut self, k: usize, pside: usize) -> Vec { - let (bot_params, n_eligible) = match pside { - LONG => (&self.bot_params_pair.long, self.n_eligible_long), - SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + let (bot_params, n_positions) = match pside { + LONG => ( + &self.bot_params_pair.long, + self.bot_params_pair.long.n_positions, + ), + SHORT => ( + &self.bot_params_pair.short, + self.bot_params_pair.short.n_positions, + ), + _ => panic!("Invalid pside"), + }; + + // Early return if all coins are already eligible + if self.n_coins <= n_positions { + return (0..self.n_coins).collect(); + } + + let n_eligible = match pside { + LONG => self.n_eligible_long, + SHORT => self.n_eligible_short, _ => panic!("Invalid pside"), }; @@ -266,21 +283,21 @@ impl Backtest { } *prev_k = k; - // Partial sort to get 
top n_eligible coins by volume - volume_indices.select_nth_unstable_by(n_eligible, |a, b| { - b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal) - }); + // Sort by volume in descending order + volume_indices.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); // Calculate noisiness for top n_eligible coins - let mut noisinesses = vec![(0.0f64, 0usize); n_eligible]; - for (i, &(_, idx)) in volume_indices.iter().take(n_eligible).enumerate() { + let actual_n_eligible = n_eligible.min(self.n_coins); + let mut noisinesses = Vec::with_capacity(actual_n_eligible); + + for &(_, idx) in volume_indices.iter().take(actual_n_eligible) { let noisiness: f64 = self .hlcvs .slice(s![start_k..k, idx, ..]) .axis_iter(Axis(0)) .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) .sum(); - noisinesses[i] = (noisiness, idx); + noisinesses.push((noisiness, idx)); } // Sort by noisiness in descending order @@ -289,7 +306,6 @@ impl Backtest { // Return indices sorted by noisiness noisinesses.into_iter().map(|(_, idx)| idx).collect() } - pub fn run(&mut self) -> (Vec, Vec) { let check_points: Vec = (0..7).map(|i| i * 60 * 24).collect(); let n_timesteps = self.hlcvs.shape()[0]; From ba9f0c1728d11ddb660e370cb16b3e3f018d0489 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 09:28:11 -0400 Subject: [PATCH 16/32] add missing params --- src/pure_funcs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pure_funcs.py b/src/pure_funcs.py index 5560d7855..28162f761 100644 --- a/src/pure_funcs.py +++ b/src/pure_funcs.py @@ -597,6 +597,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], + "long_filter_rolling_window": [10.0, 1440.0], + "long_filter_relative_volume_clip_pct": [0.0, 1.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 2.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -620,6 +622,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], + "short_filter_rolling_window": [10.0, 1440.0], + "short_filter_relative_volume_clip_pct": [0.0, 1.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 2.0], "short_unstuck_close_pct": [0.001, 0.1], From b05c57250785b8e343d9763d7b7cc0a3527bff5b Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 09:46:20 -0400 Subject: [PATCH 17/32] adapt to new filter params --- src/passivbot.py | 107 ++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 57 deletions(-) diff --git a/src/passivbot.py b/src/passivbot.py index 2790b77f1..a96027621 100644 --- a/src/passivbot.py +++ b/src/passivbot.py @@ -235,20 +235,16 @@ async def execute_to_exchange(self, debug_mode=False): if to_cancel or to_create: self.previous_REST_update_ts = 0 - def is_forager_mode(self): + def is_forager_mode(self, pside=None): n_approved_symbols = len(self.config["live"]["approved_coins"]) if n_approved_symbols == 0: return True + if pside is None: + return self.is_forager_mode("long") or self.is_forager_mode("short") if ( - self.config["bot"]["long"]["total_wallet_exposure_limit"] > 0.0 - and self.config["bot"]["long"]["n_positions"] > 0 - and round(self.config["bot"]["long"]["n_positions"]) < n_approved_symbols - ): - return True - if ( - 
self.config["bot"]["short"]["total_wallet_exposure_limit"] > 0.0 - and self.config["bot"]["short"]["n_positions"] > 0 - and round(self.config["bot"]["short"]["n_positions"]) < n_approved_symbols + self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 + and self.config["bot"][pside]["n_positions"] > 0 + and round(self.config["bot"][pside]["n_positions"]) < n_approved_symbols ): return True return False @@ -259,6 +255,8 @@ def set_live_configs(self): "total_wallet_exposure_limit", "unstuck_loss_allowance_pct", "unstuck_close_pct", + "filter_rolling_window", + "filter_relative_volume_clip_pct", } self.config["bot"]["long"]["n_positions"] = round(self.config["bot"]["long"]["n_positions"]) self.config["bot"]["short"]["n_positions"] = round(self.config["bot"]["short"]["n_positions"]) @@ -719,29 +717,24 @@ def update_PB_modes(self): self.ideal_actives[pside][symbol] = "" if symbol in self.actual_actives[pside]: self.PB_modes[pside][symbol] = self.forced_modes[pside][symbol] - if self.forager_mode: - if self.config["live"]["relative_volume_filter_clip_pct"] > 0.0: - self.calc_volumes() - # filter by relative volume - eligible_symbols = sorted(self.volumes, key=lambda x: self.volumes[x])[ - int( - round( - len(self.volumes) * self.config["live"]["relative_volume_filter_clip_pct"] - ) - ) : - ] - else: - eligible_symbols = list(self.eligible_symbols) - self.calc_noisiness() # ideal symbols are high noise symbols - - # calc ideal actives for long and short separately - for pside in self.actual_actives: - if ( - self.config["bot"][pside]["n_positions"] > 0 - and self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): + if self.is_forager_mode(pside): + if self.config["bot"][pside]["filter_relative_volume_clip_pct"] > 0.0: + volumes = self.calc_volumes(pside) + # filter by relative volume + n_eligible = round( + len(volumes) + * (1 - self.config["bot"][pside]["filter_relative_volume_clip_pct"]) + ) + eligible_symbols = sorted(volumes, key=lambda x: volumes[x], reverse=True)[ + : int(max(n_eligible, self.config["bot"][pside]["n_positions"])) + ] + else: + eligible_symbols = list(self.eligible_symbols) + # ideal symbols are high noise symbols + noisiness = self.calc_noisiness(pside, eligible_symbols=eligible_symbols) + if self.is_enabled(pside=pside): self.warn_on_high_effective_min_cost(pside) - for symbol in sorted(self.noisiness, key=lambda x: self.noisiness[x], reverse=True): + for symbol in sorted(noisiness, key=lambda x: noisiness[x], reverse=True): if ( not self.is_enabled(symbol, pside) or symbol not in self.eligible_symbols @@ -750,10 +743,8 @@ def update_PB_modes(self): or not self.effective_min_cost_is_low_enough(pside, symbol) ): continue - slots_full = ( - len(self.ideal_actives[pside]) >= self.config["bot"][pside]["n_positions"] - ) - if slots_full: + if len(self.ideal_actives[pside]) >= self.config["bot"][pside]["n_positions"]: + # slots full break if symbol not in self.ideal_actives[pside]: self.ideal_actives[pside][symbol] = "" @@ -780,14 +771,12 @@ def update_PB_modes(self): if len(slots_filled) >= self.config["bot"][pside]["n_positions"]: break self.PB_modes[pside][symbol] = "normal" - else: - # if not forager mode, all eligible symbols are ideal symbols, unless symbol in forced_modes - for pside in ["long", "short"]: - if ( - self.config["bot"][pside]["n_positions"] > 0 - and self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): + else: + # if not forager mode, all eligible symbols are ideal symbols, unless symbol in forced_modes + if 
self.is_enabled(pside=pside): self.warn_on_high_effective_min_cost(pside) + else: + continue for symbol in self.eligible_symbols: if self.is_enabled(symbol, pside): if not self.effective_min_cost_is_low_enough(pside, symbol): @@ -1312,9 +1301,11 @@ def update_effective_min_cost(self, symbol=None): logging.error(f"error with {get_function_name()} for {symbol}: {e}") traceback.print_exc() - def is_enabled(self, symbol, pside=None): + def is_enabled(self, symbol=None, pside=None): if pside is None: return self.is_enabled(symbol, "long") or self.is_enabled(symbol, "short") + if symbol is None: + return any([self.is_enabled(symbol, pside) for symbol in self.live_configs]) return ( symbol in self.live_configs and self.live_configs[symbol][pside]["wallet_exposure_limit"] > 0.0 @@ -1875,27 +1866,29 @@ async def start_bot(self, debug_mode=False): if not debug_mode: await self.run_execution_loop() - def calc_noisiness(self): - if not hasattr(self, "noisiness"): - self.noisiness = {} - n = int(round(self.config["live"]["ohlcv_rolling_window"])) - for symbol in self.eligible_symbols: + def calc_noisiness(self, pside, eligible_symbols=None): + if eligible_symbols is None: + eligible_symbols = self.eligible_symbols + noisiness = {} + n = int(round(self.config["bot"][pside]["filter_rolling_window"])) + for symbol in eligible_symbols: if symbol in self.ohlcvs_1m and self.ohlcvs_1m[symbol]: ohlcvs_1m = [v for v in self.ohlcvs_1m[symbol].values()[-n:]] - self.noisiness[symbol] = np.mean([(x[2] - x[3]) / x[4] for x in ohlcvs_1m]) + noisiness[symbol] = np.mean([(x[2] - x[3]) / x[4] for x in ohlcvs_1m]) else: - self.noisiness[symbol] = 0.0 + noisiness[symbol] = 0.0 + return noisiness - def calc_volumes(self): - if not hasattr(self, "volumes"): - self.volumes = {} - n = int(round(self.config["live"]["ohlcv_rolling_window"])) + def calc_volumes(self, pside): + n = int(round(self.config["bot"][pside]["filter_rolling_window"])) + volumes = {} for symbol in self.ohlcvs_1m: if self.ohlcvs_1m[symbol] and len(self.ohlcvs_1m[symbol]) > 0: ohlcvs_1m = [v for v in self.ohlcvs_1m[symbol].values()[-n:]] - self.volumes[symbol] = sum([x[4] * x[5] for x in ohlcvs_1m]) + volumes[symbol] = sum([x[4] * x[5] for x in ohlcvs_1m]) else: - self.volumes[symbol] = 0.0 + volumes[symbol] = 0.0 + return volumes async def execute_multiple(self, orders: [dict], type_: str, max_n_executions: int): if not orders: From 3a9694dda7d9ff42ba387acd856cd817f0c1e291 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 10:31:18 -0400 Subject: [PATCH 18/32] update docs to new filter parameters --- docs/configuration.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 3b4ae8343..ebfc97094 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -108,7 +108,16 @@ If a position is stuck, bot will use profits made on other positions to realize - `unstuck_threshold`: - if a position is bigger than a threshold, consider it stuck and activate unstucking. - `if wallet_exposure / wallet_exposure_limit > unstuck_threshold: unstucking enabled` - - e.g. if a position size is $500 and max allowed position size is $1000, then position is 50% full. If unstuck_threshold==0.45, then unstuck the position until its size is $450. + - e.g. if a position size is $500 and max allowed position size is $1000, then position is 50% full. If unstuck_threshold==0.45, then unstuck the position until its size is $450. 
+ +### Filter Parameters + +Coins selected for trading are filtered by volume and noisiness. First, filter coins by volume, dropping x% of the lowest volume coins, then sort the eligible coins by noisiness and select the top noisiest coins for trading. + +- `filter_relative_volume_clip_pct`: Volume filter: disapprove the lowest relative volume coins. E.g. `filter_relative_volume_clip_pct=0.1`: drop 10% lowest volume coins. Set to zero to allow all. +- `filter_rolling_window`: number of minutes to look into the past to compute volume and noisiness, used for dynamic coin selection in forager mode. + - noisiness is normalized relative range of 1m ohlcvs: `mean((high - low) / close)` + - in forager mode, bot will select coins with highest noisiness for opening positions ## Live Trading Settings - `approved_coins`: list of coins approved for trading. If empty, all coins are approved. @@ -136,12 +145,8 @@ If a position is stuck, bot will use profits made on other positions to realize - `max_n_cancellations_per_batch`: will cancel n open orders per execution - `max_n_creations_per_batch`: will create n new orders per execution - `minimum_coin_age_days`: disallow coins younger than a given number of days -- `ohlcv_rolling_window`: number of minutes to look into the past to compute volume and noisiness, used for dynamic coin selection in forager mode. - - noisiness is normalized relative range of 1m ohlcvs: `mean((high - low) / close)` - - in forager mode, bot will select coins with highest noisiness for opening positions - `pnls_max_lookback_days`: how far into the past to fetch pnl history - `price_distance_threshold`: minimum distance to current price action required for EMA based limit orders -- `relative_volume_filter_clip_pct`: Volume filter: disapprove the lowest relative volume coins. Default 0.1 == 10%. Set to zero to allow all. 
- `time_in_force`: default is good-till-cancelled
- `user`: fetch API key/secret from api-keys.json

From 10f98ebf33d5d9b66ffef5dac34b7e7c16396830 Mon Sep 17 00:00:00 2001
From: Eirik Narjord
Date: Sun, 29 Sep 2024 10:55:51 -0400
Subject: [PATCH 19/32] n_positions is not greater than n_eligible

---
 src/passivbot.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/passivbot.py b/src/passivbot.py
index a96027621..2cad78ea2 100644
--- a/src/passivbot.py
+++ b/src/passivbot.py
@@ -140,7 +140,6 @@ def __init__(self, config: dict):
             3000.0, self.config["live"]["execution_delay_seconds"] * 1000
         )
         self.quote = "USDT"
-        self.forager_mode = self.is_forager_mode()
 
         self.minimum_market_age_millis = (
             self.config["live"]["minimum_coin_age_days"] * 24 * 60 * 60 * 1000
@@ -258,8 +257,10 @@ def set_live_configs(self):
             "filter_rolling_window",
             "filter_relative_volume_clip_pct",
         }
-        self.config["bot"]["long"]["n_positions"] = round(self.config["bot"]["long"]["n_positions"])
-        self.config["bot"]["short"]["n_positions"] = round(self.config["bot"]["short"]["n_positions"])
+        for pside in ["long", "short"]:
+            self.config["bot"][pside]["n_positions"] = min(
+                len(self.eligible_symbols), int(round(self.config["bot"][pside]["n_positions"]))
+            )
         for symbol in self.markets_dict:
             self.live_configs[symbol] = deepcopy(self.config["bot"])
             self.live_configs[symbol]["leverage"] = self.config["live"]["leverage"]
@@ -668,7 +669,7 @@ async def init_flags(self):
             if not self.markets_dict[symbol]["active"]:
                 self.forced_modes[pside][symbol] = "tp_only"
 
-        if self.forager_mode and self.minimum_market_age_millis > 0:
+        if self.is_forager_mode() and self.minimum_market_age_millis > 0:
             if not hasattr(self, "first_timestamps"):
                 self.first_timestamps = await get_first_ohlcv_timestamps(
                     cc=self.cca, symbols=sorted(self.eligible_symbols)
@@ -679,7 +680,7 @@ async def init_flags(self):
             self.first_timestamps = None
 
     def is_old_enough(self, symbol):
-        if self.forager_mode and self.minimum_market_age_millis > 0:
+        if self.is_forager_mode() and self.minimum_market_age_millis > 0:
             if symbol in self.first_timestamps:
                 return utc_ms() - self.first_timestamps[symbol] > self.minimum_market_age_millis
             else:
@@ -873,7 +874,7 @@ def effective_min_cost_is_low_enough(self, pside, symbol):
             WE_limit = self.live_configs[symbol][pside]["wallet_exposure_limit"]
             assert WE_limit > 0.0
         except:
-            if self.forager_mode:
+            if self.is_forager_mode(pside):
                 WE_limit = (
                     self.config["bot"][pside]["total_wallet_exposure_limit"]
                     / self.config["bot"][pside]["n_positions"]
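With n_positions now capped by the number of eligible symbols, the fallback branch of effective_min_cost_is_low_enough splits the total wallet exposure limit evenly across position slots. A worked Python example using the long-side values from the template config (the eligible-symbol count is made up for illustration):

    n_eligible_symbols = 40                               # hypothetical
    n_positions = min(n_eligible_symbols, round(10.776))  # -> 11
    total_wallet_exposure_limit = 0.97499
    WE_limit = total_wallet_exposure_limit / n_positions
    print(round(WE_limit, 5))                             # -> 0.08864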
"filter_rolling_window": 60, - "filter_relative_volume_clip_pct": 0.95, + "filter_relative_volume_clip_pct": 0.5, + "filter_rolling_window": 60.0, "n_positions": 7.6679, "total_wallet_exposure_limit": 0.0, "unstuck_close_pct": 0.052781, @@ -86,8 +86,8 @@ "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], - "long_filter_rolling_window": [10.0, 1440.0], "long_filter_relative_volume_clip_pct": [0.0, 1.0], + "long_filter_rolling_window": [10.0, 1440.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 5.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -111,8 +111,8 @@ "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], - "short_filter_rolling_window": [10.0, 1440.0], "short_filter_relative_volume_clip_pct": [0.0, 1.0], + "short_filter_rolling_window": [10.0, 1440.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 5.0], "short_unstuck_close_pct": [0.001, 0.1], From a29502ea1a48eed11865c53fe01d3dc3a869bacd Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:11:27 -0400 Subject: [PATCH 21/32] use shared memory to avoid data duplication during multiprocessing --- passivbot-rust/src/backtest.rs | 8 ++++---- passivbot-rust/src/python.rs | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index fe8ad44b8..f8032c01a 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -116,8 +116,8 @@ pub struct RollingVolumeSum { prev_k_short: usize, } -pub struct Backtest { - hlcvs: Array3, // 3D array: (n_timesteps, n_coins, 4) +pub struct Backtest<'a> { + hlcvs: &'a Array3, bot_params_pair: BotParamsPair, exchange_params_list: Vec, backtest_params: BacktestParams, @@ -145,9 +145,9 @@ pub struct Backtest { volume_indices_buffer: Option>, } -impl Backtest { +impl<'a> Backtest<'a> { pub fn new( - hlcvs: Array3, + hlcvs: &'a Array3, bot_params_pair: BotParamsPair, exchange_params_list: Vec, backtest_params: &BacktestParams, diff --git a/passivbot-rust/src/python.rs b/passivbot-rust/src/python.rs index dc4983e7f..dd88e86bc 100644 --- a/passivbot-rust/src/python.rs +++ b/passivbot-rust/src/python.rs @@ -11,6 +11,8 @@ use crate::types::{ Analysis, BacktestParams, BotParams, BotParamsPair, EMABands, ExchangeParams, Order, OrderBook, Position, StateParams, TrailingPriceBundle, }; +use memmap::MmapOptions; +use ndarray::ShapeBuilder; use ndarray::{Array1, Array2, Array3, Array4, ArrayBase, ArrayD}; use numpy::{ IntoPyArray, PyArray1, PyArray2, PyArray3, PyArray4, PyReadonlyArray2, PyReadonlyArray3, @@ -20,15 +22,40 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use pyo3::wrap_pyfunction; +use std::{fs::File, slice}; #[pyfunction] pub fn run_backtest( - hlcvs: PyReadonlyArray3, + shared_memory_file: &str, + hlcvs_shape: (usize, usize, usize), + hlcvs_dtype: &str, bot_params_pair_dict: &PyDict, exchange_params_list: &PyAny, backtest_params_dict: &PyDict, ) -> PyResult<(Py>, Py>, Py)> { - let hlcvs_rust = hlcvs.as_array(); + // Open the memory-mapped file + let file = File::open(shared_memory_file) + .map_err(|e| PyValueError::new_err(format!("Unable to open shared memory file: {}", e)))?; + + let mmap = unsafe { + MmapOptions::new() + .map(&file) + .map_err(|e| 
PyValueError::new_err(format!("Unable to map file: {}", e)))? + }; + + // Create an ndarray view of the memory-mapped file + let hlcvs_rust = unsafe { + match hlcvs_dtype { + " { + let data = slice::from_raw_parts( + mmap.as_ptr() as *const f64, + hlcvs_shape.0 * hlcvs_shape.1 * hlcvs_shape.2, + ); + Array3::::from_shape_vec(hlcvs_shape.into_shape(), data.to_vec()).unwrap() + } + _ => return Err(PyValueError::new_err("Unsupported dtype for HLCV data")), + } + }; let bot_params_pair = bot_params_pair_from_dict(bot_params_pair_dict)?; let exchange_params = { @@ -55,7 +82,7 @@ pub fn run_backtest( let backtest_params = backtest_params_from_dict(backtest_params_dict)?; let mut backtest = Backtest::new( - hlcvs_rust.to_owned(), + &hlcvs_rust, bot_params_pair, exchange_params, &backtest_params, From d27d688f7b586bb6555e65d8009e8c9733b19ecf Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:11:47 -0400 Subject: [PATCH 22/32] new dependency memmap --- passivbot-rust/Cargo.lock | 33 +++++++++++++++++++++++++++++++++ passivbot-rust/Cargo.toml | 1 + 2 files changed, 34 insertions(+) diff --git a/passivbot-rust/Cargo.lock b/passivbot-rust/Cargo.lock index a71c50a4b..d393ee37b 100644 --- a/passivbot-rust/Cargo.lock +++ b/passivbot-rust/Cargo.lock @@ -58,6 +58,16 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "memoffset" version = "0.9.1" @@ -155,6 +165,7 @@ dependencies = [ name = "passivbot_rust" version = "0.1.0" dependencies = [ + "memmap", "ndarray", "numpy", "pyo3", @@ -309,6 +320,28 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-targets" version = "0.52.5" diff --git a/passivbot-rust/Cargo.toml b/passivbot-rust/Cargo.toml index 8a4163dad..e5ad785dd 100644 --- a/passivbot-rust/Cargo.toml +++ b/passivbot-rust/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["cdylib"] pyo3 = { version = "0.21.2", features = ["extension-module"] } ndarray = "0.15.6" numpy = "0.21.0" +memmap = "0.7.0" \ No newline at end of file From c3a8b43da3c7979e43d0a4b6425ab5292dc44725 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:12:15 -0400 Subject: [PATCH 23/32] default to template config if config path not passed --- src/backtest.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/backtest.py b/src/backtest.py index 2a2b19bc3..a23d40970 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -222,7 +222,9 @@ async def main(): datefmt="%Y-%m-%dT%H:%M:%S", ) parser = 
argparse.ArgumentParser(prog="backtest", description="run forager backtest") - parser.add_argument("config_path", type=str, default=None, help="path to hjson passivbot config") + parser.add_argument( + "config_path", type=str, default=None, nargs="?", help="path to json passivbot config" + ) template_config = get_template_live_config("v7") del template_config["optimize"] keep_live_keys = { @@ -235,7 +237,12 @@ async def main(): del template_config["live"][key] add_arguments_recursively(parser, template_config) args = parser.parse_args() - config = load_config("configs/template.hjson" if args.config_path is None else args.config_path) + if args.config_path is None: + logging.info(f"loading default template config configs/template.json") + config = load_config("configs/template.json") + else: + logging.info(f"loading config {args.config_path}") + config = load_config(args.config_path) update_config_with_args(config, args) config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) From 00d3f8a50197a0c4d985fd1aca4222a387752ca9 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:12:40 -0400 Subject: [PATCH 24/32] use shared memory for parallel optimizing --- src/optimize.py | 70 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 0fae3a202..914d8e9c4 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -5,6 +5,7 @@ import argparse import multiprocessing import subprocess +import mmap from multiprocessing import shared_memory from backtest import ( prepare_hlcvs_mss, @@ -37,6 +38,22 @@ import json import pprint from deap import base, creator, tools, algorithms +from contextlib import contextmanager +import tempfile + + +def create_shared_memory_file(hlcvs): + temp_file = tempfile.NamedTemporaryFile(delete=False) + shared_memory_file = temp_file.name + + try: + with open(shared_memory_file, "wb") as f: + f.write(hlcvs.tobytes()) + except IOError as e: + print(f"Error writing to shared memory file: {e}") + raise + + return shared_memory_file def mutPolynomialBoundedWrapper(individual, eta, low, up, indpb): @@ -144,18 +161,24 @@ def config_to_individual(config): return individual +@contextmanager +def managed_mmap(filename, dtype, shape): + try: + mmap = np.memmap(filename, dtype=dtype, mode="r", shape=shape) + yield mmap + finally: + del mmap + + class Evaluator: - def __init__(self, hlcvs, config, mss): - self.hlcvs = hlcvs - self.shared_hlcvs = shared_memory.SharedMemory(create=True, size=self.hlcvs.nbytes) - self.shared_hlcvs_np = np.ndarray( - self.hlcvs.shape, dtype=self.hlcvs.dtype, buffer=self.shared_hlcvs.buf - ) - np.copyto(self.shared_hlcvs_np, self.hlcvs) - del self.hlcvs + def __init__(self, shared_memory_file, hlcvs_shape, hlcvs_dtype, config, mss): + self.shared_memory_file = shared_memory_file + self.hlcvs_shape = hlcvs_shape + self.hlcvs_dtype = hlcvs_dtype + self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) + self.shared_hlcvs_np = self.mmap_context.__enter__() self.config = config - _, self.exchange_params, self.backtest_params = prep_backtest_args(config, mss) def evaluate(self, individual): @@ -164,7 +187,9 @@ def evaluate(self, individual): config, [], exchange_params=self.exchange_params, backtest_params=self.backtest_params ) fills, equities, analysis = pbr.run_backtest( - self.shared_hlcvs_np, + self.shared_memory_file, + self.shared_hlcvs_np.shape, + 
self.shared_hlcvs_np.dtype.str, bot_params, self.exchange_params, self.backtest_params, @@ -193,10 +218,22 @@ def calc_fitness(self, analysis): w_1 = modifier - analysis[self.config["optimize"]["scoring"][1]] return w_0, w_1 - def cleanup(self): - # Close and unlink the shared memory - self.shared_hlcvs.close() - self.shared_hlcvs.unlink() + def __del__(self): + if hasattr(self, "mmap_context"): + self.mmap_context.__exit__(None, None, None) + + def __getstate__(self): + # This method is called when pickling. We exclude mmap_context and shared_hlcvs_np + state = self.__dict__.copy() + del state["mmap_context"] + del state["shared_hlcvs_np"] + return state + + def __setstate__(self, state): + # This method is called when unpickling. We recreate mmap_context and shared_hlcvs_np + self.__dict__.update(state) + self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) + self.shared_hlcvs_np = self.mmap_context.__enter__() def add_extra_options(parser): @@ -283,7 +320,8 @@ async def main(): f"optimize_results/{date_fname}_{coins_fname}_{hash_snippet}_all_results.txt" ) try: - evaluator = Evaluator(hlcvs, config, mss) + shared_memory_file = create_shared_memory_file(hlcvs) + evaluator = Evaluator(shared_memory_file, hlcvs.shape, hlcvs.dtype, config, mss) creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0)) # Minimize both objectives creator.create("Individual", list, fitness=creator.FitnessMulti) @@ -398,7 +436,7 @@ def create_individual(): finally: # Close the pool logging.info(f"attempting clean shutdown...") - evaluator.cleanup() + os.unlink(shared_memory_file) sys.exit(0) # pool.close() # pool.join() From 5bc025140b6df94298b591bdf2db9cb3a623386f Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 12:14:13 -0400 Subject: [PATCH 25/32] adapt to shared memory --- src/backtest.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/backtest.py b/src/backtest.py index a23d40970..ad00312a7 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -28,6 +28,22 @@ import logging from main import manage_rust_compilation +import tempfile +from contextlib import contextmanager + + +@contextmanager +def create_shared_memory_file(hlcvs): + temp_file = tempfile.NamedTemporaryFile(delete=False) + shared_memory_file = temp_file.name + try: + with open(shared_memory_file, "wb") as f: + f.write(hlcvs.tobytes()) + yield shared_memory_file + finally: + os.unlink(shared_memory_file) + + plt.rcParams["figure.figsize"] = [29, 18] @@ -167,7 +183,17 @@ def run_backtest(hlcvs, mss, config: dict): bot_params, exchange_params, backtest_params = prep_backtest_args(config, mss) print(f"Starting backtest...") sts = utc_ms() - fills, equities, analysis = pbr.run_backtest(hlcvs, bot_params, exchange_params, backtest_params) + + with create_shared_memory_file(hlcvs) as shared_memory_file: + fills, equities, analysis = pbr.run_backtest( + shared_memory_file, + hlcvs.shape, + hlcvs.dtype.str, + bot_params, + exchange_params, + backtest_params, + ) + print(f"seconds elapsed for backtest: {(utc_ms() - sts) / 1000:.4f}") return fills, equities, analysis From 04ad09818d37bb10269f2ba8bd0d30d9334e2150 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 12:16:11 -0400 Subject: [PATCH 26/32] rewrite cleanup --- src/optimize.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 914d8e9c4..509b9756f 100644 --- 
a/src/optimize.py +++ b/src/optimize.py @@ -32,7 +32,6 @@ from main import manage_rust_compilation import numpy as np from uuid import uuid4 -import signal import logging import traceback import json @@ -136,11 +135,6 @@ def cxSimulatedBinaryBoundedWrapper(ind1, ind2, eta, low, up): return ind1, ind2 -def signal_handler(signal, frame): - print("\nOptimization interrupted by user. Exiting gracefully...") - sys.exit(0) - - def individual_to_config(individual, template=None): if template is None: template = get_template_live_config("v7") @@ -163,11 +157,18 @@ def config_to_individual(config): @contextmanager def managed_mmap(filename, dtype, shape): + mmap = None try: mmap = np.memmap(filename, dtype=dtype, mode="r", shape=shape) yield mmap + except FileNotFoundError: + if shutdown_event.is_set(): + yield None + else: + raise finally: - del mmap + if mmap is not None: + del mmap class Evaluator: @@ -230,10 +231,11 @@ def __getstate__(self): return state def __setstate__(self, state): - # This method is called when unpickling. We recreate mmap_context and shared_hlcvs_np self.__dict__.update(state) self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) self.shared_hlcvs_np = self.mmap_context.__enter__() + if self.shared_hlcvs_np is None: + print("Warning: Unable to recreate shared memory mapping during unpickling.") def add_extra_options(parser): @@ -295,7 +297,6 @@ async def main(): add_arguments_recursively(parser, template_config) add_extra_options(parser) args = parser.parse_args() - signal.signal(signal.SIGINT, signal_handler) logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", level=logging.INFO, @@ -419,6 +420,7 @@ def create_individual(): print(logbook) logging.info(f"Optimization complete.") + try: logging.info(f"Extracting best config...") result = subprocess.run( @@ -430,16 +432,25 @@ def create_individual(): print(result.stdout) except Exception as e: logging.error(f"failed to extract best config {e}") - ######## except Exception as e: + logging.error(f"An error occurred: {e}") traceback.print_exc() finally: - # Close the pool - logging.info(f"attempting clean shutdown...") - os.unlink(shared_memory_file) + if "pool" in locals(): + logging.info("Closing and terminating the process pool...") + pool.close() + pool.terminate() + pool.join() + + if shared_memory_file and os.path.exists(shared_memory_file): + logging.info(f"Removing shared memory file: {shared_memory_file}") + try: + os.unlink(shared_memory_file) + except Exception as e: + logging.error(f"Error removing shared memory file: {e}") + + logging.info("Cleanup complete. 
Exiting.") sys.exit(0) - # pool.close() - # pool.join() if __name__ == "__main__": From 55dd211e1cae61fcbd45b1c5a48220e1e1421510 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 11:46:41 -0400 Subject: [PATCH 27/32] log loading config with -lc --- src/passivbot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/passivbot.py b/src/passivbot.py index 2cad78ea2..3a26d3498 100644 --- a/src/passivbot.py +++ b/src/passivbot.py @@ -277,6 +277,9 @@ def set_live_configs(self): if symbol in self.flags and self.flags[symbol].live_config_path is not None: try: loaded = load_config(self.flags[symbol].live_config_path) + logging.info( + f"successfully loaded {self.flags[symbol].live_config_path} for {symbol}" + ) for pside in loaded["bot"]: for k, v in loaded["bot"][pside].items(): if k not in skip: From cb6c81d3ceaa9cf37a0570d34797acea6f7089f3 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:14:13 -0400 Subject: [PATCH 28/32] compile rust if never compiled --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 84bb62b06..a89bf113b 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,4 @@ import asyncio -from passivbot import main import os import time import subprocess @@ -98,4 +97,5 @@ def manage_rust_compilation(): if __name__ == "__main__": manage_rust_compilation() + from passivbot import main asyncio.run(main()) From 0fb5c609d7049087734f24f42d7e360d2538a515 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:57:13 -0400 Subject: [PATCH 29/32] add prints --- src/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main.py b/src/main.py index a89bf113b..046431b03 100644 --- a/src/main.py +++ b/src/main.py @@ -26,6 +26,7 @@ def check_compilation_needed(): # Find the most recently modified compiled extension compiled_files = [path for path in COMPILED_EXTENSION_PATHS if os.path.exists(path)] if not compiled_files: + print(f"No Rust extension found. Compiling...") return True # No extension found, compilation needed compiled_time = max(os.path.getmtime(path) for path in compiled_files) @@ -36,6 +37,7 @@ def check_compilation_needed(): if file.endswith(".rs"): file_path = os.path.join(root, file) if os.path.getmtime(file_path) > compiled_time: + print(f"Rust extension found, but out of date. 
Recompiling...") return True # A source file is newer, compilation needed return False # No compilation needed @@ -98,4 +100,5 @@ def manage_rust_compilation(): if __name__ == "__main__": manage_rust_compilation() from passivbot import main + asyncio.run(main()) From 757db2deec7258e8c22808024feaa2fa2fadf004 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:57:33 -0400 Subject: [PATCH 30/32] print full path in recursive_config_update --- src/procedures.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/procedures.py b/src/procedures.py index 248bf7435..5f8d1e7d8 100644 --- a/src/procedures.py +++ b/src/procedures.py @@ -1246,15 +1246,22 @@ def add_arguments_recursively(parser, config, prefix="", acronyms=set()): acronyms.add(acronym) -def recursive_config_update(config, key, value): +def recursive_config_update(config, key, value, path=None): + if path is None: + path = [] + if key in config: if value != config[key]: - print(f"changed {key} {config[key]} -> {value}") + full_path = ".".join(path + [key]) + print(f"changed {full_path} {config[key]} -> {value}") config[key] = value return True + key_split = key.split("_") if key_split[0] in config: - return recursive_config_update(config[key_split[0]], "_".join(key_split[1:]), value) + new_path = path + [key_split[0]] + return recursive_config_update(config[key_split[0]], "_".join(key_split[1:]), value, new_path) + return False From 23c0e408e755b373cf5bdb45300dbaf558a4473b Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 14:18:36 -0400 Subject: [PATCH 31/32] use npy instead of csv; cleanup code --- src/downloader.py | 1310 +++------------------------------------------ 1 file changed, 69 insertions(+), 1241 deletions(-) diff --git a/src/downloader.py b/src/downloader.py index 0c8686745..3ce5454d4 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -36,938 +36,6 @@ from pure_funcs import ts_to_date, ts_to_date_utc, date_to_ts2, get_dummy_settings, get_day, numpyize -class Downloader: - """ - Downloader class for tick data. Fetches data from specified time until now or specified time. 
- """ - - def __init__(self, config: dict): - self.fetch_delay_seconds = 0.75 - self.config = config - # use binance data for bybit - self.config["exchange"] = ( - "binance" - if self.config["exchange"] in ["bybit", "bitget", "okx", "kucoin"] - else self.config["exchange"] - ) - self.spot = "spot" in config and config["spot"] - self.tick_filepath = os.path.join( - config["caches_dirpath"], f"{config['session_name']}_ticks_cache.npy" - ) - try: - self.start_time = int( - parser.parse(self.config["start_date"]) - .replace(tzinfo=datetime.timezone.utc) - .timestamp() - * 1000 - ) - except Exception: - raise Exception(f"Unrecognized date format for start time {config['start_date']}") - try: - self.end_time = int( - parser.parse(self.config["end_date"]) - .replace(tzinfo=datetime.timezone.utc) - .timestamp() - * 1000 - ) - if self.end_time > utc_ms(): - raise Exception(f"End date later than current time {config['end_date']}") - except Exception: - raise Exception(f"Unrecognized date format for end time {config['end_date']}") - if self.config["exchange"] == "binance": - if self.spot: - self.daily_base_url = "https://data.binance.vision/data/spot/daily/aggTrades/" - self.monthly_base_url = "https://data.binance.vision/data/spot/monthly/aggTrades/" - else: - market_type = "cm" if config["inverse"] else "um" - self.daily_base_url = ( - f"https://data.binance.vision/data/futures/{market_type}/daily/aggTrades/" - ) - self.monthly_base_url = ( - f"https://data.binance.vision/data/futures/{market_type}/monthly/aggTrades/" - ) - elif self.config["exchange"] == "bybit": - self.daily_base_url = "https://public.bybit.com/trading/" - else: - raise Exception(f"unknown exchange {config['exchange']}") - if "historical_data_path" in self.config and self.config["historical_data_path"]: - self.filepath = make_get_filepath( - os.path.join( - self.config["historical_data_path"], - "historical_data", - self.config["exchange"], - f"agg_trades_{'spot' if self.spot else 'futures'}", - self.config["symbol"], - "", - ) - ) - else: - self.filepath = make_get_filepath( - os.path.join( - "historical_data", - self.config["exchange"], - f"agg_trades_{'spot' if self.spot else 'futures'}", - self.config["symbol"], - "", - ) - ) - - def validate_dataframe(self, df: pd.DataFrame) -> Tuple[bool, pd.DataFrame, pd.DataFrame]: - """ - Validates a dataframe and detects gaps in it. Also detects missing trades in the beginning and end. - @param df: Dataframe to check for gaps. - @return: A tuple with following result: if missing values present, the cleaned dataframe, a dataframe with start and end of gaps. 
- """ - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - missing_end_frame = df["trade_id"][df["trade_id"].diff() != 1] - gaps = pd.DataFrame() - gaps["start"] = df.iloc[missing_end_frame[1:].index - 1]["trade_id"].tolist() - gaps["end"] = missing_end_frame[1:].tolist() - missing_ids = df["trade_id"].iloc[0] % 100000 - if missing_ids != 0: - gaps = gaps.append( - { - "start": df["trade_id"].iloc[0] - missing_ids, - "end": df["trade_id"].iloc[0] - 1, - }, - ignore_index=True, - ) - missing_ids = df["trade_id"].iloc[-1] % 100000 - if missing_ids != 99999: - gaps = gaps.append( - { - "start": df["trade_id"].iloc[-1], - "end": df["trade_id"].iloc[-1] + (100000 - missing_ids - 1), - }, - ignore_index=True, - ) - if gaps.empty: - return False, df, gaps - else: - gaps["start"] = gaps["start"].astype(np.int64) - gaps["end"] = gaps["end"].astype(np.int64) - gaps.sort_values("start", inplace=True) - gaps.reset_index(drop=True, inplace=True) - gaps["start"] = gaps["start"].replace(0, 1) - return True, df, gaps - - def read_dataframe(self, path: str) -> pd.DataFrame: - """ - Reads a dataframe with correct data types. - @param path: The path to the dataframe. - @return: The read dataframe. - """ - try: - df = pd.read_csv( - path, - dtype={ - "trade_id": np.int64, - "price": np.float64, - "qty": np.float64, - "timestamp": np.int64, - "is_buyer_maker": np.int8, - }, - ) - except ValueError as e: - df = pd.DataFrame() - print_(["Error in reading dataframe", e]) - return df - - def save_dataframe(self, df: pd.DataFrame, filename: str, missing: bool, verified: bool) -> str: - """ - Saves a processed dataframe. Creates the name based on first and last trade id and first and last timestamp. - Deletes dataframes that are obsolete. For example, when gaps were filled. - @param df: The dataframe to save. - @param filename: The current name of the dataframe. - @param missing: If the dataframe had gaps. - @return: - """ - if verified: - new_name = f'{df["trade_id"].iloc[0]}_{df["trade_id"].iloc[-1]}_{df["timestamp"].iloc[0]}_{df["timestamp"].iloc[-1]}_verified.csv' - else: - new_name = f'{df["trade_id"].iloc[0]}_{df["trade_id"].iloc[-1]}_{df["timestamp"].iloc[0]}_{df["timestamp"].iloc[-1]}.csv' - if new_name != filename: - print_( - [ - "Saving file", - new_name, - ts_to_date(int(new_name.split("_")[2]) / 1000), - ] - ) - df.to_csv(os.path.join(self.filepath, new_name), index=False) - new_name = "" - try: - os.remove(os.path.join(self.filepath, filename)) - print_(["Removed file", filename]) - except: - pass - elif missing: - print_(["Replacing file", filename]) - df.to_csv(os.path.join(self.filepath, filename), index=False) - else: - new_name = "" - return new_name - - def transform_ticks(self, ticks: list) -> pd.DataFrame: - """ - Transforms tick data into a cleaned dataframe with correct data types. - @param ticks: List of tick dictionaries. - @return: Clean dataframe with correct data types. 
- """ - df = pd.DataFrame(ticks) - if not df.empty: - df["trade_id"] = df["trade_id"].astype(np.int64) - df["price"] = df["price"].astype(np.float64) - df["qty"] = df["qty"].astype(np.float64) - df["timestamp"] = df["timestamp"].astype(np.int64) - df["is_buyer_maker"] = df["is_buyer_maker"].astype(np.int8) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - return df - - def get_filenames(self) -> list: - """ - Returns a sorted list of all file names in the directory. - @return: Sorted list of file names. - """ - return sorted( - [f for f in os.listdir(self.filepath) if f.endswith(".csv")], - key=lambda x: int(eval(x[: x.find("_")].replace(".cs", "").replace("v", ""))), - ) - - def new_id( - self, - first_timestamp, - last_timestamp, - first_trade_id, - length, - start_time, - prev_div, - ): - """ - Calculates a new id based on several parameters. Uses a weighted approach for more stability. - @param first_timestamp: First timestamp in current result. - @param last_timestamp: Last timestamp in current result. - @param first_trade_id: First trade id in current result. - @param length: The amount of trades in the current result. - @param start_time: The time to look for. - @param prev_div: Previous results of this function. - @return: Estimated trade id. - """ - div = int((last_timestamp - first_timestamp) / length) - prev_div.append(div) - forward = int((first_timestamp - start_time) / np.mean(prev_div)) - return max(1, int(first_trade_id - forward)), prev_div, forward - - async def find_time(self, start_time) -> pd.DataFrame: - """ - Finds the trades according to the time. - Uses different approaches for exchanges depending if time based fetching is supported. - If time based searching is supported, directly fetch the data. - If time based searching is not supported, start with current trades and move closer to start time based on estimation. - @param start_time: Time to look for. - @return: Dataframe with first trade later or equal to start time. 
- """ - try: - ticks = await self.bot.fetch_ticks_time(start_time) - return self.transform_ticks(ticks) - except: - print_(["Finding id for start time..."]) - ticks = await self.bot.fetch_ticks() - df = self.transform_ticks(ticks) - highest_id = df["trade_id"].iloc[-1] - prev_div = [] - first_ts = df["timestamp"].iloc[0] - last_ts = df["timestamp"].iloc[-1] - first_id = df["trade_id"].iloc[0] - length = len(df) - while not start_time >= first_ts or not start_time <= last_ts: - loop_start = time() - nw_id, prev_div, forward = self.new_id( - first_ts, last_ts, first_id, length, start_time, prev_div - ) - print_( - [ - "Current time span from", - df["timestamp"].iloc[0], - "to", - df["timestamp"].iloc[-1], - "with earliest trade id", - df["trade_id"].iloc[0], - "estimating distance of", - forward, - "trades", - ] - ) - if nw_id > highest_id: - nw_id = highest_id - try: - ticks = await self.bot.fetch_ticks(from_id=int(nw_id), do_print=False) - df = self.transform_ticks(ticks) - if not df.empty: - first_ts = df["timestamp"].iloc[0] - last_ts = df["timestamp"].iloc[-1] - first_id = df["trade_id"].iloc[0] - length = len(df) - if nw_id == 1 and first_ts >= start_time: - break - except Exception: - print("Failed to fetch or transform...") - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - print_(["Found id for start time!"]) - return df[df["timestamp"] >= start_time] - - def get_zip(self, base_url, symbol, date): - """ - Fetches a full day of trades from the Binance repository. - @param symbol: Symbol to fetch. - @param date: Day to download. - @return: Dataframe with full day. - """ - print_(["Fetching", symbol, date]) - url = f"{base_url}{symbol.upper()}/{symbol.upper()}-aggTrades-{date}.zip" - print(url) - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - column_names = [ - "trade_id", - "price", - "qty", - "first", - "last", - "timestamp", - "is_buyer_maker", - ] - if self.spot: - column_names.append("best_match") - try: - resp = urlopen(url) - file_tmp = BytesIO() - with tqdm.wrapattr( - open(os.devnull, "wb"), "write", miniters=1, total=getattr(resp, "length", None) - ) as fout: - for chunk in resp: - fout.write(chunk) - file_tmp.write(chunk) - - with zipfile.ZipFile(file_tmp) as my_zip_file: - for contained_file in my_zip_file.namelist(): - tf = pd.read_csv(my_zip_file.open(contained_file), names=column_names) - if tf.trade_id.iloc[0] == "agg_trade_id": - # catch cases where header is included as first row - print("header in first row: attempting fix...") - tf = tf.iloc[1:].reset_index() - tf.is_buyer_maker = tf.is_buyer_maker == "true" - tf.drop( - errors="ignore", - columns=["index"], - inplace=True, - ) - tf.drop( - errors="ignore", - columns=["first", "last", "best_match"], - inplace=True, - ) - tf["trade_id"] = tf["trade_id"].astype(np.int64) - tf["price"] = tf["price"].astype(np.float64) - tf["qty"] = tf["qty"].astype(np.float64) - tf["timestamp"] = tf["timestamp"].astype(np.int64) - tf["is_buyer_maker"] = tf["is_buyer_maker"].astype(np.int8) - tf.sort_values("trade_id", inplace=True) - tf.drop_duplicates("trade_id", inplace=True) - tf.reset_index(drop=True, inplace=True) - if df.empty: - df = tf - else: - df = pd.concat([df, tf]) - except Exception as e: - print("Failed to fetch", date, e) - return df - - async def find_df_enclosing_timestamp(self, timestamp, guessed_chunk=None): - if guessed_chunk is not None: - if guessed_chunk[0]["timestamp"] < timestamp < guessed_chunk[-1]["timestamp"]: - print_(["found id"]) - 
return self.transform_ticks(guessed_chunk) - else: - guessed_chunk = sorted( - await self.bot.fetch_ticks(do_print=False), key=lambda x: x["trade_id"] - ) - return await self.find_df_enclosing_timestamp(timestamp, guessed_chunk) - - if timestamp < guessed_chunk[0]["timestamp"]: - guessed_id = guessed_chunk[0]["trade_id"] - len(guessed_chunk) * ( - guessed_chunk[0]["timestamp"] - timestamp - ) / (guessed_chunk[-1]["timestamp"] - guessed_chunk[0]["timestamp"]) - else: - guessed_id = guessed_chunk[-1]["trade_id"] + len(guessed_chunk) * ( - timestamp - guessed_chunk[-1]["timestamp"] - ) / (guessed_chunk[-1]["timestamp"] - guessed_chunk[0]["timestamp"]) - guessed_id = int(guessed_id - len(guessed_chunk) / 2) - guessed_chunk = sorted( - await self.bot.fetch_ticks(guessed_id, do_print=False), - key=lambda x: x["trade_id"], - ) - print_( - [ - f"guessed_id {guessed_id} earliest ts {ts_to_date(guessed_chunk[0]['timestamp'] / 1000)[:19]} last ts {ts_to_date(guessed_chunk[-1]['timestamp'] / 1000)[:19]} target ts {ts_to_date(timestamp / 1000)[:19]}" - ] - ) - return await self.find_df_enclosing_timestamp(timestamp, guessed_chunk) - - def deduce_trade_ids(self, daily_ticks, df_for_id_matching): - for idx in [0, -1]: - match = daily_ticks[ - (daily_ticks.timestamp == df_for_id_matching.timestamp.iloc[idx]) - & (daily_ticks.price == df_for_id_matching.price.iloc[idx]) - & (daily_ticks.qty == df_for_id_matching.qty.iloc[idx]) - ] - if len(match) == 1: - id_at_match = df_for_id_matching.trade_id.iloc[idx] - return np.arange( - id_at_match - match.index[0], - id_at_match - match.index[0] + len(daily_ticks), - ) - # trade_ids = np.arange(id_at_match, id_at_match + len(daily_ticks.loc[match.index:])) - return match, id_at_match - raise Exception("unable to make trade ids") - - async def get_csv_gz_old(self, base_url, symbol, date, df_for_id_matching): - """ - Fetches a full day of trades from the Bybit repository. - @param symbol: Symbol to fetch. - @param date: Day to download. - @return: Dataframe with full day. - """ - print_(["Fetching", symbol, date]) - url = f"{base_url}{symbol.upper()}/{symbol.upper()}{date}.csv.gz" - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - try: - resp = urlopen(url) - with gzip.open(BytesIO(resp.read())) as f: - ff = pd.read_csv(f) - trade_ids = np.zeros(len(ff)).astype(np.int64) - tf = pd.DataFrame( - { - "trade_id": trade_ids, - "price": ff.price.astype(np.float64), - "qty": ff["size"].astype(np.float64), - "timestamp": (ff.timestamp * 1000).astype(np.int64), - "is_buyer_maker": (ff.side == "Sell").astype(np.int8), - } - ) - tf["trade_id"] = self.deduce_trade_ids(tf, df_for_id_matching) - tf.sort_values("timestamp", inplace=True) - tf.reset_index(drop=True, inplace=True) - del ff - df = tf - except Exception as e: - print("Failed to fetch", date, e) - return df - - async def download_ticks(self): - """ - Searches for previously downloaded files and fills gaps in them if necessary. - Downloads any missing data based on the specified time frame. 
- @return: - """ - if self.config["exchange"] == "binance": - if self.spot: - self.bot = await create_binance_bot_spot(get_dummy_settings(self.config)) - else: - self.bot = await create_binance_bot(get_dummy_settings(self.config)) - elif self.config["exchange"] == "bybit": - self.bot = await create_bybit_bot(get_dummy_settings(self.config)) - else: - print(self.config["exchange"], "not found") - return - - filenames = self.get_filenames() - mod_files = [] - highest_id = 0 - for f in filenames: - verified = False - try: - first_time = int(f.split("_")[2]) - last_time = int(f.split("_")[3].split(".")[0]) - if len(f.split("_")) > 4: - verified = True - except: - first_time = sys.maxsize - last_time = sys.maxsize - if ( - not verified - and last_time >= self.start_time - and (self.end_time == -1 or (first_time <= self.end_time)) - or last_time == sys.maxsize - ): - print_(["Validating file", f, ts_to_date(first_time / 1000)]) - df = self.read_dataframe(os.path.join(self.filepath, f)) - missing, df, gaps = self.validate_dataframe(df) - exists = False - if gaps.empty: - first_id = df["trade_id"].iloc[0] - self.save_dataframe(df, f, missing, True) - else: - first_id = ( - df["trade_id"].iloc[0] - if df["trade_id"].iloc[0] < gaps["start"].iloc[0] - else gaps["start"].iloc[0] - ) - if not gaps.empty and ( - f != filenames[-1] or str(first_id - first_id % 100000) not in f - ): - last_id = df["trade_id"].iloc[-1] - for i in filenames: - tmp_first_id = int(i.split("_")[0]) - tmp_last_id = int(i.split("_")[1].replace(".csv", "")) - if ( - (first_id - first_id % 100000) == tmp_first_id - and ( - (first_id - first_id % 100000 + 99999) == tmp_last_id - or (highest_id == tmp_first_id or highest_id == tmp_last_id) - or highest_id > last_id - ) - and first_id != 1 - and i != f - ): - exists = True - break - if missing and df["timestamp"].iloc[-1] > self.start_time and not exists: - current_time = df["timestamp"].iloc[-1] - for i in gaps.index: - print_( - [ - "Filling gaps from id", - gaps["start"].iloc[i], - "to id", - gaps["end"].iloc[i], - ] - ) - current_id = gaps["start"].iloc[i] - while current_id < gaps["end"].iloc[i] and utc_ms() - current_time > 10000: - loop_start = time() - try: - fetched_new_trades = await self.bot.fetch_ticks(int(current_id)) - tf = self.transform_ticks(fetched_new_trades) - if tf.empty: - print_(["Response empty. No new trades, exiting..."]) - await asyncio.sleep( - max( - 0.0, - self.fetch_delay_seconds - time() + loop_start, - ) - ) - break - if current_id == tf["trade_id"].iloc[-1]: - print_(["Same trade ID again. 
No new trades, exiting..."]) - await asyncio.sleep( - max( - 0.0, - self.fetch_delay_seconds - time() + loop_start, - ) - ) - break - current_id = tf["trade_id"].iloc[-1] - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df = df[ - df["trade_id"] - <= gaps["end"].iloc[i] - gaps["end"].iloc[i] % 100000 + 99999 - ] - df.reset_index(drop=True, inplace=True) - current_time = df["timestamp"].iloc[-1] - except Exception: - print_(["Failed to fetch or transform..."]) - await asyncio.sleep( - max(0.0, self.fetch_delay_seconds - time() + loop_start) - ) - if not df.empty: - if df["trade_id"].iloc[-1] > highest_id: - highest_id = df["trade_id"].iloc[-1] - if not exists: - tf = df[df["trade_id"].mod(100000) == 0] - if len(tf) > 1: - df = df[: tf.index[-1]] - nf = self.save_dataframe(df, f, missing, verified) - mod_files.append(nf) - elif df["trade_id"].iloc[0] != 1: - os.remove(os.path.join(self.filepath, f)) - print_(["Removed file fragment", f]) - - chunk_gaps = [] - filenames = self.get_filenames() - prev_last_id = 0 - prev_last_time = self.start_time - for f in filenames: - first_id = int(f.split("_")[0]) - last_id = int(f.split("_")[1]) - first_time = int(f.split("_")[2]) - last_time = int(f.split("_")[3].split(".")[0]) - if ( - first_id - 1 != prev_last_id - and f not in mod_files - and first_time >= prev_last_time - and first_time >= self.start_time - and not prev_last_time > self.end_time - ): - chunk_gaps.append((prev_last_time, first_time, prev_last_id, first_id - 1)) - if first_time >= self.start_time or last_time >= self.start_time: - prev_last_id = last_id - prev_last_time = last_time - - if len(filenames) < 1: - chunk_gaps.append((self.start_time, self.end_time, 0, 0)) - if prev_last_time < self.end_time: - chunk_gaps.append((prev_last_time, self.end_time, prev_last_id, 0)) - - seen = set() - chunk_gaps_dedup = [] - for elm in chunk_gaps: - if elm not in seen: - chunk_gaps_dedup.append(elm) - seen.add(elm) - chunk_gaps = chunk_gaps_dedup - - for gaps in chunk_gaps: - start_time, end_time, start_id, end_id = gaps - df = pd.DataFrame() - - current_id = start_id + 1 - current_time = start_time - - if self.config["exchange"] == "binance": - fetched_new_trades = await self.bot.fetch_ticks(1) - tf = self.transform_ticks(fetched_new_trades) - earliest = tf["timestamp"].iloc[0] - - if earliest > start_time: - start_time = earliest - current_time = start_time - - tmp = pd.date_range( - start=datetime.datetime.fromtimestamp( - start_time / 1000, datetime.timezone.utc - ).date(), - end=datetime.datetime.fromtimestamp( - end_time / 1000, datetime.timezone.utc - ).date(), - freq="D", - ).to_pydatetime() - days = [date.strftime("%Y-%m-%d") for date in tmp] - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - - months_done = set() - months_failed = set() - for day in days: - month = day[:7] - if month in months_done: - continue - if month in months_failed: - tf = self.get_zip(self.daily_base_url, self.config["symbol"], day) - if tf.empty: - print_(["failed to fetch daily", day]) - continue - else: - tf = self.get_zip(self.monthly_base_url, self.config["symbol"], month) - if tf.empty: - print_(["failed to fetch monthly", month]) - months_failed.add(month) - tf = self.get_zip(self.daily_base_url, self.config["symbol"], day) - else: - months_done.add(month) - tf = tf[tf["timestamp"] >= start_time] - tf = tf[tf["timestamp"] <= end_time] - if start_id != 0: - tf = tf[tf["trade_id"] > start_id] - if 
end_id != 0: - tf = tf[tf["trade_id"] <= end_id] - if df.empty: - df = tf - else: - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - - if not df.empty and ( - (df["trade_id"].iloc[0] % 100000 == 0 and len(df) >= 100000) - or df["trade_id"].iloc[0] % 100000 != 0 - ): - for index, row in df[df["trade_id"] % 100000 == 0].iterrows(): - if index != 0: - self.save_dataframe( - df[ - (df["trade_id"] >= row["trade_id"] - 1000000) - & (df["trade_id"] < row["trade_id"]) - ], - "", - True, - False, - ) - df = df[df["trade_id"] >= row["trade_id"]] - if not df.empty: - start_id = df["trade_id"].iloc[0] - 1 - start_time = df["timestamp"].iloc[0] - current_time = df["timestamp"].iloc[-1] - current_id = df["trade_id"].iloc[-1] + 1 - if start_id == 0: - df = await self.find_time(start_time) - current_id = df["trade_id"].iloc[-1] + 1 - current_time = df["timestamp"].iloc[-1] - - end_id = sys.maxsize if end_id == 0 else end_id - 1 - - if current_id <= end_id and current_time <= end_time and utc_ms() - current_time > 10000: - print_( - [ - "Downloading from", - ts_to_date(float(current_time) / 1000), - "to", - ts_to_date(float(end_time) / 1000), - ] - ) - - while ( - current_id <= end_id and current_time <= end_time and utc_ms() - current_time > 10000 - ): - loop_start = time() - fetched_new_trades = await self.bot.fetch_ticks(int(current_id)) - tf = self.transform_ticks(fetched_new_trades) - if tf.empty: - print_(["Response empty. No new trades, exiting..."]) - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - break - if current_id == tf["trade_id"].iloc[-1]: - print_(["Same trade ID again. No new trades, exiting..."]) - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - break - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - current_time = tf["timestamp"].iloc[-1] - current_id = tf["trade_id"].iloc[-1] + 1 - tf = df[df["trade_id"].mod(100000) == 0] - if not tf.empty and len(df) > 1: - if df["trade_id"].iloc[0] % 100000 == 0 and len(tf) > 1: - self.save_dataframe(df[: tf.index[-1]], "", True, False) - df = df[tf.index[-1] :] - elif df["trade_id"].iloc[0] % 100000 != 0 and len(tf) == 1: - self.save_dataframe(df[: tf.index[-1]], "", True, False) - df = df[tf.index[-1] :] - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - if not df.empty: - df = df[df["timestamp"] >= start_time] - if start_id != 0 and not df.empty: - df = df[df["trade_id"] > start_id] - elif end_id != sys.maxsize and not df.empty: - df = df[df["trade_id"] <= end_id] - elif end_time != sys.maxsize and not df.empty: - df = df[df["timestamp"] <= end_time] - if not df.empty: - self.save_dataframe(df, "", True, False) - - try: - await self.bot.session.close() - except: - pass - - async def prepare_files(self): - """ - Takes downloaded data and prepares a numpy array for use in backtesting. 
- @return: - """ - filenames = [ - f - for f in self.get_filenames() - if int(f.split("_")[3].split(".")[0]) >= self.start_time - and int(f.split("_")[2]) <= self.end_time - ] - left_overs = pd.DataFrame() - sample_size_ms = 1000 - current_index = 0 - - try: - first_frame = pd.read_csv( - os.path.join(self.filepath, filenames[0]), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - first_frame = first_frame[ - (first_frame["timestamp"] >= self.start_time) - & (first_frame["timestamp"] <= self.end_time) - ] - earliest_time = first_frame.timestamp.iloc[0] // sample_size_ms * sample_size_ms - except Exception as e: - print_(["Error in determining earliest time", e]) - earliest_time = self.start_time - - try: - last_frame = pd.read_csv( - os.path.join(self.filepath, filenames[-1]), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - last_frame = last_frame[ - (last_frame["timestamp"] >= self.start_time) - & (last_frame["timestamp"] <= self.end_time) - ] - latest_time = last_frame.timestamp.iloc[-1] // sample_size_ms * sample_size_ms - except Exception as e: - print_(["Error in determining latest time", e]) - latest_time = self.end_time - - array = np.zeros( - (int((latest_time - earliest_time) / sample_size_ms + 1), 3), - dtype=np.float64, - ) - - for f in filenames: - chunk = pd.read_csv( - os.path.join(self.filepath, f), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - - chunk = pd.concat([left_overs, chunk]) - chunk.sort_values("timestamp", inplace=True) - chunk = chunk[ - (chunk["timestamp"] >= self.start_time) & (chunk["timestamp"] <= self.end_time) - ] - - cut_off = ( - chunk.timestamp.iloc[-1] // sample_size_ms * sample_size_ms - 1 - (1 * sample_size_ms) - ) - - left_overs = chunk[chunk["timestamp"] > cut_off] - chunk = chunk[chunk["timestamp"] <= cut_off] - - sampled_ticks = calc_samples(chunk[["timestamp", "qty", "price"]].values) - if current_index != 0 and array[current_index - 1, 0] + 1000 != sampled_ticks[0, 0]: - size = int((sampled_ticks[0, 0] - array[current_index - 1, 0]) / sample_size_ms) - 1 - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - sampled_ticks[0, 0], - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : current_index + len(tmp)] = tmp - current_index += len(tmp) - array[current_index : current_index + len(sampled_ticks)] = sampled_ticks - current_index += len(sampled_ticks) - - print( - "\rloaded chunk of data", - f, - ts_to_date(float(f.split("_")[2]) / 1000), - end=" ", - ) - print("\n") - - # Fill in anything left over - if not left_overs.empty: - sampled_ticks = calc_samples(left_overs[["timestamp", "qty", "price"]].values) - if current_index != 0 and array[current_index - 1, 0] + 1000 != sampled_ticks[0, 0]: - size = int((sampled_ticks[0, 0] - array[current_index - 1, 0]) / sample_size_ms) - 1 - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - sampled_ticks[0, 0], - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : 
current_index + len(tmp)] = tmp - current_index += len(tmp) - array[current_index : current_index + len(sampled_ticks)] = sampled_ticks - current_index += len(sampled_ticks) - - # Fill the gap at the end with the latest price - # Should not be necessary anymore - if current_index + 1 < len(array): - size = len(array) - current_index - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - array[current_index - 1, 0] + ((size + 1) * sample_size_ms), - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : current_index + len(tmp)] = tmp - current_index += len(tmp) - - print_( - [ - "Saving single file with", - len(array), - " ticks to", - self.tick_filepath, - "...", - ] - ) - np.save(self.tick_filepath, array) - print_(["Saved single file!"]) - - async def get_sampled_ticks(self) -> np.ndarray: - """ - Function for direct use in the backtester. Checks if the numpy arrays exist and if so loads them. - If they do not exist or if their length doesn't match, download the missing data and create them. - @return: numpy array. - """ - if os.path.exists(self.tick_filepath): - print_(["Loading cached tick data from", self.tick_filepath]) - tick_data = np.load(self.tick_filepath) - return tick_data - await self.download_ticks() - await self.prepare_files() - tick_data = np.load(self.tick_filepath) - return tick_data - - -def get_zip(url: str): - col_names = ["timestamp", "open", "high", "low", "close", "volume"] - try: - resp = urlopen(url) - file_tmp = BytesIO() - with tqdm.wrapattr( - open(os.devnull, "wb"), "write", miniters=1, total=getattr(resp, "length", None) - ) as fout: - for chunk in resp: - fout.write(chunk) - file_tmp.write(chunk) - dfs = [] - with zipfile.ZipFile(file_tmp) as my_zip_file: - for contained_file in my_zip_file.namelist(): - df = pd.read_csv(my_zip_file.open(contained_file)) - df.columns = col_names + [str(i) for i in range(len(df.columns) - len(col_names))] - dfs.append(df[col_names]) - return pd.concat(dfs).sort_values("timestamp").reset_index() - except Exception as e: - print(e) - - async def fetch_zips(url): try: async with aiohttp.ClientSession() as session: @@ -1017,15 +85,6 @@ def get_first_ohlcv_ts(symbol: str, spot=False) -> int: return 0 -def findall(string, pattern): - """Yields all the positions of - the pattern in the string""" - i = string.find(pattern) - while i != -1: - yield i - i = string.find(pattern, i + 1) - - def get_days_in_between(start_day, end_day): date_format = "%Y-%m-%d" start_date = datetime.datetime.strptime(start_day, date_format) @@ -1059,8 +118,9 @@ async def download_ohlcvs_bybit_sub( start_date, end_date = get_day(start_date), get_day(end_date) assert date_to_ts2(end_date) >= date_to_ts2(start_date), "end_date is older than start_date" dirpath = make_get_filepath(f"historical_data/ohlcvs_bybit{'_spot' if spot else ''}/{symbol}/") + convert_csv_to_npy(dirpath) ideal_days = get_days_in_between(start_date, end_date) - days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".csv" in filename] + days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".npy" in filename] days_to_get = [day for day in ideal_days if day not in days_done] dfs = {} if len(days_to_get) > 0: @@ -1083,15 +143,15 @@ async def download_ohlcvs_bybit_sub( for day, df in sorted(dfs_.items()): if day in days_done: continue - filepath = f"{dirpath}{day}.csv" - df.to_csv(filepath) + filepath = f"{dirpath}{day}.npy" + dump_ohlcv_data(df, 
filepath) dumped.append(day) if not download_only: dfs.update(dfs_) if not download_only: for day in ideal_days: - if os.path.exists(f"{dirpath}{day}.csv"): - dfs[day] = pd.read_csv(f"{dirpath}{day}.csv") + if os.path.exists(f"{dirpath}{day}.npy"): + dfs[day] = load_ohlcv_data(f"{dirpath}{day}.npy") if len(dfs) == 0: return pd.DataFrame(columns=["timestamp", "open", "high", "low", "close", "volume"]) df = pd.concat(dfs.values()).sort_values("timestamp").reset_index() @@ -1153,14 +213,16 @@ def convert_to_ohlcv(df, spot, interval=60000): for x in ["open", "high", "low", "close"]: ohlcvs[x] = ohlcvs[x].fillna(closes) ohlcvs["volume"] = ohlcvs["volume"].fillna(0.0) - return ohlcvs + ohlcvs.loc[:, "timestamp"] = ohlcvs.index.values + columns = ["timestamp", "open", "high", "low", "close", "volume"] + return ohlcvs[columns] async def download_single_ohlcvs_binance(url: str, fpath: str): try: print(f"fetching {url}") csv = await get_zip_binance(url) - csv.to_csv(fpath) + dump_ohlcv_data(csv, fpath) except Exception as e: print(f"failed to download {url} {e}") @@ -1175,6 +237,7 @@ async def download_ohlcvs_binance( start_tss=None, ) -> pd.DataFrame: dirpath = make_get_filepath(f"historical_data/ohlcvs_{'spot' if spot else 'futures'}/{symbol}/") + convert_csv_to_npy(dirpath) base_url = "https://data.binance.vision/data/" base_url += "spot/" if spot else f"futures/{'cm' if inverse else 'um'}/" col_names = ["timestamp", "open", "high", "low", "close", "volume"] @@ -1192,7 +255,7 @@ async def download_ohlcvs_binance( months = [m for m in months if m != month_now] # do months async - months_filepaths = {month: os.path.join(dirpath, month + ".csv") for month in months} + months_filepaths = {month: os.path.join(dirpath, month + ".npy") for month in months} missing_months = {k: v for k, v in months_filepaths.items() if not os.path.exists(v)} await asyncio.gather( *[ @@ -1205,11 +268,11 @@ async def download_ohlcvs_binance( months_done = sorted([x for x in os.listdir(dirpath) if x[:-4] in months_filepaths]) # do days async - days_filepaths = {day: os.path.join(dirpath, day + ".csv") for day in days} + days_filepaths = {day: os.path.join(dirpath, day + ".npy") for day in days} missing_days = { k: v for k, v in days_filepaths.items() - if not os.path.exists(v) and k[:7] + ".csv" not in months_done + if not os.path.exists(v) and k[:7] + ".npy" not in months_done } await asyncio.gather( *[ @@ -1224,17 +287,17 @@ async def download_ohlcvs_binance( # delete days contained in months fnames = os.listdir(dirpath) for fname in fnames: - if fname.endswith(".csv") and len(fname) == 14: - if fname[:7] + ".csv" in fnames: + if fname.endswith(".npy") and len(fname) == 14: + if fname[:7] + ".npy" in fnames: print("deleting", os.path.join(dirpath, fname)) os.remove(os.path.join(dirpath, fname)) if not download_only: fnames = os.listdir(dirpath) dfs = [ - pd.read_csv(os.path.join(dirpath, fpath)) + load_ohlcv_data(os.path.join(dirpath, fpath)) for fpath in months_done + days_done - if fpath in fnames + if fpath in fnames and fpath.endswith(".npy") ] try: df = pd.concat(dfs)[col_names].sort_values("timestamp") @@ -1269,7 +332,7 @@ def count_longest_identical_data(hlc, symbol, verbose=True): return longest_consecutive -def attempt_gap_fix_hlcs(df, symbol=None): +def attempt_gap_fix_hlcvs(df, symbol=None): interval = 60 * 1000 max_hours = 12 max_gap = interval * 60 * max_hours @@ -1300,7 +363,7 @@ async def load_hlcvs(symbol, start_date, end_date, base_dir="backtests", exchang df = await download_ohlcvs_binance(symbol, False, 
start_date, end_date, False) elif exchange == "bybit": df = await download_ohlcvs_bybit(symbol, start_date, end_date) - df = attempt_gap_fix_hlcs(df, symbol=symbol) + df = attempt_gap_fix_hlcvs(df, symbol=symbol) else: raise Exception(f"downloading ohlcvs from exchange {exchange} not supported") if len(df) == 0: @@ -1358,7 +421,7 @@ async def prepare_hlcvs(config: dict): continue assert ( np.diff(data[:, 0]) == interval_ms - ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcs + ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcvs hlcvsd[symbol] = data symbols = sorted(hlcvsd.keys()) if len(symbols) > 1: @@ -1367,135 +430,6 @@ async def prepare_hlcvs(config: dict): return symbols, timestamps, unified_data -async def load_hlc_cache( - symbol, - inverse, - start_date, - end_date, - base_dir="backtests", - spot=False, - exchange="binance", - start_tss=None, - minimum_coin_age_days=None, -): - cache_fname = ( - f"{ts_to_date_utc(date_to_ts2(start_date))[:10]}_" - + f"{ts_to_date_utc(date_to_ts2(end_date))[:10]}_ohlcv_cache.npy" - ) - - filepath = make_get_filepath( - os.path.join(base_dir, exchange + ("_spot" if spot else ""), symbol, "caches", cache_fname) - ) - if os.path.exists(filepath): - data = np.load(filepath) - else: - if exchange == "bybit": - df = await download_ohlcvs_bybit(symbol, start_date, end_date, spot, download_only=False) - df = attempt_gap_fix_hlcs(df) - else: - df = await download_ohlcvs_binance( - symbol, inverse, start_date, end_date, spot, start_tss=start_tss - ) - if len(df) == 0: - return pd.DataFrame() - df = df[df.timestamp >= date_to_ts2(start_date)] - df = df[df.timestamp <= date_to_ts2(end_date)] - data = df[["timestamp", "high", "low", "close"]].values - np.save(filepath, data) - try: - count_longest_identical_data(data, symbol) - except Exception as e: - print("error checking integrity", e) - if minimum_coin_age_days: - if start_tss and symbol in start_tss: - first_ts = start_tss[symbol] - else: - first_ts = (await get_first_ohlcv_timestamps(symbols=[symbol]))[symbol] - new_start_ts = max(first_ts + 1000 * 60 * 60 * 24 * minimum_coin_age_days, data[0][0]) - if new_start_ts != data[0][0]: - print( - f"changing start date for {symbol} {ts_to_date_utc(data[0][0])} -> {ts_to_date_utc(new_start_ts)}" - ) - data = data[data[:, 0] >= new_start_ts] - return data - - -async def prepare_multsymbol_data( - symbols, start_date, end_date, base_dir, exchange -) -> (float, np.ndarray): - """ - returns first timestamp and hlc data in the form - [ - [ - [sym0_high0, sym0_low0, sym0_close0], - [sym0_high1, sym0_low1, sym0_close1], - ... - ], - [ - [sym1_high0, sym1_low0, sym1_close0], - [sym1_high1, sym1_low1, sym1_close1], - ... - ], - ... 
- ] - """ - if end_date in ["today", "now", ""]: - end_date = ts_to_date_utc(utc_ms())[:10] - hlcs = [] - interval = 60000.0 - for symbol in symbols: - data = await load_hlc_cache(symbol, False, start_date, end_date, base_dir, False, exchange) - assert ( - np.diff(data[:, 0]) == interval - ).all(), f"gaps in hlc data {symbol}" # verify integrous 1m hlcs - dft = pd.DataFrame( - data, columns=["timestamp"] + [f"{symbol}_{key}" for key in ["high", "low", "close"]] - ) - hlcs.append(dft) - - tss = np.arange( - min([x.timestamp.iloc[0] for x in hlcs]), - max([x.timestamp.iloc[-1] for x in hlcs]) + interval, - interval, - ) - df = pd.concat([x.set_index("timestamp").reindex(tss) for x in hlcs], axis=1, join="outer") - df = df.fillna(0.0) - return df.index[0], np.array([df.values[:, i : i + 3] for i in range(0, len(symbols) * 3, 3)]) - - -def pad_hlcs(hlcs, timestamps): - start_timestamp = timestamps[0] - interval = 60000 - num_timestamps = len(timestamps) - - # Initialize the padded_hlcs array with NaNs - padded_hlcs = np.full((num_timestamps, 3), np.nan) - - # Calculate the indices for where the hlcs data should be placed in the padded array - hlcs_start_idx = int((hlcs[0, 0] - start_timestamp) // interval) - hlcs_end_idx = int((hlcs[-1, 0] - start_timestamp) // interval) - - # Fill the hlcs data into the padded array - padded_indices = ((hlcs[:, 0] - start_timestamp) // interval).astype(int) - padded_hlcs[padded_indices, :] = hlcs[:, 1:] - - # Frontfill - front_fill_value = hlcs[0, 3] - padded_hlcs[:hlcs_start_idx, :] = front_fill_value - - # Backfill - back_fill_value = hlcs[-1, 3] - padded_hlcs[hlcs_end_idx + 1 :, :] = back_fill_value - - # Forward fill remaining NaNs using numpy's `np.nan_to_num` and `np.fmax.accumulate` - nan_mask = np.isnan(padded_hlcs[:, 0]) - idx = np.where(~nan_mask, np.arange(num_timestamps), 0) - np.maximum.accumulate(idx, out=idx) - padded_hlcs = padded_hlcs[idx] - - return padded_hlcs - - def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): # Find the global start and end timestamps @@ -1537,149 +471,49 @@ def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): return timestamps, unified_array -async def prepare_hlcs_forager(config: dict): - """ - returns - [timestamp], - [ - [ - [sym0_high0, sym0_low0, sym0_close0], - [sym1_high0, sym1_low0, sym1_close0], - [sym2_high0, sym2_low0, sym2_close0], - ], - [ - [sym0_high1, sym0_low1, sym0_close1], - [sym1_high1, sym1_low1, sym1_close1], - [sym2_high1, sym2_low1, sym2_close1], - ] - ] - """ - symbols = config["backtest"]["symbols"] - start_date = config["backtest"]["start_date"] - end_date = config["backtest"]["end_date"] - base_dir = config["backtest"]["base_dir"] - exchange = config["backtest"]["exchange"] - minimum_coin_age_days = config["live"]["minimum_coin_age_days"] - if end_date in ["today", "now", ""]: - end_date = ts_to_date_utc(utc_ms())[:10] - hlcsd = {} - interval_ms = 60000 - start_tss = None - if exchange == "binance": - start_tss = await get_first_ohlcv_timestamps(cc=ccxt.binanceusdm(), symbols=symbols) - for symbol in symbols: - data = await load_hlc_cache( - symbol, - False, - start_date, - end_date, - base_dir, - False, - exchange, - start_tss=start_tss, - minimum_coin_age_days=minimum_coin_age_days, - ) - if len(data) == 0: - continue - assert ( - np.diff(data[:, 0]) == interval_ms - ).all(), f"gaps in hlc data {symbol}" # verify integrous 1m hlcs - hlcsd[symbol] = data - # hlcsd is {symbol: array([[timestamp, high, low, close]])} - first_timestamp = min([x[0][0] for x in hlcsd.values()]) 
- last_timestamp = max([x[-1][0] for x in hlcsd.values()]) - timestamps = np.arange(first_timestamp, last_timestamp + interval_ms, interval_ms) - - unified_data = [] - for symbol, data in hlcsd.items(): - padded_hlcs = pad_hlcs(data, timestamps) - unified_data.append(padded_hlcs) - - return timestamps, np.array(unified_data).transpose(1, 0, 2) +def convert_csv_to_npy(filepath): + if not os.path.exists(filepath): + return False + if os.path.isdir(filepath): + for fp in os.listdir(filepath): + convert_csv_to_npy(os.path.join(filepath, fp)) + return False + if filepath.endswith(".csv"): + columns = ["timestamp", "open", "high", "low", "close", "volume"] + npy_filepath = filepath.replace(".csv", ".npy") + csv_data = pd.read_csv(filepath)[columns] + dump_ohlcv_data(csv_data, npy_filepath) + os.remove(filepath) + print(f"successfully converted {filepath} to {npy_filepath}") + return True + + +def dump_ohlcv_data(data, filepath): + npy_filepath = filepath.replace(".csv", ".npy") + columns = ["timestamp", "open", "high", "low", "close", "volume"] + if isinstance(data, pd.DataFrame): + to_dump = data[columns].astype(float).values + elif isinstance(data, np.ndarray): + to_dump = data + else: + raise Exception(f"unknown file type {filepath} dump_ohlcv_data") + np.save(npy_filepath, to_dump) -def format_hlcs_forager(hlcsd: dict): - interval_ms = 60000 - first_timestamp = min([x[0][0] for x in hlcsd.values()]) - last_timestamp = max([x[-1][0] for x in hlcsd.values()]) - timestamps = np.arange(first_timestamp, last_timestamp + interval_ms, interval_ms) - - unified_data = [] - for symbol, data in hlcsd.items(): - padded_hlcs = pad_hlcs(numpyize(data), timestamps) - unified_data.append(padded_hlcs) - - return timestamps, np.array(unified_data).transpose(1, 0, 2) - - -def calc_noisiness(timestamps, hlcs, timeframe="15m"): - """ - Takes 1m hlcs and timestamps as input and calculates noisiness aggregated over a specified timeframe. - - Args: - timestamps (np.array): Array of timestamps. - hlcs (np.array): 3D array of shape (time, symbols, [high, low, close]). - timeframe (str): Aggregation timeframe ("15m", "5m", "1h", "4h", "1d"). - - Returns: - np.array: 2D array with adjusted timestamps and noisiness values for each symbol per new timeframe. 
- """ - - if timeframe == "15m": - n_mins, tf = 15, 1000 * 60 * 15 - elif timeframe == "5m": - n_mins, tf = 5, 1000 * 60 * 5 - elif timeframe == "1h": - n_mins, tf = 60, 1000 * 60 * 60 - elif timeframe == "4h": - n_mins, tf = 60 * 4, 1000 * 60 * 60 * 4 - elif timeframe == "1d": - n_mins, tf = 60 * 24, 1000 * 60 * 60 * 24 +def load_ohlcv_data(filepath): + npy_filepath = filepath.replace(".csv", ".npy") + columns = ["timestamp", "open", "high", "low", "close", "volume"] + if os.path.exists(npy_filepath): + loaded_data = np.load(npy_filepath, allow_pickle=True) else: - raise Exception(f"unsupported timeframe: {timeframe}") - - # Calculate the first and last timestamp for the new adjusted timeframe - first_ts = timestamps[0] // tf * tf - last_ts = timestamps[-1] // tf * tf - new_timestamps = np.arange(first_ts, last_ts + tf, tf) - - # Number of symbols and the number of new timeframes - num_symbols = hlcs.shape[1] - num_periods = len(new_timestamps) - - # Initialize the noisiness array - noisiness = np.zeros((num_periods - 1, num_symbols)) - - start_idx = timestamps[0] // tf * tf - for i in range(start_idx, len(hlcs) + tf, n_mins): - slice_ = hlcs[max(0, i - n_mins) : i] - high = slice_.max() - low = slice_.min() - - # Process each symbol - for symbol_index in range(num_symbols): - # Aggregate high, low, and close for each timeframe - for i in range(1, num_periods): - # Determine indices in the original array that fall into the current timeframe bucket - mask = (timestamps >= new_timestamps[i - 1]) & (timestamps < new_timestamps[i]) - if np.any(mask): - highs = hlcs[mask, symbol_index, 0] - lows = hlcs[mask, symbol_index, 1] - closes = hlcs[mask, symbol_index, 2] - - # Compute high max, low min, and the last close in the interval - period_high = np.max(highs) - period_low = np.min(lows) - period_close = closes[-1] - - # Calculate noisiness - if period_close == 0.0: - noisiness[i - 1, symbol_index] = 0.0 - else: - noisiness[i - 1, symbol_index] = (period_high - period_low) / period_close - - # Return adjusted timestamps (excluding the last since it doesn't complete the interval) and noisiness values - return new_timestamps[:-1], noisiness + print(f"loading {filepath}") + csv_data = pd.read_csv(filepath)[columns] + print(f"dumping {npy_filepath}") + dump_ohlcv_data(csv_data, npy_filepath) + print(f"removing {filepath}") + os.remove(filepath) + loaded_data = csv_data.values + return pd.DataFrame(loaded_data, columns=columns) async def main(): @@ -1696,20 +530,14 @@ async def main(): args = parser.parse_args() config = prepare_backtest_config(args) - if config["ohlcv"]: - data = await load_hlc_cache( - config["symbol"], - config["inverse"], - config["start_date"], - config["end_date"], - spot=config["spot"], - exchange=config["exchange"], - ) - else: - downloader = Downloader(config) - await downloader.download_ticks() - if not args.download_only: - await downloader.prepare_files() + data = await load_hlc_cache( + config["symbol"], + config["inverse"], + config["start_date"], + config["end_date"], + spot=config["spot"], + exchange=config["exchange"], + ) if __name__ == "__main__": From bba64290b2b39a66435016b28ba653358d2650cb Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 14:21:38 -0400 Subject: [PATCH 32/32] bug fix: init_markets_dict needs param verbose --- src/exchanges/binance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/exchanges/binance.py b/src/exchanges/binance.py index f708bd8e0..b0e2a0718 100644 --- a/src/exchanges/binance.py +++ 
b/src/exchanges/binance.py @@ -80,9 +80,9 @@ async def print_new_user_suggestion(self): print(front_pad + "#" * (max_len + 2) + back_pad) print("\n\n") - async def init_markets_dict(self): + async def init_markets_dict(self, verbose=True): await self.print_new_user_suggestion() - await super().init_markets_dict() + await super().init_markets_dict(verbose=verbose) def set_market_specific_settings(self): super().set_market_specific_settings()
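
Two changes in this series carry most of the behavioral weight, and both are easier to see outside diff context. (Patch 28's change is simpler still: `from passivbot import main` is deferred until after `manage_rust_compilation()` has run, so the Rust extension is guaranteed to exist before any module that needs it is imported.)

Patch 30 threads an accumulated `path` through `recursive_config_update` so that changes to nested keys are logged with their full dotted path instead of only the leaf name. A minimal sketch of the same walk, assuming a plain nested-dict config and underscore-joined keys (`recursive_update` here is illustrative, not the repo function):

    from typing import Any

    def recursive_update(config: dict, key: str, value: Any, path: tuple = ()) -> bool:
        # Direct hit: report the full dotted path before mutating the leaf.
        if key in config:
            if config[key] != value:
                print(f"changed {'.'.join(path + (key,))} {config[key]} -> {value}")
                config[key] = value
            return True
        # Otherwise peel off the first underscore-separated segment and descend.
        head, _, tail = key.partition("_")
        if head in config and isinstance(config[head], dict):
            return recursive_update(config[head], tail, value, path + (head,))
        return False

    cfg = {"bot": {"long": {"n_positions": 10}}}
    recursive_update(cfg, "bot_long_n_positions", 7)  # changed bot.long.n_positions 10 -> 7

Patch 31 swaps the per-day OHLCV cache format from csv to npy: frames are dumped as plain float arrays with a fixed column order and rebuilt as DataFrames on load, so backtests no longer re-parse csv text. A round-trip sketch of that storage contract (simplified; the repo's `dump_ohlcv_data` and `load_ohlcv_data` above additionally convert and delete legacy csv files, and the filename below is only an example of the per-day naming):

    import os

    import numpy as np
    import pandas as pd

    # The fixed column order is the format contract; .npy files carry no header.
    OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]

    def dump_ohlcv(df: pd.DataFrame, path: str) -> None:
        # float64 throughout; timestamps stay as epoch milliseconds.
        np.save(path, df[OHLCV_COLUMNS].astype(float).values)

    def load_ohlcv(path: str) -> pd.DataFrame:
        # Rebuild the frame; column names are implied by the fixed order.
        return pd.DataFrame(np.load(path), columns=OHLCV_COLUMNS)

    df = pd.DataFrame([[1717200000000, 1.0, 1.2, 0.9, 1.1, 42.0]], columns=OHLCV_COLUMNS)
    dump_ohlcv(df, "2024-06-01.npy")
    assert (load_ohlcv("2024-06-01.npy").values == df.astype(float).values).all()
    os.remove("2024-06-01.npy")

The trade-off is that npy files are schema-less: the reader and writer must agree on column order forever, which is why patch 31 pins the same six-column list in `dump_ohlcv_data`, `load_ohlcv_data`, and `convert_csv_to_npy`.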