From 9f2cce1d240458a16b96d3fb1dd72e2870a9e62d Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:34:10 -0400 Subject: [PATCH 01/32] add new parameters filter_rolling_window, filter_relative_volume_clip_pct --- configs/template.json | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/configs/template.json b/configs/template.json index c9d30da2a..88062f907 100644 --- a/configs/template.json +++ b/configs/template.json @@ -20,6 +20,8 @@ "entry_trailing_grid_ratio": -0.28, "entry_trailing_retracement_pct": 0.0024735, "entry_trailing_threshold_pct": -0.062799, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.776, "total_wallet_exposure_limit": 0.97499, "unstuck_close_pct": 0.049666, @@ -43,6 +45,8 @@ "entry_trailing_grid_ratio": -0.3633, "entry_trailing_retracement_pct": 0.06044, "entry_trailing_threshold_pct": -0.084207, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 7.6679, "total_wallet_exposure_limit": 0.0, "unstuck_close_pct": 0.052781, @@ -61,10 +65,8 @@ "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "minimum_coin_age_days": 30.0, - "ohlcv_rolling_window": 60, "pnls_max_lookback_days": 30.0, "price_distance_threshold": 0.002, - "relative_volume_filter_clip_pct": 0.5, "time_in_force": "good_till_cancelled", "user": "bybit_01"}, "optimize": {"bounds": {"long_close_grid_markup_range": [0.0, 0.03], @@ -84,6 +86,8 @@ "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], + "long_filter_rolling_window": [10.0, 1440.0], + "long_filter_relative_volume_clip_pct": [0.0, 1.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 5.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -107,6 +111,8 @@ "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], + "short_filter_rolling_window": [10.0, 1440.0], + "short_filter_relative_volume_clip_pct": [0.0, 1.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 5.0], "short_unstuck_close_pct": [0.001, 0.1], From 13fae9934449acb8786b21a5b05d067ed8336e54 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:35:28 -0400 Subject: [PATCH 02/32] update for new filter params --- notebooks/notes_backtest.ipynb | 38 +++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/notebooks/notes_backtest.ipynb b/notebooks/notes_backtest.ipynb index 57f7b4056..8163a7697 100644 --- a/notebooks/notes_backtest.ipynb +++ b/notebooks/notes_backtest.ipynb @@ -28,13 +28,26 @@ "cell_type": "code", "execution_count": null, "id": "97f846a3-874c-48f4-93d8-829edc0b4bce", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "config = load_config('configs/template.json')\n", "{k: config[k] for k in ['backtest', 'bot', 'live']}" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "60155aa0-a1d3-4763-b314-5e015643b527", + "metadata": {}, + "outputs": [], + "source": [ + "#config['backtest']['symbols'] = config['backtest']['symbols'][::10]\n", + "config['backtest']['start_date'] = '2024-03-01'\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -43,18 +56,19 @@ "outputs": [], "source": [ "symbols = config['backtest']['symbols']\n", - "symbols, hlcvs, mss, results_path = await 
prepare_hlcvs_mss(config)" ] }, { "cell_type": "code", "execution_count": null, "id": "2f473002-2ba5-41e3-9f06-ff56aaea2318", "metadata": {}, "outputs": [], "source": [ "#config['bot']['long']['n_positions'] = 3\n", "#config['bot']['long']['filter_rolling_window'] = 1440\n", "#config['bot']['long']['filter_relative_volume_clip_pct'] = 0.5" ] }, { "cell_type": "code", "execution_count": null, @@ -64,13 +78,13 @@ "metadata": {}, "outputs": [], "source": [ - "fills, equities, analysis = run_backtest(hlcs, preferred_coins, mss, config)" + "fills, equities, analysis = run_backtest(hlcvs, mss, config)" ] }, { "cell_type": "code", "execution_count": null, - "id": "b3ce4490-63ad-4e1f-8477-d188cc058fa4", + "id": "b0a86c90-e834-4016-abfa-8158e2d227cb", "metadata": {}, "outputs": [], "source": [ @@ -79,7 +93,7 @@ "print(f'elapsed {utc_ms() - sts}')\n", "sts = utc_ms()\n", "equities = pd.Series(equities)\n", - "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['symbols'], hlcs, fdf, equities)\n", + "analysis_py, balance_and_equity = analyze_fills_forager(config['backtest']['symbols'], hlcvs, fdf, equities)\n", "for k in analysis_py:\n", " if k not in analysis:\n", " analysis[k] = analysis_py[k]\n", @@ -88,6 +102,14 @@ "balance_and_equity.plot()" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4be85e1-6a27-4864-8e5f-cd78d23abf52", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, From 060e29480ddbca93284bed847012a2ebe533cc67 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:35:51 -0400 Subject: [PATCH 03/32] compute preferred coins on the fly --- passivbot-rust/src/backtest.rs | 264 ++++++++++++++++++++------------ passivbot-rust/src/constants.rs | 1 + passivbot-rust/src/lib.rs | 2 - passivbot-rust/src/python.rs | 54 ++----- passivbot-rust/src/types.rs | 2 + 5 files changed, 180 insertions(+), 143 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 44f46108e..e14c0c55c 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -1,7 +1,7 @@ use crate::closes::{ calc_closes_long, calc_closes_short, calc_next_close_long, calc_next_close_short, }; -use crate::constants::{CLOSE, HIGH, LONG, LOW, NO_POS, SHORT}; +use crate::constants::{CLOSE, HIGH, LONG, LOW, NO_POS, SHORT, VOLUME}; use crate::entries::{ calc_entries_long, calc_entries_short, calc_min_entry_qty, calc_next_entry_long, calc_next_entry_short, }; use crate::types::{ Analysis, BacktestParams, BotParams, BotParamsPair, EMABands, ExchangeParams, Fill, Order, OrderBook, OrderType, Position, Positions, StateParams, TrailingPriceBundle, }; use crate::utils::{ calc_pprice_diff_int, calc_wallet_exposure, cost_to_qty, qty_to_cost, round_, round_dn, round_up, }; -use ndarray::s; -use ndarray::{Array1, Array2, Array3, Array4}; +use ndarray::{s, Array1, Array2, Array3, Array4, Axis}; use std::cmp::Ordering; use std::collections::{HashMap, HashSet}; @@ -122,9 +121,14 @@ pub struct TradingEnabled { short: bool, } +pub struct PreferredCoins { + long: Vec<usize>, + short: Vec<usize>, +} + pub struct Backtest { - hlcs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 3) - preferred_coins: Array2<i32>, // 2D array: (n_timesteps, n_markets) + hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 4) + preferred_coins: PreferredCoins, bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, @@ -147,33 +151,46 @@ pub struct Backtest { delist_timestamps: HashMap<usize, usize>, did_fill_long: 
HashSet<usize>, did_fill_short: HashSet<usize>, + rolling_volumes: Vec<Vec<f64>>, } impl Backtest { pub fn new( - hlcs: Array3<f64>, - preferred_coins: Array2<i32>, + hlcvs: Array3<f64>, bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: &BacktestParams, ) -> Self { - let n_markets = hlcs.shape()[1]; + let n_timesteps = hlcvs.shape()[0]; + let n_markets = hlcvs.shape()[1]; + let max_window = bot_params_pair + .long + .filter_rolling_window + .max(bot_params_pair.short.filter_rolling_window); + + // Initialize rolling_volumes with zeros + let rolling_volumes = vec![vec![0.0; n_markets]; n_timesteps]; + let initial_emas = (0..n_markets) .map(|i| { - let close_price = hlcs[[0, i, CLOSE]]; + let close_price = hlcvs[[0, i, CLOSE]]; EMAs { long: [close_price; 3], short: [close_price; 3], } }) .collect(); + let preferred_coins = PreferredCoins { + long: Vec::<usize>::new(), + short: Vec::<usize>::new(), + }; let mut equities = Vec::<f64>::new(); equities.push(backtest_params.starting_balance); let mut bot_params_pair_cloned = bot_params_pair.clone(); bot_params_pair_cloned.long.n_positions = n_markets.min(bot_params_pair.long.n_positions); bot_params_pair_cloned.short.n_positions = n_markets.min(bot_params_pair.short.n_positions); - Backtest { - hlcs, + let mut backtest = Backtest { + hlcvs, preferred_coins, bot_params_pair: bot_params_pair_cloned, exchange_params_list, @@ -207,12 +224,109 @@ impl Backtest { delist_timestamps: HashMap::new(), did_fill_long: HashSet::new(), did_fill_short: HashSet::new(), + rolling_volumes, + }; + backtest.initialize_rolling_volumes(max_window); + backtest + } + + fn initialize_rolling_volumes(&mut self, max_window: usize) { + let n_markets = self.hlcvs.shape()[1]; + let n_timesteps = self.hlcvs.shape()[0]; + + for k in 0..n_timesteps { + let start = k.saturating_sub(max_window - 1); + for i in 0..n_markets { + // Update rolling volume + self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); + } } } + fn update_rolling_volumes(&mut self, k: usize) { + let n_markets = self.hlcvs.shape()[1]; + let max_window = self + .bot_params_pair + .long + .filter_rolling_window + .max(self.bot_params_pair.short.filter_rolling_window); + + if k >= max_window { + let old_k = k - max_window; + for i in 0..n_markets { + self.rolling_volumes[k][i] = self.rolling_volumes[k - 1][i] + + self.hlcvs[[k, i, VOLUME]] + - self.hlcvs[[old_k, i, VOLUME]]; + } + } else { + // For the first max_window steps, we need to recalculate the full sum + let start = 0; + for i in 0..n_markets { + self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); + } + } + } + + fn calc_noisiness(&self, k: usize, idx: usize, window: usize) -> f64 { + let start = k.saturating_sub(window - 1); + let slice = self.hlcvs.slice(s![start..=k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + nrr_sum / (k - start + 1) as f64 + } + + fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + let bot_params = match pside { + LONG => &self.bot_params_pair.long, + SHORT => &self.bot_params_pair.short, + _ => panic!("Invalid pside"), + }; + + let n_coins = self.hlcvs.shape()[1]; + + // Use pre-computed rolling volumes + let mut volume_sums: Vec<(usize, f64)> = self.rolling_volumes[k] + .iter() + .enumerate() + .map(|(idx, &sum)| (idx, sum)) + .collect(); + + // Sort by volume in descending order + volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Filter by volume + let n_eligible = 
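+        // illustrative example: with n_coins = 100 and
+        // filter_relative_volume_clip_pct = 0.95, only the top 5 coins by
+        // rolling volume remain eligible, floored at n_positions: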
bot_params.n_positions.max( (n_coins as f64 * (1.0 - bot_params.filter_relative_volume_clip_pct)).round() as usize, ); let filtered_indices: Vec<usize> = volume_sums .iter() .take(n_eligible) .map(|&(idx, _)| idx) .collect(); // Calculate noisiness on-the-fly for filtered coins let mut noisiness: Vec<(usize, f64)> = filtered_indices .into_iter() .map(|idx| { ( idx, self.calc_noisiness(k, idx, bot_params.filter_rolling_window), ) }) .collect(); // Sort by noisiness in descending order noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Return indices sorted by noisiness noisiness.into_iter().map(|(idx, _)| idx).collect() } pub fn run(&mut self) -> (Vec<Fill>, Vec<f64>) { let check_points: Vec<usize> = (0..7).map(|i| i * 60 * 24).collect(); - let n_timesteps = self.hlcs.shape()[0]; + let n_timesteps = self.hlcvs.shape()[0]; for idx in 0..self.n_markets { self.trailing_prices .long .insert(idx, TrailingPriceBundle::default()); @@ -224,18 +338,18 @@ impl Backtest { // check if the coin was delisted at any point if n_timesteps > *check_points.last().unwrap() { - let last_hlc_close = self.hlcs[[n_timesteps - 1, idx, CLOSE]]; + let last_hlc_close = self.hlcvs[[n_timesteps - 1, idx, CLOSE]]; if check_points.iter().all(|&point| { - self.hlcs[[n_timesteps - 1 - point, idx, HIGH]] == last_hlc_close - && self.hlcs[[n_timesteps - 1 - point, idx, LOW]] == last_hlc_close - && self.hlcs[[n_timesteps - 1 - point, idx, CLOSE]] == last_hlc_close + self.hlcvs[[n_timesteps - 1 - point, idx, HIGH]] == last_hlc_close + && self.hlcvs[[n_timesteps - 1 - point, idx, LOW]] == last_hlc_close + && self.hlcvs[[n_timesteps - 1 - point, idx, CLOSE]] == last_hlc_close }) { // was delisted. Find timestamp of delisting let mut i = n_timesteps - check_points.last().unwrap(); while i > 0 - && self.hlcs[[i, idx, HIGH]] == last_hlc_close - && self.hlcs[[i, idx, LOW]] == last_hlc_close - && self.hlcs[[i, idx, CLOSE]] == last_hlc_close + && self.hlcvs[[i, idx, HIGH]] == last_hlc_close + && self.hlcvs[[i, idx, LOW]] == last_hlc_close + && self.hlcvs[[i, idx, CLOSE]] == last_hlc_close { i -= 1; } @@ -246,6 +360,7 @@ impl Backtest { } } for k in 1..(n_timesteps - 1) { + self.update_rolling_volumes(k); self.check_for_fills(k); self.update_emas(k); self.update_open_orders(k); @@ -255,7 +370,7 @@ impl Backtest { } fn create_state_params(&self, k: usize, idx: usize, pside: usize) -> StateParams { - let close_price = self.hlcs[[k, idx, CLOSE]]; + let close_price = self.hlcvs[[k, idx, CLOSE]]; StateParams { balance: self.balance, order_book: OrderBook { @@ -278,7 +393,7 @@ impl Backtest { let mut equity = self.balance; // Calculate unrealized PnL for long positions for (&idx, position) in &self.positions.long { - let current_price = self.hlcs[[k, idx, CLOSE]]; + let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_long( position.price, current_price, @@ -289,7 +404,7 @@ impl Backtest { } // Calculate unrealized PnL for short positions for (&idx, position) in &self.positions.short { - let current_price = self.hlcs[[k, idx, CLOSE]]; + let current_price = self.hlcvs[[k, idx, CLOSE]]; let upnl = calc_pnl_short( position.price, current_price, @@ -302,6 +417,9 @@ impl Backtest { } fn update_actives(&mut self, k: usize, pside: usize) -> Vec<usize> { + // Calculate preferred coins first + let preferred_coins = self.calc_preferred_coins(k, pside); + let (actives, positions, n_positions) = match pside { LONG => ( &mut self.actives.long, &self.positions.long, self.bot_params_pair.long.n_positions, ), SHORT => ( &mut self.actives.short, &self.positions.short, self.bot_params_pair.short.n_positions, ), _ => panic!("Invalid pside"), }; let mut 
actives_without_pos = Vec::with_capacity(n_positions); actives.clear(); + + // First, add all markets with existing positions for &market_idx in positions.keys() { actives.insert(market_idx); } - // Add additional markets based on preferred_coins - for &market_idx in self.preferred_coins.row(k).iter() { - let market_idx = market_idx as usize; + + // Then, add additional markets based on preferred_coins + for &market_idx in &preferred_coins { if actives.len() < n_positions { if actives.insert(market_idx) { - // Only add to actives_without_pos if it's a new insertion actives_without_pos.push(market_idx); } } else { break; } } + actives_without_pos } @@ -705,21 +826,21 @@ impl Backtest { } else { self.trailing_prices.short.entry(idx).or_default() }; - if self.hlcs[[k, idx, LOW]] < trailing_price_bundle.min_since_open { - trailing_price_bundle.min_since_open = self.hlcs[[k, idx, LOW]]; - trailing_price_bundle.max_since_min = self.hlcs[[k, idx, CLOSE]]; + if self.hlcvs[[k, idx, LOW]] < trailing_price_bundle.min_since_open { + trailing_price_bundle.min_since_open = self.hlcvs[[k, idx, LOW]]; + trailing_price_bundle.max_since_min = self.hlcvs[[k, idx, CLOSE]]; } else { trailing_price_bundle.max_since_min = trailing_price_bundle .max_since_min - .max(self.hlcs[[k, idx, HIGH]]); + .max(self.hlcvs[[k, idx, HIGH]]); } - if self.hlcs[[k, idx, HIGH]] > trailing_price_bundle.max_since_open { - trailing_price_bundle.max_since_open = self.hlcs[[k, idx, HIGH]]; - trailing_price_bundle.min_since_max = self.hlcs[[k, idx, CLOSE]]; + if self.hlcvs[[k, idx, HIGH]] > trailing_price_bundle.max_since_open { + trailing_price_bundle.max_since_open = self.hlcvs[[k, idx, HIGH]]; + trailing_price_bundle.min_since_max = self.hlcvs[[k, idx, CLOSE]]; } else { trailing_price_bundle.min_since_max = trailing_price_bundle .min_since_max - .min(self.hlcs[[k, idx, LOW]]); + .min(self.hlcvs[[k, idx, LOW]]); } } @@ -767,7 +888,7 @@ impl Backtest { qty: -self.positions.long[&idx].size, price: round_( f64::min( - self.hlcs[[k, idx, HIGH]] - self.exchange_params_list[idx].price_step, + self.hlcvs[[k, idx, HIGH]] - self.exchange_params_list[idx].price_step, self.positions.long[&idx].price, ), self.exchange_params_list[idx].price_step, @@ -839,7 +960,7 @@ impl Backtest { qty: self.positions.short[&idx].size.abs(), price: round_( f64::max( - self.hlcs[[k, idx, LOW]] + self.exchange_params_list[idx].price_step, + self.hlcvs[[k, idx, LOW]] + self.exchange_params_list[idx].price_step, self.positions.short[&idx].price, ), self.exchange_params_list[idx].price_step, @@ -900,9 +1021,9 @@ impl Backtest { fn order_filled(&self, k: usize, idx: usize, order: &Order) -> bool { // check if will fill in next candle if order.qty > 0.0 { - self.hlcs[[k, idx, LOW]] < order.price + self.hlcvs[[k, idx, LOW]] < order.price } else if order.qty < 0.0 { - self.hlcs[[k, idx, HIGH]] > order.price + self.hlcvs[[k, idx, HIGH]] > order.price } else { false } @@ -933,7 +1054,7 @@ impl Backtest { > self.bot_params_pair.long.unstuck_threshold { let pprice_diff = - calc_pprice_diff_int(LONG, position.price, self.hlcs[[k, idx, CLOSE]]); + calc_pprice_diff_int(LONG, position.price, self.hlcvs[[k, idx, CLOSE]]); stuck_positions.push((idx, LONG, pprice_diff)); } } @@ -960,8 +1081,11 @@ impl Backtest { if wallet_exposure / self.bot_params_pair.short.wallet_exposure_limit > self.bot_params_pair.short.unstuck_threshold { - let pprice_diff = - calc_pprice_diff_int(SHORT, position.price, self.hlcs[[k, idx, CLOSE]]); + let pprice_diff = calc_pprice_diff_int( + SHORT, + 
position.price, + self.hlcvs[[k, idx, CLOSE]], + ); stuck_positions.push((idx, SHORT, pprice_diff)); } } @@ -976,7 +1100,7 @@ impl Backtest { match pside { LONG => { let close_price = f64::max( - self.hlcs[[k, idx, CLOSE]], + self.hlcvs[[k, idx, CLOSE]], round_up( self.emas[idx].compute_bands(LONG).upper * (1.0 + self.bot_params_pair.long.unstuck_ema_dist), @@ -1018,7 +1142,7 @@ } SHORT => { let close_price = f64::min( - self.hlcs[[k, idx, CLOSE]], + self.hlcvs[[k, idx, CLOSE]], round_dn( self.emas[idx].compute_bands(SHORT).lower * (1.0 - self.bot_params_pair.short.unstuck_ema_dist), @@ -1240,7 +1364,7 @@ #[inline] fn update_emas(&mut self, k: usize) { for i in 0..self.n_markets { - let close_price = self.hlcs[[k, i, CLOSE]]; + let close_price = self.hlcvs[[k, i, CLOSE]]; let long_alphas = &self.ema_alphas.long.alphas; let long_alphas_inv = &self.ema_alphas.long.alphas_inv; @@ -1410,55 +1534,3 @@ fn calc_drawdowns(equity_series: &[f64]) -> Vec<f64> { .map(|(&ret, &max)| (ret - max) / max) .collect() } - -pub fn calc_noisiness(hlcs: &Array3<f64>, window: usize) -> Array2<f64> { - let (n_minutes, n_coins, _) = hlcs.dim(); - - // Calculate Normalized Relative Range (NRR) - let nrrs = - (&hlcs.slice(s![.., .., 0]) - &hlcs.slice(s![.., .., 1])) / &hlcs.slice(s![.., .., 2]); - - let mut noisiness = Array2::<f64>::zeros((n_minutes, n_coins)); - let mut sums = vec![0.0; n_coins]; - - for i in 1..n_minutes { - let idx_start = i.saturating_sub(window); - - for j in 0..n_coins { - sums[j] += nrrs[[i - 1, j]]; - - if idx_start > 0 { - sums[j] -= nrrs[[idx_start - 1, j]]; - noisiness[[i, j]] = sums[j] / window as f64; - } else { - noisiness[[i, j]] = sums[j] / i as f64; - } - } - } - noisiness -} - -pub fn calc_volumes(hlcvs: &Array3<f64>, window: usize) -> Array2<f64> { - let (n_minutes, n_coins, _) = hlcvs.dim(); - - // Calculate volume in quote currency (close * volume) - let quote_volumes = &hlcvs.slice(s![.., .., 2]) * &hlcvs.slice(s![.., .., 3]); - - let mut rolling_volumes = Array2::<f64>::zeros((n_minutes, n_coins)); - let mut sums = vec![0.0; n_coins]; - - for i in 0..n_minutes { - let idx_start = i.saturating_sub(window); - for j in 0..n_coins { - sums[j] += quote_volumes[[i, j]]; - if i >= window { - sums[j] -= quote_volumes[[idx_start, j]]; - rolling_volumes[[i, j]] = sums[j]; - } else { - rolling_volumes[[i, j]] = sums[j]; - } - } - } - - rolling_volumes -} diff --git a/passivbot-rust/src/constants.rs b/passivbot-rust/src/constants.rs index fc495db0a..b64b12186 100644 --- a/passivbot-rust/src/constants.rs +++ b/passivbot-rust/src/constants.rs @@ -1,6 +1,7 @@ pub const HIGH: usize = 0; pub const LOW: usize = 1; pub const CLOSE: usize = 2; +pub const VOLUME: usize = 3; pub const LONG: usize = 3; pub const SHORT: usize = 4; diff --git a/passivbot-rust/src/lib.rs b/passivbot-rust/src/lib.rs index ca533bce0..04ec1fe0d 100644 --- a/passivbot-rust/src/lib.rs +++ b/passivbot-rust/src/lib.rs @@ -41,7 +41,5 @@ fn passivbot_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(calc_closes_long_py, m)?)?; m.add_function(wrap_pyfunction!(calc_closes_short_py, m)?)?; m.add_function(wrap_pyfunction!(run_backtest, m)?)?; - m.add_function(wrap_pyfunction!(calc_noisiness_py, m)?)?; - m.add_function(wrap_pyfunction!(calc_volumes_py, m)?)?; Ok(()) } diff --git a/passivbot-rust/src/python.rs b/passivbot-rust/src/python.rs index 1b3db2871..dc4983e7f 100644 --- a/passivbot-rust/src/python.rs +++ b/passivbot-rust/src/python.rs @@ -1,4 +1,4 @@ -use crate::backtest::{analyze_backtest, 
calc_noisiness, calc_volumes, Backtest}; +use crate::backtest::{analyze_backtest, Backtest}; use crate::closes::{ calc_closes_long, calc_closes_short, calc_grid_close_long, calc_next_close_long, calc_next_close_short, calc_trailing_close_long, @@ -21,54 +21,14 @@ use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use pyo3::wrap_pyfunction; -#[pyfunction] -pub fn calc_volumes_py(hlcvs: PyReadonlyArray3<f64>, window: usize) -> PyResult<Py<PyArray2<f64>>> { - // Convert PyReadonlyArray3 to owned Array3 - let hlcvs_rust: Array3<f64> = hlcvs.as_array().to_owned(); - - // Call the existing calc_volumes function - let volumes = calc_volumes(&hlcvs_rust, window); - - // Convert the result back to a PyArray - Python::with_gil(|py| Ok(volumes.into_pyarray(py).to_owned())) -} - -#[pyfunction] -pub fn calc_noisiness_py( - hlcs: PyReadonlyArray3<f64>, - window: usize, -) -> PyResult<Py<PyArray2<f64>>> { - // Convert PyReadonlyArray3 to owned Array3 - let hlcs_rust: Array3<f64> = hlcs.as_array().to_owned(); - - // Call the existing calc_noisiness function - let noisiness = calc_noisiness(&hlcs_rust, window); - - // Convert the result back to a PyArray - Python::with_gil(|py| Ok(noisiness.into_pyarray(py).to_owned())) -} - #[pyfunction] pub fn run_backtest( - hlcs: PyReadonlyArray3<f64>, - preferred_coins: &PyAny, + hlcvs: PyReadonlyArray3<f64>, bot_params_pair_dict: &PyDict, exchange_params_list: &PyAny, backtest_params_dict: &PyDict, ) -> PyResult<(Py<PyArray2<PyObject>>, Py<PyArray1<f64>>, Py<PyDict>)> { - let hlcs_rust = hlcs.as_array(); - - let preferred_coins_rust: Array2<i32> = - if let Ok(arr) = preferred_coins.downcast::<PyArray2<i32>>() { - unsafe { arr.as_array().to_owned() } - } else if let Ok(arr) = preferred_coins.downcast::<PyArray2<i64>>() { - let preferred_coins_i64: ArrayBase<_, _> = unsafe { arr.as_array() }; - preferred_coins_i64.mapv(|x| x as i32) - } else { - return Err(PyValueError::new_err( - "Unsupported data type for preferred_coins", - )); - }; + let hlcvs_rust = hlcvs.as_array(); let bot_params_pair = bot_params_pair_from_dict(bot_params_pair_dict)?; let exchange_params = { @@ -95,8 +55,7 @@ pub fn run_backtest( let backtest_params = backtest_params_from_dict(backtest_params_dict)?; let mut backtest = Backtest::new( - hlcs_rust.to_owned(), - preferred_coins_rust, + hlcvs_rust.to_owned(), bot_params_pair, exchange_params, &backtest_params, @@ -186,6 +145,11 @@ fn bot_params_from_dict(dict: &PyDict) -> PyResult<BotParams> { entry_trailing_retracement_pct: extract_value(dict, "entry_trailing_retracement_pct")?, entry_trailing_grid_ratio: extract_value(dict, "entry_trailing_grid_ratio")?, entry_trailing_threshold_pct: extract_value(dict, "entry_trailing_threshold_pct")?, + filter_rolling_window: { + let filter_rolling_window_float: f64 = extract_value(dict, "filter_rolling_window")?; + filter_rolling_window_float.round() as usize + }, + filter_relative_volume_clip_pct: extract_value(dict, "filter_relative_volume_clip_pct")?, ema_span_0: extract_value(dict, "ema_span_0")?, ema_span_1: extract_value(dict, "ema_span_1")?, n_positions: { diff --git a/passivbot-rust/src/types.rs b/passivbot-rust/src/types.rs index c6cd13245..98714ba4b 100644 --- a/passivbot-rust/src/types.rs +++ b/passivbot-rust/src/types.rs @@ -110,6 +110,8 @@ pub struct BotParams { pub entry_trailing_retracement_pct: f64, pub entry_trailing_grid_ratio: f64, pub entry_trailing_threshold_pct: f64, + pub filter_rolling_window: usize, + pub filter_relative_volume_clip_pct: f64, pub ema_span_0: f64, pub ema_span_1: f64, pub n_positions: usize, From 66379438900cf1c7fd1f99abc434230324099b8e Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 
14:36:13 -0400 Subject: [PATCH 04/32] preferred coins are computed inside rust backtester --- src/backtest.py | 72 ++++++++----------------------------------------- 1 file changed, 11 insertions(+), 61 deletions(-) diff --git a/src/backtest.py b/src/backtest.py index cc6e38bfe..dcfdb5d7e 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -54,7 +54,7 @@ def process_forager_fills(fills): return fdf -def analyze_fills_forager(symbols, hlcs, fdf, equities): +def analyze_fills_forager(symbols, hlcvs, fdf, equities): analysis = {} pnls = {} for pside in ["long", "short"]: @@ -163,22 +163,20 @@ def prep_backtest_args(config, mss, exchange_params=None, backtest_params=None): return bot_params, exchange_params, backtest_params -def run_backtest(hlcs, preferred_coins, mss, config: dict): +def run_backtest(hlcvs, mss, config: dict): bot_params, exchange_params, backtest_params = prep_backtest_args(config, mss) print(f"Starting backtest...") sts = utc_ms() - fills, equities, analysis = pbr.run_backtest( - hlcs, preferred_coins, bot_params, exchange_params, backtest_params - ) + fills, equities, analysis = pbr.run_backtest(hlcvs, bot_params, exchange_params, backtest_params) print(f"seconds elapsed for backtest: {(utc_ms() - sts) / 1000:.4f}") return fills, equities, analysis -def post_process(config, hlcs, fills, equities, analysis, results_path): +def post_process(config, hlcvs, fills, equities, analysis, results_path): sts = utc_ms() fdf = process_forager_fills(fills) equities = pd.Series(equities) - analysis_py, bal_eq = analyze_fills_forager(config["backtest"]["symbols"], hlcs, fdf, equities) + analysis_py, bal_eq = analyze_fills_forager(config["backtest"]["symbols"], hlcvs, fdf, equities) for k in analysis_py: if k not in analysis: analysis[k] = analysis_py[k] @@ -191,10 +189,10 @@ def post_process(config, hlcs, fills, equities, analysis, results_path): config["analysis"] = analysis dump_config(config, f"{results_path}config.json") fdf.to_csv(f"{results_path}fills.csv") - plot_forager(results_path, config["backtest"]["symbols"], fdf, bal_eq, hlcs) + plot_forager(results_path, config["backtest"]["symbols"], fdf, bal_eq, hlcvs) -def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): +def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcvs): plots_dir = make_get_filepath(oj(results_path, "fills_plots", "")) plt.clf() bal_eq.plot() @@ -203,10 +201,10 @@ def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): for i, symbol in enumerate(symbols): try: print(f"Plotting fills for {symbol}") - hlcs_df = pd.DataFrame(hlcs[:, i, :], columns=["high", "low", "close"]) + hlcvs_df = pd.DataFrame(hlcvs[:, i, :3], columns=["high", "low", "close"]) fdfc = fdf[fdf.symbol == symbol] plt.clf() - plot_fills_forager(fdfc, hlcs_df) + plot_fills_forager(fdfc, hlcvs_df) plt.title(f"Fills {symbol}") plt.xlabel = "time" plt.ylabel = "price" @@ -215,50 +213,6 @@ def plot_forager(results_path, symbols: [str], fdf: pd.DataFrame, bal_eq, hlcs): print(f"Error plotting {symbol} {e}") -def calc_preferred_coins(hlcvs, config): - w_size = config["live"]["ohlcv_rolling_window"] - n_coins = hlcvs.shape[1] - - # Calculate noisiness indices - noisiness_indices = np.argsort(-pbr.calc_noisiness_py(hlcvs[:, :, :3], w_size)) - - # Calculate volume-based eligibility - if config["live"]["relative_volume_filter_clip_pct"] > 0.0: - n_eligibles = int(round(n_coins * (1 - config["live"]["relative_volume_filter_clip_pct"]))) - - for pside in ["long", "short"]: - if ( - 
config["bot"][pside]["n_positions"] > 0.0 - and config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): - n_eligibles = max(n_eligibles, int(round(config["bot"][pside]["n_positions"]))) - - if n_eligibles < n_coins: - # Calculate rolling volumes and get volume-based ranking - rolling_volumes = pbr.calc_volumes_py(hlcvs, w_size) - volume_ranking = np.argsort(-rolling_volumes, axis=1) - - # Create a mask for eligible coins based on volume (vectorized) - rows = np.arange(hlcvs.shape[0])[:, None] - cols = volume_ranking[:, :n_eligibles] - eligibility_mask = np.zeros((hlcvs.shape[0], n_coins), dtype=bool) - eligibility_mask[rows, cols] = True - - # Filter noisiness_indices based on eligibility - filtered_noisiness_indices = np.array( - [ - indices[mask] - for indices, mask in zip( - noisiness_indices, eligibility_mask[rows, noisiness_indices] - ) - ] - ) - - return filtered_noisiness_indices - - return noisiness_indices - - async def main(): manage_rust_compilation() logging.basicConfig( @@ -274,8 +228,6 @@ async def main(): "approved_coins", "ignored_coins", "minimum_coin_age_days", - "ohlcv_rolling_window", - "relative_volume_filter_clip_pct", } for key in sorted(template_config["live"]): if key not in keep_live_keys: @@ -287,10 +239,8 @@ async def main(): config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) config["backtest"]["symbols"] = symbols - preferred_coins = calc_preferred_coins(hlcvs, config) - hlcs = hlcvs[:, :, :3] - fills, equities, analysis = run_backtest(hlcs, preferred_coins, mss, config) - post_process(config, hlcs, fills, equities, analysis, results_path) + fills, equities, analysis = run_backtest(hlcvs, mss, config) + post_process(config, hlcvs, fills, equities, analysis, results_path) if __name__ == "__main__": From 9750f3b1a132f9814f0cac272a0d831ddbf848a7 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:23 -0400 Subject: [PATCH 05/32] volume is in quote --- src/downloader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/downloader.py b/src/downloader.py index c217ae0a2..5c9863bb8 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1520,6 +1520,9 @@ def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): # Extract the required data (high, low, close, volume) coin_data = ohlcv[:, 1:] + # Use quote volume as volume + coin_data[:, 3] = coin_data[:, 2] * coin_data[:, 3] + # Place the data in the unified array unified_array[start_idx:end_idx, i, :] = coin_data From 571b2c7f190808e8d61908448c85efd11914b48f Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:41 -0400 Subject: [PATCH 06/32] cleanup bybit fetch pnls --- src/exchanges/bybit.py | 129 ++++++++++++----------------------------- 1 file changed, 36 insertions(+), 93 deletions(-) diff --git a/src/exchanges/bybit.py b/src/exchanges/bybit.py index 927c2376c..5d82f254e 100644 --- a/src/exchanges/bybit.py +++ b/src/exchanges/bybit.py @@ -293,7 +293,9 @@ async def fetch_pnls_sub( start_time: int = None, end_time: int = None, ): - if start_time is not None: + if start_time is None: + pnls = await self.fetch_pnl(start_time=start_time, end_time=end_time) + else: week = 1000 * 60 * 60 * 24 * 7 pnls = [] if end_time is None: @@ -310,9 +312,7 @@ async def fetch_pnls_sub( if sts <= start_time: break i += 1 - logging.info(f"fetching pnls for more than a week {ts_to_date_utc(sts)}") - else: - pnls = await self.fetch_pnl(start_time=start_time, end_time=end_time) + logging.info(f"fetched pnls for more than a week 
{ts_to_date_utc(sts)}") return sorted(pnls, key=lambda x: x["timestamp"]) async def fetch_pnl( @@ -332,38 +332,44 @@ async def fetch_pnl( params["startTime"] = int(start_time) if end_time is not None: params["endTime"] = int(end_time) - fetched = await self.cca.private_get_v5_position_closed_pnl(params) - fetched["result"]["list"] = sorted( - floatify(fetched["result"]["list"]), key=lambda x: x["updatedTime"] - ) + fetched = (await self.cca.private_get_v5_position_closed_pnl(params))["result"] while True: - if fetched["result"]["list"] == []: + fetched["list"] = sorted( + floatify(fetched["list"]), key=lambda x: float(x["updatedTime"]) + ) + for i in range(len(fetched["list"])): + fetched["list"][i]["timestamp"] = float(fetched["list"][i]["updatedTime"]) + fetched["list"][i]["symbol"] = self.get_symbol_id_inv( + fetched["list"][i]["symbol"] + ) + fetched["list"][i]["pnl"] = float(fetched["list"][i]["closedPnl"]) + fetched["list"][i]["side"] = fetched["list"][i]["side"].lower() + fetched["list"][i]["position_side"] = ( + "long" if fetched["list"][i]["side"] == "sell" else "short" + ) + if fetched["list"] == []: break if ( - fetched["result"]["list"][0]["orderId"] in ids_seen - and fetched["result"]["list"][-1]["orderId"] in ids_seen + fetched["list"][0]["orderId"] in ids_seen + and fetched["list"][-1]["orderId"] in ids_seen ): break - all_pnls.extend(fetched["result"]["list"]) - for elm in fetched["result"]["list"]: + all_pnls.extend(fetched["list"]) + for elm in fetched["list"]: ids_seen.add(elm["orderId"]) if start_time is None: break - if fetched["result"]["list"][0]["updatedTime"] <= start_time: + if fetched["list"][0]["updatedTime"] <= start_time: break - if not fetched["result"]["nextPageCursor"]: + if not fetched["nextPageCursor"]: + break + if len(fetched["list"]) < limit: break logging.info( - f"fetching pnls {ts_to_date_utc(fetched['result']['list'][-1]['updatedTime'])}" - ) - params["cursor"] = fetched["result"]["nextPageCursor"] - fetched = await self.cca.private_get_v5_position_closed_pnl(params) - fetched["result"]["list"] = sorted( - floatify(fetched["result"]["list"]), key=lambda x: x["updatedTime"] + f"fetched pnls from {ts_to_date_utc(fetched['list'][-1]['updatedTime'])} n pnls: {len(fetched['list'])}" ) - for i in range(len(all_pnls)): - all_pnls[i]["pnl"] = all_pnls[i]["closedPnl"] - all_pnls[i]["timestamp"] = all_pnls[i]["updatedTime"] + params["cursor"] = fetched["nextPageCursor"] + fetched = (await self.cca.private_get_v5_position_closed_pnl(params))["result"] return sorted(all_pnls, key=lambda x: x["updatedTime"]) except Exception as e: logging.error(f"error fetching pnls {e}") @@ -371,54 +377,7 @@ async def fetch_pnl( traceback.print_exc() return [] - async def fetch_fills_sub_sub(self, start_time=None, end_time=None): - assert start_time is not None - params = {"limit": 100} - all_fetched = [] - week = 1000 * 60 * 60 * 24 * 7 - fetch_windows = [ - (i, min(i + week, end_time)) for i in range(int(start_time), int(end_time), int(week)) - ] - results = await asyncio.gather( - *[ - self.cca.fetch_my_trades(params={"paginate": True, "endTime": int(ets)}) - for sts, ets in fetch_windows - ] - ) - result = sorted(flatten(results), key=lambda x: x["timestamp"]) - return result - if start_time and end_time and end_time - start_time > week: - start_end_times = [start_time] - result = await self.cca.fetch_my_trades( - since=int(start_time) if start_time else start_time, params=params - ) - return sorted(result, key=lambda x: x["timestamp"]) - - async def fetch_fills_sub(self, 
start_time=None, end_time=None): - if start_time is None: - result = await self.cca.fetch_my_trades() - return sorted(result, key=lambda x: x["timestamp"]) - if end_time is None: - end_time = int(self.get_exchange_time() + 1000 * 60 * 60 * 24) - all_fetched_fills = [] - for _ in range(100): - fills = await self.cca.fetch_my_trades( - params={"paginate": True, "endTime": int(end_time)} - ) - if not fills: - break - all_fetched_fills += fills - if fills[0]["timestamp"] <= start_time: - break - logging.info( - f"fetched fills: {fills[0]['datetime']} {fills[-1]['datetime']} {len(fills)}" - ) - end_time = fills[0]["timestamp"] - else: - logging.error(f"more than 100 calls to ccxt fetch_my_trades") - return sorted(all_fetched_fills, key=lambda x: x["timestamp"]) - - async def fetch_fills2_sub_sub(self, start_time, end_time, limit=None): + async def fetch_fills(self, start_time, end_time, limit=None): if start_time is None: result = await self.cca.fetch_my_trades() return sorted(result, key=lambda x: x["timestamp"]) @@ -436,39 +395,23 @@ async def fetch_fills2_sub_sub(self, start_time, end_time, limit=None): if fills[0]["timestamp"] <= start_time: break logging.info( - f"fetched fills: {fills[0]['datetime']} {fills[-1]['datetime']} {len(fills)}" + f"fetched fills from {fills[0]['datetime']} to {fills[-1]['datetime']} n fills: {len(fills)}" ) end_time = fills[0]["timestamp"] + limit = 1000 else: logging.error(f"more than 100 calls to ccxt fetch_my_trades") return sorted(all_fetched_fills, key=lambda x: x["timestamp"]) - async def fetch_fills2_sub(self, start_time, end_time): - if start_time is None: - result = await self.cca.fetch_my_trades() - return sorted(result, key=lambda x: x["timestamp"]) - if end_time is None: - end_time = int(self.get_exchange_time() + 1000 * 60 * 60 * 24) - params = {"limit": 100} - all_fetched = [] - week = 1000 * 60 * 60 * 24 * 7 - fetch_windows = [ - (i, min(i + week, end_time)) for i in range(int(start_time), int(end_time), int(week)) - ] - results = await asyncio.gather( - *[self.fetch_fills2_sub_sub(sts, ets) for sts, ets in fetch_windows] - ) - result = sorted(flatten(results), key=lambda x: x["timestamp"]) - return result - async def fetch_pnls(self, start_time=None, end_time=None, limit=None): # fetch fills first, then pnls (bybit has them in separate endpoints) if start_time: if self.get_exchange_time() - start_time < 1000 * 60 * 60 * 4 and limit == 100: + # set start time to None (fetch latest) if start time is recent start_time = None - fills = await self.fetch_fills2_sub_sub(start_time=start_time, end_time=end_time, limit=limit) + fills = await self.fetch_fills(start_time=start_time, end_time=end_time, limit=limit) if start_time: - fills = [x for x in fills if x["timestamp"] >= start_time - 1000 * 60 * 60 * 4] + fills = [x for x in fills if x["timestamp"] >= start_time - 1000 * 60 * 60] if not fills: return [] start_time = fills[0]["timestamp"] From 95faed09cefe150c842c63a2234e3eb010645fae Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:36:59 -0400 Subject: [PATCH 07/32] update for new filter params --- src/optimize.py | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 4ce20c01f..0fae3a202 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -9,7 +9,6 @@ from backtest import ( prepare_hlcvs_mss, prep_backtest_args, - calc_preferred_coins, ) from pure_funcs import ( get_template_live_config, @@ -146,26 +145,15 @@ def 
config_to_individual(config): class Evaluator: - def __init__(self, hlcs, preferred_coins, config, mss): - self.hlcs = hlcs - self.shared_hlcs = shared_memory.SharedMemory(create=True, size=self.hlcs.nbytes) - self.shared_hlcs_np = np.ndarray( - self.hlcs.shape, dtype=self.hlcs.dtype, buffer=self.shared_hlcs.buf + def __init__(self, hlcvs, config, mss): + self.hlcvs = hlcvs + self.shared_hlcvs = shared_memory.SharedMemory(create=True, size=self.hlcvs.nbytes) + self.shared_hlcvs_np = np.ndarray( + self.hlcvs.shape, dtype=self.hlcvs.dtype, buffer=self.shared_hlcvs.buf ) - np.copyto(self.shared_hlcs_np, self.hlcs) - del self.hlcs + np.copyto(self.shared_hlcvs_np, self.hlcvs) + del self.hlcvs - self.preferred_coins = preferred_coins - self.shared_preferred_coins = shared_memory.SharedMemory( - create=True, size=self.preferred_coins.nbytes - ) - self.shared_preferred_coins_np = np.ndarray( - self.preferred_coins.shape, - dtype=self.preferred_coins.dtype, - buffer=self.shared_preferred_coins.buf, - ) - np.copyto(self.shared_preferred_coins_np, self.preferred_coins) - del self.preferred_coins self.config = config _, self.exchange_params, self.backtest_params = prep_backtest_args(config, mss) @@ -176,8 +164,7 @@ def evaluate(self, individual): config, [], exchange_params=self.exchange_params, backtest_params=self.backtest_params ) fills, equities, analysis = pbr.run_backtest( - self.shared_hlcs_np, - self.shared_preferred_coins_np, + self.shared_hlcvs_np, bot_params, self.exchange_params, self.backtest_params, @@ -208,10 +195,8 @@ def calc_fitness(self, analysis): def cleanup(self): # Close and unlink the shared memory - self.shared_hlcs.close() - self.shared_hlcs.unlink() - self.shared_preferred_coins.close() - self.shared_preferred_coins.unlink() + self.shared_hlcvs.close() + self.shared_hlcvs.unlink() def add_extra_options(parser): @@ -290,8 +275,6 @@ async def main(): config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) config["backtest"]["symbols"] = symbols - preferred_coins = calc_preferred_coins(hlcvs, config) - hlcs = hlcvs[:, :, :3] date_fname = ts_to_date_utc(utc_ms())[:19].replace(":", "_") coins = [symbol_to_coin(s) for s in config["backtest"]["symbols"]] coins_fname = "_".join(coins) if len(coins) <= 6 else f"{len(coins)}_coins" @@ -300,7 +283,7 @@ async def main(): f"optimize_results/{date_fname}_{coins_fname}_{hash_snippet}_all_results.txt" ) try: - evaluator = Evaluator(hlcs, preferred_coins, config, mss) + evaluator = Evaluator(hlcvs, config, mss) creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0)) # Minimize both objectives creator.create("Individual", list, fitness=creator.FitnessMulti) From e87a13c38acf649dee11a0fb3fe5d6254360c575 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:12 -0400 Subject: [PATCH 08/32] update for new filter params --- src/procedures.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/procedures.py b/src/procedures.py index a62849ba8..16a860f6a 100644 --- a/src/procedures.py +++ b/src/procedures.py @@ -134,17 +134,37 @@ def format_config(config: dict, verbose=True) -> dict: result = deepcopy(config["config"]) else: raise Exception(f"failed to format config") - for k0, v0, v1 in [("close_trailing_qty_pct", 1.0, [0.05, 1.0])]: + for k0, v0, v1 in [ + ("close_trailing_qty_pct", 1.0, [0.05, 1.0]), + ( + "filter_rolling_window", + ( + result["live"]["ohlcv_rolling_window"] + if "ohlcv_rolling_window" in result["live"] + else 
60.0 + ), + [10.0, 1440.0], + ), + ( + "filter_relative_volume_clip_pct", + ( + result["live"]["relative_volume_filter_clip_pct"] + if "relative_volume_filter_clip_pct" in result["live"] + else 0.5 + ), + [0.0, 1.0], + ), + ]: for pside in ["long", "short"]: if k0 not in result["bot"][pside]: result["bot"][pside][k0] = v0 if verbose: - print(f"adding missing parameter {k0}: {v0}") + print(f"adding missing backtest parameter {pside} {k0}: {v0}") opt_key = f"{pside}_{k0}" if opt_key not in result["optimize"]["bounds"]: result["optimize"]["bounds"][opt_key] = v1 if verbose: - print(f"adding missing parameter {opt_key}: {v1}") + print(f"adding missing optimize parameter {pside} {opt_key}: {v1}") for k0, src, dst in [ ("live", "minimum_market_age_days", "minimum_coin_age_days"), ("live", "noisiness_rolling_mean_window_size", "ohlcv_rolling_window"), @@ -156,7 +176,6 @@ def format_config(config: dict, verbose=True) -> dict: del result[k0][src] for k0, k1, v in [ ("live", "time_in_force", "good_till_cancelled"), - ("live", "ohlcv_rolling_window", 60), ("optimize", "scoring", ["mdg", "sharpe_ratio"]), ]: if k1 not in result[k0]: From 70a5608f9bd7c6ed6c264a2886be57d88ccde08d Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:27 -0400 Subject: [PATCH 09/32] new filter params --- src/pure_funcs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pure_funcs.py b/src/pure_funcs.py index 7d14af16c..5560d7855 100644 --- a/src/pure_funcs.py +++ b/src/pure_funcs.py @@ -523,6 +523,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "entry_trailing_grid_ratio": 0.5, "entry_trailing_retracement_pct": 0.01, "entry_trailing_threshold_pct": 0.05, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.0, "total_wallet_exposure_limit": 1.7, "unstuck_close_pct": 0.001, @@ -548,6 +550,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "entry_trailing_grid_ratio": 0.5, "entry_trailing_retracement_pct": 0.01, "entry_trailing_threshold_pct": 0.05, + "filter_rolling_window": 60, + "filter_relative_volume_clip_pct": 0.95, "n_positions": 10.0, "total_wallet_exposure_limit": 1.7, "unstuck_close_pct": 0.001, @@ -569,10 +573,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "max_n_cancellations_per_batch": 5, "max_n_creations_per_batch": 3, "minimum_coin_age_days": 7.0, - "ohlcv_rolling_window": 60, "pnls_max_lookback_days": 30.0, "price_distance_threshold": 0.002, - "relative_volume_filter_clip_pct": 0.1, "time_in_force": "good_till_cancelled", "user": "bybit_01", }, From 9b003ded5da493be3c64332587b3934ea13f4e0e Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sun, 22 Sep 2024 14:37:55 -0400 Subject: [PATCH 10/32] up version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f9c9842e6..1cb15828c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ :warning: **Used at one's own risk** :warning: -v7.0.7 +v7.1.0 ## Overview From 4c38f1a8e198f32af9d732556da409b9fb9cdea8 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 15:18:01 -0400 Subject: [PATCH 11/32] cleanup rust backtest --- passivbot-rust/src/backtest.rs | 177 +++++++++++++-------------------- 1 file changed, 71 insertions(+), 106 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index e14c0c55c..48b85dd92 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -121,19 +121,18 @@ pub struct TradingEnabled 
{ short: bool, } -pub struct PreferredCoins { - long: Vec<usize>, - short: Vec<usize>, +pub struct RollingVolumeSum { + long: HashMap<usize, f64>, + short: HashMap<usize, f64>, } pub struct Backtest { - hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_markets, 4) - preferred_coins: PreferredCoins, + hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, balance: f64, - n_markets: usize, + n_coins: usize, ema_alphas: EmaAlphas, emas: Vec<EMAs>, positions: Positions, @@ -151,7 +150,9 @@ pub struct Backtest { delist_timestamps: HashMap<usize, usize>, did_fill_long: HashSet<usize>, did_fill_short: HashSet<usize>, - rolling_volumes: Vec<Vec<f64>>, + n_eligible_long: usize, + n_eligible_short: usize, + rolling_volume_sum: RollingVolumeSum, } impl Backtest { @@ -162,16 +163,16 @@ impl Backtest { backtest_params: &BacktestParams, ) -> Self { let n_timesteps = hlcvs.shape()[0]; - let n_markets = hlcvs.shape()[1]; - let max_window = bot_params_pair - .long - .filter_rolling_window - .max(bot_params_pair.short.filter_rolling_window); - - // Initialize rolling_volumes with zeros - let rolling_volumes = vec![vec![0.0; n_markets]; n_timesteps]; - - let initial_emas = (0..n_markets) + let n_coins = hlcvs.shape()[1]; + let n_eligible_long = bot_params_pair.long.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() + as usize, + ); + let n_eligible_short = bot_params_pair.short.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() + as usize, + ); + let initial_emas = (0..n_coins) .map(|i| { let close_price = hlcvs[[0, i, CLOSE]]; EMAs { @@ -180,23 +181,18 @@ impl Backtest { } }) .collect(); - let preferred_coins = PreferredCoins { - long: Vec::<usize>::new(), - short: Vec::<usize>::new(), - }; let mut equities = Vec::<f64>::new(); equities.push(backtest_params.starting_balance); let mut bot_params_pair_cloned = bot_params_pair.clone(); - bot_params_pair_cloned.long.n_positions = n_markets.min(bot_params_pair.long.n_positions); - bot_params_pair_cloned.short.n_positions = n_markets.min(bot_params_pair.short.n_positions); - let mut backtest = Backtest { + bot_params_pair_cloned.long.n_positions = n_coins.min(bot_params_pair.long.n_positions); + bot_params_pair_cloned.short.n_positions = n_coins.min(bot_params_pair.short.n_positions); + Backtest { hlcvs, - preferred_coins, bot_params_pair: bot_params_pair_cloned, exchange_params_list, backtest_params: backtest_params.clone(), balance: backtest_params.starting_balance, - n_markets, + n_coins, ema_alphas: calc_ema_alphas(&bot_params_pair), emas: initial_emas, positions: Positions::default(), @@ -224,60 +220,16 @@ impl Backtest { delist_timestamps: HashMap::new(), did_fill_long: HashSet::new(), did_fill_short: HashSet::new(), - rolling_volumes, - }; - backtest.initialize_rolling_volumes(max_window); - backtest - } - - fn initialize_rolling_volumes(&mut self, max_window: usize) { - let n_markets = self.hlcvs.shape()[1]; - let n_timesteps = self.hlcvs.shape()[0]; - - for k in 0..n_timesteps { - let start = k.saturating_sub(max_window - 1); - for i in 0..n_markets { - // Update rolling volume - self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); - } - } - } - - fn update_rolling_volumes(&mut self, k: usize) { - let n_markets = self.hlcvs.shape()[1]; - let max_window = self - .bot_params_pair - .long - .filter_rolling_window - .max(self.bot_params_pair.short.filter_rolling_window); - - if k >= max_window { - let old_k = k - max_window; - for i in 
0..n_markets { - self.rolling_volumes[k][i] = self.rolling_volumes[k - 1][i] - + self.hlcvs[[k, i, VOLUME]] - - self.hlcvs[[old_k, i, VOLUME]]; - } - } else { - // For the first max_window steps, we need to recalculate the full sum - let start = 0; - for i in 0..n_markets { - self.rolling_volumes[k][i] = self.hlcvs.slice(s![start..=k, i, VOLUME]).sum(); - } + n_eligible_long, + n_eligible_short, + rolling_volume_sum: RollingVolumeSum { + long: HashMap::new(), + short: HashMap::new(), + }, } } - fn calc_noisiness(&self, k: usize, idx: usize, window: usize) -> f64 { - let start = k.saturating_sub(window - 1); - let slice = self.hlcvs.slice(s![start..=k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - nrr_sum / (k - start + 1) as f64 - } - - fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { - let bot_params = match pside { - LONG => &self.bot_params_pair.long, - SHORT => &self.bot_params_pair.short, + pub fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + let (bot_params, n_eligible) = match pside { + LONG => (&self.bot_params_pair.long, self.n_eligible_long), + SHORT => (&self.bot_params_pair.short, self.n_eligible_short), _ => panic!("Invalid pside"), }; let n_coins = self.hlcvs.shape()[1]; + let start_idx = k.saturating_sub(bot_params.filter_rolling_window); - // Use pre-computed rolling volumes - let mut volume_sums: Vec<(usize, f64)> = self.rolling_volumes[k] - .iter() - .enumerate() - .map(|(idx, &sum)| (idx, sum)) + // Calculate volume sums + let mut volume_sums: Vec<(usize, f64)> = (0..n_coins) + .map(|idx| { + let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); + (idx, sum) + }) .collect(); // Sort by volume in descending order volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Filter by volume - let n_eligible = bot_params.n_positions.max( - (n_coins as f64 * (1.0 - bot_params.filter_relative_volume_clip_pct)).round() as usize, - ); let filtered_indices: Vec<usize> = volume_sums - .iter() + .into_iter() .take(n_eligible) - .map(|&(idx, _)| idx) + .map(|(idx, _)| idx) .collect(); - // Calculate noisiness on-the-fly for filtered coins + // Calculate noisiness let mut noisiness: Vec<(usize, f64)> = filtered_indices .into_iter() .map(|idx| { - ( - idx, - self.calc_noisiness(k, idx, bot_params.filter_rolling_window), - ) + let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + let mean_nrr = nrr_sum / (k - start_idx) as f64; + (idx, mean_nrr) }) .collect(); @@ -285,7 +285,7 @@ impl Backtest { let check_points: Vec<usize> = (0..7).map(|i| i * 60 * 24).collect(); let n_timesteps = self.hlcvs.shape()[0]; - for idx in 0..self.n_markets { + for idx in 0..self.n_coins { self.trailing_prices .long .insert(idx, TrailingPriceBundle::default()); @@ -317,6 +317,5 @@ impl Backtest { } } for k in 1..(n_timesteps - 1) { - self.update_rolling_volumes(k); self.check_for_fills(k); self.update_emas(k); self.update_open_orders(k); @@ -373,28 +373,41 @@ impl Backtest { } fn update_actives(&mut self, k: usize, pside: usize) -> Vec<usize> { - // Calculate preferred coins first - let preferred_coins = self.calc_preferred_coins(k, pside); - - let (actives, positions, n_positions) = match pside { - LONG => ( - &mut self.actives.long, - &self.positions.long, - self.bot_params_pair.long.n_positions, - ), + // Calculate all the information we need before borrowing + let (positions, n_positions) = match pside { + LONG => (&self.positions.long, self.bot_params_pair.long.n_positions), SHORT => ( - &mut self.actives.short, &self.positions.short, self.bot_params_pair.short.n_positions, ), _ => panic!("Invalid pside"), }; - let mut actives_without_pos = 
Vec::with_capacity(n_positions); + + let current_positions: Vec<usize> = positions.keys().cloned().collect(); + let mut preferred_coins = Vec::new(); + + // Only calculate preferred coins if there are open slots + if current_positions.len() < n_positions { + preferred_coins = self.calc_preferred_coins(k, pside); + } + + // Now we can mutably borrow self.actives + let actives = match pside { + LONG => &mut self.actives.long, + SHORT => &mut self.actives.short, + _ => unreachable!(), + }; + actives.clear(); - // First, add all markets with existing positions - for &market_idx in positions.keys() { + // Add all markets with existing positions + for &market_idx in &current_positions { actives.insert(market_idx); } - // Then, add additional markets based on preferred_coins + let mut actives_without_pos = Vec::new(); + + // Add additional markets based on preferred_coins for &market_idx in &preferred_coins { if actives.len() < n_positions { if actives.insert(market_idx) { actives_without_pos.push(market_idx); } } else { break; } } + actives_without_pos } From 5fcd43e872d7da2ff2b5db3c50b64125555b25b5 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 18:25:28 -0400 Subject: [PATCH 12/32] bug fix, refactor, etc --- passivbot-rust/src/backtest.rs | 225 +++++++++++++++++++++----------- passivbot-rust/src/closes.rs | 1 - passivbot-rust/src/constants.rs | 6 +- 3 files changed, 149 insertions(+), 83 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 48b85dd92..78ec7125e 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -69,12 +69,6 @@ impl EMAs { } } -#[derive(Debug, Default)] -pub struct OpenOrders { - pub long: HashMap<usize, OpenOrderBundle>, - pub short: HashMap<usize, OpenOrderBundle>, -} - #[derive(Debug, Default)] pub struct OpenOrdersNew { pub long: HashMap<usize, OpenOrderBundleNew>, pub short: HashMap<usize, OpenOrderBundleNew>, } #[derive(Debug, Default)] pub struct OpenOrderBundleNew { pub entries: Vec<Order>, pub closes: Vec<Order>, } -#[derive(Debug, Default)] -pub struct OpenOrderBundle { - pub entry: Order, - pub close: Order, -} - #[derive(Default, Debug)] pub struct Actives { long: HashSet<usize>, short: HashSet<usize>, } @@ -112,6 +106,50 @@ pub struct TradingEnabled { pub struct RollingVolumeSum { long: HashMap<usize, f64>, short: HashMap<usize, f64>, + prev_k_long: usize, + prev_k_short: usize, +} + +impl RollingVolumeSum { + fn new() -> Self { + RollingVolumeSum { + long: HashMap::new(), + short: HashMap::new(), + prev_k_long: 0, + prev_k_short: 0, + } + } + + fn update(&mut self, hlcvs: &Array3<f64>, k: usize, pside: usize, window: usize) { + let (volume_sums, prev_k) = match pside { + LONG => (&mut self.long, &mut self.prev_k_long), + SHORT => (&mut self.short, &mut self.prev_k_short), + _ => panic!("Invalid pside"), + }; + + let start_idx = k.saturating_sub(window); + let prev_start_idx = prev_k.saturating_sub(window); + + for idx in 0..hlcvs.shape()[1] { + let mut sum = *volume_sums.entry(idx).or_insert(0.0); + + // Remove volumes outside the new window + if *prev_k > start_idx { + for i in prev_start_idx..start_idx { + sum -= hlcvs[[i, idx, VOLUME]]; + } + } + + // Add new volumes + for i in (*prev_k).max(start_idx)..k { + sum += hlcvs[[i, idx, VOLUME]]; + } + + volume_sums.insert(idx, sum); + } + + *prev_k = k; + } } pub struct Backtest { hlcvs: Array3<f64>, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, exchange_params_list: Vec<ExchangeParams>, backtest_params: BacktestParams, balance: f64, n_coins: usize, ema_alphas: EmaAlphas, emas: Vec<EMAs>, positions: Positions, - open_orders: OpenOrders, // keys are symbol indices - open_orders_new: OpenOrdersNew, + open_orders: OpenOrdersNew, 
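+    // keys are symbol indices; one bundle of entry and close orders per coin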
trailing_prices: TrailingPrices, actives: Actives, pnl_cumsum_running: f64, @@ -227,8 +252,7 @@ impl Backtest { ema_alphas: calc_ema_alphas(&bot_params_pair), emas: initial_emas, positions: Positions::default(), - open_orders: OpenOrders::default(), - open_orders_new: OpenOrdersNew::default(), + open_orders: OpenOrdersNew::default(), trailing_prices: TrailingPrices::default(), actives: Actives::default(), pnl_cumsum_running: 0.0, @@ -252,70 +276,16 @@ impl Backtest { did_fill_short: HashSet::new(), n_eligible_long, n_eligible_short, - rolling_volume_sum: RollingVolumeSum { - long: HashMap::new(), - short: HashMap::new(), - }, + rolling_volume_sum: RollingVolumeSum::new(), } } - pub fn calc_preferred_coins(&self, k: usize, pside: usize) -> Vec<usize> { + pub fn calc_preferred_coins(&mut self, k: usize, pside: usize) -> Vec<usize> { let (bot_params, n_eligible) = match pside { LONG => (&self.bot_params_pair.long, self.n_eligible_long), SHORT => (&self.bot_params_pair.short, self.n_eligible_short), _ => panic!("Invalid pside"), }; + let window = bot_params.filter_rolling_window; + let start_idx = k.saturating_sub(window); + + // Calculate volume sums for all coins + let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) + .map(|idx| { + let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); + (idx, sum) + }) + .collect(); + + // Sort by volume in descending order + volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Take top n_eligible indices + let filtered_indices: Vec<usize> = volume_sums + .into_iter() + .take(n_eligible) + .map(|(idx, _)| idx) + .collect(); + + // Calculate noisiness + let mut noisiness: Vec<(usize, f64)> = filtered_indices + .into_iter() + .map(|idx| { + let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); + let nrr_sum: f64 = slice + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + let mean_nrr = nrr_sum / (k - start_idx) as f64; + (idx, mean_nrr) + }) + .collect(); + + // Sort by noisiness in descending order + noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + + // Return indices sorted by noisiness + noisiness.into_iter().map(|(idx, _)| idx).collect() + } + + pub fn calc_preferred_coins_old(&self, k: usize, pside: usize) -> Vec<usize> { + let (bot_params, n_eligible) = match pside { + LONG => (&self.bot_params_pair.long, self.n_eligible_long), + SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + _ => panic!("Invalid pside"), + }; - let n_coins = self.hlcvs.shape()[1]; let start_idx = k.saturating_sub(bot_params.filter_rolling_window); // Calculate volume sums - let mut volume_sums: Vec<(usize, f64)> = (0..n_coins) + let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) .map(|idx| { let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); (idx, sum) }) .collect(); // Sort by volume in descending order volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); // Filter by volume let filtered_indices: Vec<usize> = volume_sums .into_iter() .take(n_eligible) .map(|(idx, _)| idx) .collect(); @@ -498,14 +498,14 @@ impl Backtest { self.did_fill_short.clear(); if self.trading_enabled.long { let mut open_orders_keys_long: Vec<usize> = - self.open_orders_new.long.keys().cloned().collect(); + self.open_orders.long.keys().cloned().collect(); 
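            // sort keys so fills are processed in a deterministic symbol order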
open_orders_keys_long.sort(); for idx in open_orders_keys_long { // Process close fills long - if !self.open_orders_new.long[&idx].closes.is_empty() { + if !self.open_orders.long[&idx].closes.is_empty() { let mut closes_to_process = Vec::new(); { - for close_order in &self.open_orders_new.long[&idx].closes { + for close_order in &self.open_orders.long[&idx].closes { if self.order_filled(k, idx, close_order) { closes_to_process.push(close_order.clone()); } @@ -450,10 +522,10 @@ impl Backtest { } } // Process entry fills long - if !self.open_orders_new.long[&idx].entries.is_empty() { + if !self.open_orders.long[&idx].entries.is_empty() { let mut entries_to_process = Vec::new(); { - for entry_order in &self.open_orders_new.long[&idx].entries { + for entry_order in &self.open_orders.long[&idx].entries { if self.order_filled(k, idx, entry_order) { entries_to_process.push(entry_order.clone()); } @@ -469,14 +541,14 @@ impl Backtest { } if self.trading_enabled.short { let mut open_orders_keys_short: Vec = - self.open_orders_new.short.keys().cloned().collect(); + self.open_orders.short.keys().cloned().collect(); open_orders_keys_short.sort(); for idx in open_orders_keys_short { // Process close fills short - if !self.open_orders_new.short[&idx].closes.is_empty() { + if !self.open_orders.short[&idx].closes.is_empty() { let mut closes_to_process = Vec::new(); { - for close_order in &self.open_orders_new.short[&idx].closes { + for close_order in &self.open_orders.short[&idx].closes { if self.order_filled(k, idx, close_order) { closes_to_process.push(close_order.clone()); } @@ -491,10 +563,10 @@ impl Backtest { } } // Process entry fills short - if !self.open_orders_new.short[&idx].entries.is_empty() { + if !self.open_orders.short[&idx].entries.is_empty() { let mut entries_to_process = Vec::new(); { - for entry_order in &self.open_orders_new.short[&idx].entries { + for entry_order in &self.open_orders.short[&idx].entries { if self.order_filled(k, idx, entry_order) { entries_to_process.push(entry_order.clone()); } @@ -849,7 +921,7 @@ impl Backtest { // check if coin is delisted; if so, close pos as unstuck close if let Some(&delist_timestamp) = self.delist_timestamps.get(&idx) { if k >= delist_timestamp && self.positions.long.contains_key(&idx) { - self.open_orders_new.long.get_mut(&idx).unwrap().closes = [Order { + self.open_orders.long.get_mut(&idx).unwrap().closes = [Order { qty: -self.positions.long[&idx].size, price: round_( f64::min( @@ -861,7 +933,7 @@ impl Backtest { order_type: OrderType::CloseUnstuckLong, }] .to_vec(); - self.open_orders_new.long.entry(idx).or_default().entries = Vec::new(); + self.open_orders.long.entry(idx).or_default().entries = Vec::new(); return; } } @@ -876,7 +948,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_entry_order) && self.has_next_grid_order(&next_entry_order, LONG) { - self.open_orders_new.long.entry(idx).or_default().entries = calc_entries_long( + self.open_orders.long.entry(idx).or_default().entries = calc_entries_long( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.long, @@ -884,7 +956,7 @@ impl Backtest { &self.trailing_prices.long[&idx], ); } else { - self.open_orders_new.long.entry(idx).or_default().entries = [next_entry_order].to_vec(); + self.open_orders.long.entry(idx).or_default().entries = [next_entry_order].to_vec(); } let next_close_order = calc_next_close_long( &self.exchange_params_list[idx], @@ -897,7 +969,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_close_order) && 
self.has_next_grid_order(&next_close_order, LONG) { - self.open_orders_new.long.entry(idx).or_default().closes = calc_closes_long( + self.open_orders.long.entry(idx).or_default().closes = calc_closes_long( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.long, @@ -905,7 +977,7 @@ impl Backtest { &self.trailing_prices.long[&idx], ); } else { - self.open_orders_new.long.entry(idx).or_default().closes = [next_close_order].to_vec(); + self.open_orders.long.entry(idx).or_default().closes = [next_close_order].to_vec(); } } @@ -921,7 +993,7 @@ impl Backtest { // check if coin is delisted; if so, close pos as unstuck close if let Some(&delist_timestamp) = self.delist_timestamps.get(&idx) { if k >= delist_timestamp && self.positions.short.contains_key(&idx) { - self.open_orders_new.short.get_mut(&idx).unwrap().closes = [Order { + self.open_orders.short.get_mut(&idx).unwrap().closes = [Order { qty: self.positions.short[&idx].size.abs(), price: round_( f64::max( @@ -933,7 +1005,7 @@ impl Backtest { order_type: OrderType::CloseUnstuckLong, }] .to_vec(); - self.open_orders_new.short.entry(idx).or_default().entries = Vec::new(); + self.open_orders.short.entry(idx).or_default().entries = Vec::new(); return; } } @@ -948,7 +1020,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_entry_order) && self.has_next_grid_order(&next_entry_order, SHORT) { - self.open_orders_new.short.entry(idx).or_default().entries = calc_entries_short( + self.open_orders.short.entry(idx).or_default().entries = calc_entries_short( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.short, @@ -956,8 +1028,7 @@ impl Backtest { &self.trailing_prices.short[&idx], ); } else { - self.open_orders_new.short.entry(idx).or_default().entries = - [next_entry_order].to_vec(); + self.open_orders.short.entry(idx).or_default().entries = [next_entry_order].to_vec(); } let next_close_order = calc_next_close_short( @@ -971,7 +1042,7 @@ impl Backtest { if self.order_filled(k + 1, idx, &next_close_order) && self.has_next_grid_order(&next_close_order, SHORT) { - self.open_orders_new.short.entry(idx).or_default().closes = calc_closes_short( + self.open_orders.short.entry(idx).or_default().closes = calc_closes_short( &self.exchange_params_list[idx], &state_params, &self.bot_params_pair.short, @@ -979,7 +1050,7 @@ impl Backtest { &self.trailing_prices.short[&idx], ); } else { - self.open_orders_new.short.entry(idx).or_default().closes = [next_close_order].to_vec() + self.open_orders.short.entry(idx).or_default().closes = [next_close_order].to_vec() } } @@ -1072,9 +1143,9 @@ impl Backtest { self.exchange_params_list[idx].price_step, ), ); - if self.open_orders_new.long[&idx].closes.is_empty() - || self.open_orders_new.long[&idx].closes[0].qty == 0.0 - || close_price < self.open_orders_new.long[&idx].closes[0].price + if self.open_orders.long[&idx].closes.is_empty() + || self.open_orders.long[&idx].closes[0].qty == 0.0 + || close_price < self.open_orders.long[&idx].closes[0].price { let close_qty = -f64::min( self.positions.long[&idx].size, @@ -1114,9 +1185,9 @@ impl Backtest { self.exchange_params_list[idx].price_step, ), ); - if self.open_orders_new.short[&idx].closes.is_empty() - || self.open_orders_new.short[&idx].closes[0].qty == 0.0 - || close_price > self.open_orders_new.short[&idx].closes[0].price + if self.open_orders.short[&idx].closes.is_empty() + || self.open_orders.short[&idx].closes[0].qty == 0.0 + || close_price > self.open_orders.short[&idx].closes[0].price { let close_qty = f64::min( 
self.positions.short[&idx].size.abs(),
@@ -1196,17 +1267,17 @@ impl Backtest {
             }
         }
         let (unstucking_idx, unstucking_pside, unstucking_close) = self.calc_unstucking_close(k);
-        if unstucking_idx != NO_POS {
+        if unstucking_pside != NO_POS {
             match unstucking_pside {
                 LONG => {
-                    self.open_orders_new
+                    self.open_orders
                         .long
                         .get_mut(&unstucking_idx)
                         .unwrap()
                         .closes = [unstucking_close].to_vec();
                 }
                 SHORT => {
-                    self.open_orders_new
+                    self.open_orders
                         .short
                         .get_mut(&unstucking_idx)
                         .unwrap()
@@ -1235,7 +1306,7 @@ impl Backtest {
         let mut actives_without_pos = Vec::<usize>::new();
         if positions_long_indices.len() < self.bot_params_pair.long.n_positions {
             actives_without_pos = self.update_actives(k, LONG);
-            self.open_orders_new
+            self.open_orders
                 .long
                 .retain(|&idx, _| self.actives.long.contains(&idx));
         }
-
+
         for idx in active_long_indices {
             if actives_without_pos.contains(&idx)
-                || self.open_orders_new.long.get(&idx).map_or(false, |orders| {
+                || self.open_orders.long.get(&idx).map_or(false, |orders| {
                     orders.closes.iter().any(|order| {
                         order.order_type == OrderType::CloseUnstuckLong
                             || order.order_type == OrderType::CloseTrailingLong
@@ -1271,26 +1342,22 @@ impl Backtest {
         let mut actives_without_pos = Vec::<usize>::new();
         if positions_short_indices.len() < self.bot_params_pair.short.n_positions {
             actives_without_pos = self.update_actives(k, SHORT);
-            self.open_orders_new
+            self.open_orders
                 .short
                 .retain(|&idx, _| self.actives.short.contains(&idx));
         }
         let active_short_indices: Vec<usize> = self.actives.short.iter().cloned().collect();
         for idx in active_short_indices {
             if actives_without_pos.contains(&idx)
-                || self
-                    .open_orders_new
-                    .short
-                    .get(&idx)
-                    .map_or(false, |orders| {
-                        orders.closes.iter().any(|order| {
-                            order.order_type == OrderType::CloseUnstuckShort
-                                || order.order_type == OrderType::CloseTrailingShort
-                        }) || orders.entries.iter().any(|order| {
-                            order.order_type == OrderType::EntryTrailingNormalShort
-                                || order.order_type == OrderType::EntryTrailingCroppedShort
-                        })
+                || self.open_orders.short.get(&idx).map_or(false, |orders| {
+                    orders.closes.iter().any(|order| {
+                        order.order_type == OrderType::CloseUnstuckShort
+                            || order.order_type == OrderType::CloseTrailingShort
+                    }) || orders.entries.iter().any(|order| {
+                        order.order_type == OrderType::EntryTrailingNormalShort
+                            || order.order_type == OrderType::EntryTrailingCroppedShort
                     })
+                })
             {
                 self.update_open_orders_short_single(k, idx);
             }
@@ -1300,15 +1367,15 @@ impl Backtest {
         if !self.is_stuck.long.is_empty() || !self.is_stuck.short.is_empty() {
             let (unstucking_idx, unstucking_pside, unstucking_close) =
                 self.calc_unstucking_close(k);
-            if unstucking_idx != NO_POS {
+            if unstucking_pside != NO_POS {
                 match unstucking_pside {
                     LONG => {
-                        if let Some(orders) = self.open_orders_new.long.get_mut(&unstucking_idx) {
+                        if let Some(orders) = self.open_orders.long.get_mut(&unstucking_idx) {
                            orders.closes = vec![unstucking_close];
                        }
                    }
                    SHORT => {
-                        if let Some(orders) = self.open_orders_new.short.get_mut(&unstucking_idx) {
+                        if let Some(orders) = self.open_orders.short.get_mut(&unstucking_idx) {
                            orders.closes = vec![unstucking_close];
                        }
                    }

diff --git a/passivbot-rust/src/closes.rs b/passivbot-rust/src/closes.rs
index 7d0fbe418..8a53958fa 100644
--- a/passivbot-rust/src/closes.rs
+++ b/passivbot-rust/src/closes.rs
@@ -1,4 +1,3 @@
-use crate::constants::{CLOSE, LONG, NO_POS, SHORT};
 use crate::entries::calc_min_entry_qty;
 use crate::types::{
     BotParams, BotParamsPair, EMABands, ExchangeParams, Order, OrderType, Position,
Positions, diff --git a/passivbot-rust/src/constants.rs b/passivbot-rust/src/constants.rs index b64b12186..b6cb1e407 100644 --- a/passivbot-rust/src/constants.rs +++ b/passivbot-rust/src/constants.rs @@ -3,6 +3,6 @@ pub const LOW: usize = 1; pub const CLOSE: usize = 2; pub const VOLUME: usize = 3; -pub const LONG: usize = 3; -pub const SHORT: usize = 4; -pub const NO_POS: usize = 5; +pub const LONG: usize = 0; +pub const SHORT: usize = 1; +pub const NO_POS: usize = 2; From 1df94277773da212a1d8153d5d36464d233eb430 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 25 Sep 2024 18:25:46 -0400 Subject: [PATCH 13/32] sort symbol list --- src/downloader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/downloader.py b/src/downloader.py index 5c9863bb8..0c8686745 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -1360,10 +1360,10 @@ async def prepare_hlcvs(config: dict): np.diff(data[:, 0]) == interval_ms ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcs hlcvsd[symbol] = data - symbols = list(hlcvsd.keys()) + symbols = sorted(hlcvsd.keys()) if len(symbols) > 1: print(f"Unifying data for {len(symbols)} coins into single numpy array...") - timestamps, unified_data = unify_hlcv_data(hlcvsd.values()) + timestamps, unified_data = unify_hlcv_data([hlcvsd[s] for s in symbols]) return symbols, timestamps, unified_data From 834f187feccd985c961ecddee1b3dbe968991a90 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Thu, 26 Sep 2024 12:15:34 -0400 Subject: [PATCH 14/32] optimize calc_preferred_coins --- passivbot-rust/src/backtest.rs | 179 +++++++++++---------------------- 1 file changed, 58 insertions(+), 121 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index 78ec7125e..e8b8a02a3 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -110,54 +110,12 @@ pub struct TradingEnabled { } pub struct RollingVolumeSum { - long: HashMap, - short: HashMap, + long: Vec, + short: Vec, prev_k_long: usize, prev_k_short: usize, } -impl RollingVolumeSum { - fn new() -> Self { - RollingVolumeSum { - long: HashMap::new(), - short: HashMap::new(), - prev_k_long: 0, - prev_k_short: 0, - } - } - - fn update(&mut self, hlcvs: &Array3, k: usize, pside: usize, window: usize) { - let (volume_sums, prev_k) = match pside { - LONG => (&mut self.long, &mut self.prev_k_long), - SHORT => (&mut self.short, &mut self.prev_k_short), - _ => panic!("Invalid pside"), - }; - - let start_idx = k.saturating_sub(window); - let prev_start_idx = prev_k.saturating_sub(window); - - for idx in 0..hlcvs.shape()[1] { - let mut sum = *volume_sums.entry(idx).or_insert(0.0); - - // Remove volumes outside the new window - if *prev_k > start_idx { - for i in prev_start_idx..start_idx { - sum -= hlcvs[[i, idx, VOLUME]]; - } - } - - // Add new volumes - for i in (*prev_k).max(start_idx)..k { - sum += hlcvs[[i, idx, VOLUME]]; - } - - volume_sums.insert(idx, sum); - } - - *prev_k = k; - } -} - pub struct Backtest { hlcvs: Array3, // 3D array: (n_timesteps, n_coins, 4) bot_params_pair: BotParamsPair, @@ -184,6 +142,7 @@ pub struct Backtest { n_eligible_long: usize, n_eligible_short: usize, rolling_volume_sum: RollingVolumeSum, + volume_indices_buffer: Option>, } impl Backtest { @@ -252,7 +211,13 @@ impl Backtest { did_fill_short: HashSet::new(), n_eligible_long, n_eligible_short, - rolling_volume_sum: RollingVolumeSum::new(), + rolling_volume_sum: RollingVolumeSum { + long: vec![0.0; n_coins], + short: vec![0.0; n_coins], + prev_k_long: 
0, + prev_k_short: 0, + }, + volume_indices_buffer: Some(vec![(0.0, 0); n_coins]), // Initialize here } } @@ -264,93 +229,65 @@ impl Backtest { }; let window = bot_params.filter_rolling_window; - let start_idx = k.saturating_sub(window); - - // Calculate volume sums for all coins - let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) - .map(|idx| { - let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); - (idx, sum) - }) - .collect(); - - // Sort by volume in descending order - volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); - - // Take top n_eligible indices - let filtered_indices: Vec = volume_sums - .into_iter() - .take(n_eligible) - .map(|(idx, _)| idx) - .collect(); + let start_k = k.saturating_sub(window); - // Calculate noisiness - let mut noisiness: Vec<(usize, f64)> = filtered_indices - .into_iter() - .map(|idx| { - let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - let mean_nrr = nrr_sum / (k - start_idx) as f64; - (idx, mean_nrr) - }) - .collect(); - - // Sort by noisiness in descending order - noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); - - // Return indices sorted by noisiness - noisiness.into_iter().map(|(idx, _)| idx).collect() - } - - pub fn calc_preferred_coins_old(&self, k: usize, pside: usize) -> Vec { - let (bot_params, n_eligible) = match pside { - LONG => (&self.bot_params_pair.long, self.n_eligible_long), - SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + let (rolling_volume_sum, prev_k) = match pside { + LONG => ( + &mut self.rolling_volume_sum.long, + &mut self.rolling_volume_sum.prev_k_long, + ), + SHORT => ( + &mut self.rolling_volume_sum.short, + &mut self.rolling_volume_sum.prev_k_short, + ), _ => panic!("Invalid pside"), }; - let start_idx = k.saturating_sub(bot_params.filter_rolling_window); - - // Calculate volume sums - let mut volume_sums: Vec<(usize, f64)> = (0..self.n_coins) - .map(|idx| { - let sum = self.hlcvs.slice(s![start_idx..k, idx, VOLUME]).sum(); - (idx, sum) - }) - .collect(); + // Use the pre-allocated buffer for volume indices + let volume_indices = self.volume_indices_buffer.as_mut().unwrap(); - // Sort by volume in descending order - volume_sums.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + // Update rolling volume sums + if k > window && k - *prev_k < window { + // Rolling calculation + for idx in 0..self.n_coins { + rolling_volume_sum[idx] -= self + .hlcvs + .slice(s![*prev_k - window..start_k, idx, VOLUME]) + .sum(); + rolling_volume_sum[idx] += self.hlcvs.slice(s![*prev_k..k, idx, VOLUME]).sum(); + volume_indices[idx] = (rolling_volume_sum[idx], idx); + } + } else { + // Full calculation + for idx in 0..self.n_coins { + rolling_volume_sum[idx] = self.hlcvs.slice(s![start_k..k, idx, VOLUME]).sum(); + volume_indices[idx] = (rolling_volume_sum[idx], idx); + } + } + *prev_k = k; - // Filter by volume - let filtered_indices: Vec = volume_sums - .into_iter() - .take(n_eligible) - .map(|(idx, _)| idx) - .collect(); + // Partial sort to get top n_eligible coins by volume + volume_indices.select_nth_unstable_by(n_eligible, |a, b| { + b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal) + }); - // Calculate noisiness - let mut noisiness: Vec<(usize, f64)> = filtered_indices - .into_iter() - .map(|idx| { - let slice = self.hlcvs.slice(s![start_idx..k, idx, ..]); - let nrr_sum: f64 = slice - .axis_iter(Axis(0)) - 
.map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) - .sum(); - let mean_nrr = nrr_sum / (k - start_idx) as f64; - (idx, mean_nrr) - }) - .collect(); + // Calculate noisiness for top n_eligible coins + let mut noisinesses = vec![(0.0f64, 0usize); n_eligible]; + for (i, &(_, idx)) in volume_indices.iter().take(n_eligible).enumerate() { + let noisiness: f64 = self + .hlcvs + .slice(s![start_k..k, idx, ..]) + .axis_iter(Axis(0)) + .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) + .sum(); + noisinesses[i] = (noisiness, idx); + } // Sort by noisiness in descending order - noisiness.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(Ordering::Equal)); + noisinesses.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); // Return indices sorted by noisiness - noisiness.into_iter().map(|(idx, _)| idx).collect() + noisinesses.into_iter().map(|(_, idx)| idx).collect() } pub fn run(&mut self) -> (Vec, Vec) { From 784317adbe6ad2b1179fd45edfb734cd31355623 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Thu, 26 Sep 2024 17:27:50 -0400 Subject: [PATCH 15/32] further optimizations of calc_preferred_coins --- passivbot-rust/src/backtest.rs | 54 ++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index e8b8a02a3..fe8ad44b8 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -154,14 +154,6 @@ impl Backtest { ) -> Self { let n_timesteps = hlcvs.shape()[0]; let n_coins = hlcvs.shape()[1]; - let n_eligible_long = bot_params_pair.long.n_positions.max( - (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() - as usize, - ); - let n_eligible_short = bot_params_pair.short.n_positions.max( - (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() - as usize, - ); let initial_emas = (0..n_coins) .map(|i| { let close_price = hlcvs[[0, i, CLOSE]]; @@ -176,6 +168,14 @@ impl Backtest { let mut bot_params_pair_cloned = bot_params_pair.clone(); bot_params_pair_cloned.long.n_positions = n_coins.min(bot_params_pair.long.n_positions); bot_params_pair_cloned.short.n_positions = n_coins.min(bot_params_pair.short.n_positions); + let n_eligible_long = bot_params_pair_cloned.long.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.long.filter_relative_volume_clip_pct)).round() + as usize, + ); + let n_eligible_short = bot_params_pair_cloned.short.n_positions.max( + (n_coins as f64 * (1.0 - bot_params_pair.short.filter_relative_volume_clip_pct)).round() + as usize, + ); Backtest { hlcvs, bot_params_pair: bot_params_pair_cloned, @@ -222,9 +222,26 @@ impl Backtest { } pub fn calc_preferred_coins(&mut self, k: usize, pside: usize) -> Vec { - let (bot_params, n_eligible) = match pside { - LONG => (&self.bot_params_pair.long, self.n_eligible_long), - SHORT => (&self.bot_params_pair.short, self.n_eligible_short), + let (bot_params, n_positions) = match pside { + LONG => ( + &self.bot_params_pair.long, + self.bot_params_pair.long.n_positions, + ), + SHORT => ( + &self.bot_params_pair.short, + self.bot_params_pair.short.n_positions, + ), + _ => panic!("Invalid pside"), + }; + + // Early return if all coins are already eligible + if self.n_coins <= n_positions { + return (0..self.n_coins).collect(); + } + + let n_eligible = match pside { + LONG => self.n_eligible_long, + SHORT => self.n_eligible_short, _ => panic!("Invalid pside"), }; @@ -266,21 +283,21 @@ impl Backtest { } *prev_k = k; - // Partial sort to get 
top n_eligible coins by volume - volume_indices.select_nth_unstable_by(n_eligible, |a, b| { - b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal) - }); + // Sort by volume in descending order + volume_indices.sort_unstable_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(Ordering::Equal)); // Calculate noisiness for top n_eligible coins - let mut noisinesses = vec![(0.0f64, 0usize); n_eligible]; - for (i, &(_, idx)) in volume_indices.iter().take(n_eligible).enumerate() { + let actual_n_eligible = n_eligible.min(self.n_coins); + let mut noisinesses = Vec::with_capacity(actual_n_eligible); + + for &(_, idx) in volume_indices.iter().take(actual_n_eligible) { let noisiness: f64 = self .hlcvs .slice(s![start_k..k, idx, ..]) .axis_iter(Axis(0)) .map(|row| (row[HIGH] - row[LOW]) / row[CLOSE]) .sum(); - noisinesses[i] = (noisiness, idx); + noisinesses.push((noisiness, idx)); } // Sort by noisiness in descending order @@ -289,7 +306,6 @@ impl Backtest { // Return indices sorted by noisiness noisinesses.into_iter().map(|(_, idx)| idx).collect() } - pub fn run(&mut self) -> (Vec, Vec) { let check_points: Vec = (0..7).map(|i| i * 60 * 24).collect(); let n_timesteps = self.hlcvs.shape()[0]; From ba9f0c1728d11ddb660e370cb16b3e3f018d0489 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 09:28:11 -0400 Subject: [PATCH 16/32] add missing params --- src/pure_funcs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pure_funcs.py b/src/pure_funcs.py index 5560d7855..28162f761 100644 --- a/src/pure_funcs.py +++ b/src/pure_funcs.py @@ -597,6 +597,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], + "long_filter_rolling_window": [10.0, 1440.0], + "long_filter_relative_volume_clip_pct": [0.0, 1.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 2.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -620,6 +622,8 @@ def get_template_live_config(passivbot_mode="neat_grid"): "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], + "short_filter_rolling_window": [10.0, 1440.0], + "short_filter_relative_volume_clip_pct": [0.0, 1.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 2.0], "short_unstuck_close_pct": [0.001, 0.1], From b05c57250785b8e343d9763d7b7cc0a3527bff5b Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 09:46:20 -0400 Subject: [PATCH 17/32] adapt to new filter params --- src/passivbot.py | 107 ++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 57 deletions(-) diff --git a/src/passivbot.py b/src/passivbot.py index 2790b77f1..a96027621 100644 --- a/src/passivbot.py +++ b/src/passivbot.py @@ -235,20 +235,16 @@ async def execute_to_exchange(self, debug_mode=False): if to_cancel or to_create: self.previous_REST_update_ts = 0 - def is_forager_mode(self): + def is_forager_mode(self, pside=None): n_approved_symbols = len(self.config["live"]["approved_coins"]) if n_approved_symbols == 0: return True + if pside is None: + return self.is_forager_mode("long") or self.is_forager_mode("short") if ( - self.config["bot"]["long"]["total_wallet_exposure_limit"] > 0.0 - and self.config["bot"]["long"]["n_positions"] > 0 - and round(self.config["bot"]["long"]["n_positions"]) < n_approved_symbols - ): - return True - if ( - 
self.config["bot"]["short"]["total_wallet_exposure_limit"] > 0.0 - and self.config["bot"]["short"]["n_positions"] > 0 - and round(self.config["bot"]["short"]["n_positions"]) < n_approved_symbols + self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 + and self.config["bot"][pside]["n_positions"] > 0 + and round(self.config["bot"][pside]["n_positions"]) < n_approved_symbols ): return True return False @@ -259,6 +255,8 @@ def set_live_configs(self): "total_wallet_exposure_limit", "unstuck_loss_allowance_pct", "unstuck_close_pct", + "filter_rolling_window", + "filter_relative_volume_clip_pct", } self.config["bot"]["long"]["n_positions"] = round(self.config["bot"]["long"]["n_positions"]) self.config["bot"]["short"]["n_positions"] = round(self.config["bot"]["short"]["n_positions"]) @@ -719,29 +717,24 @@ def update_PB_modes(self): self.ideal_actives[pside][symbol] = "" if symbol in self.actual_actives[pside]: self.PB_modes[pside][symbol] = self.forced_modes[pside][symbol] - if self.forager_mode: - if self.config["live"]["relative_volume_filter_clip_pct"] > 0.0: - self.calc_volumes() - # filter by relative volume - eligible_symbols = sorted(self.volumes, key=lambda x: self.volumes[x])[ - int( - round( - len(self.volumes) * self.config["live"]["relative_volume_filter_clip_pct"] - ) - ) : - ] - else: - eligible_symbols = list(self.eligible_symbols) - self.calc_noisiness() # ideal symbols are high noise symbols - - # calc ideal actives for long and short separately - for pside in self.actual_actives: - if ( - self.config["bot"][pside]["n_positions"] > 0 - and self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): + if self.is_forager_mode(pside): + if self.config["bot"][pside]["filter_relative_volume_clip_pct"] > 0.0: + volumes = self.calc_volumes(pside) + # filter by relative volume + n_eligible = round( + len(volumes) + * (1 - self.config["bot"][pside]["filter_relative_volume_clip_pct"]) + ) + eligible_symbols = sorted(volumes, key=lambda x: volumes[x], reverse=True)[ + : int(max(n_eligible, self.config["bot"][pside]["n_positions"])) + ] + else: + eligible_symbols = list(self.eligible_symbols) + # ideal symbols are high noise symbols + noisiness = self.calc_noisiness(pside, eligible_symbols=eligible_symbols) + if self.is_enabled(pside=pside): self.warn_on_high_effective_min_cost(pside) - for symbol in sorted(self.noisiness, key=lambda x: self.noisiness[x], reverse=True): + for symbol in sorted(noisiness, key=lambda x: noisiness[x], reverse=True): if ( not self.is_enabled(symbol, pside) or symbol not in self.eligible_symbols @@ -750,10 +743,8 @@ def update_PB_modes(self): or not self.effective_min_cost_is_low_enough(pside, symbol) ): continue - slots_full = ( - len(self.ideal_actives[pside]) >= self.config["bot"][pside]["n_positions"] - ) - if slots_full: + if len(self.ideal_actives[pside]) >= self.config["bot"][pside]["n_positions"]: + # slots full break if symbol not in self.ideal_actives[pside]: self.ideal_actives[pside][symbol] = "" @@ -780,14 +771,12 @@ def update_PB_modes(self): if len(slots_filled) >= self.config["bot"][pside]["n_positions"]: break self.PB_modes[pside][symbol] = "normal" - else: - # if not forager mode, all eligible symbols are ideal symbols, unless symbol in forced_modes - for pside in ["long", "short"]: - if ( - self.config["bot"][pside]["n_positions"] > 0 - and self.config["bot"][pside]["total_wallet_exposure_limit"] > 0.0 - ): + else: + # if not forager mode, all eligible symbols are ideal symbols, unless symbol in forced_modes + if 
self.is_enabled(pside=pside): self.warn_on_high_effective_min_cost(pside) + else: + continue for symbol in self.eligible_symbols: if self.is_enabled(symbol, pside): if not self.effective_min_cost_is_low_enough(pside, symbol): @@ -1312,9 +1301,11 @@ def update_effective_min_cost(self, symbol=None): logging.error(f"error with {get_function_name()} for {symbol}: {e}") traceback.print_exc() - def is_enabled(self, symbol, pside=None): + def is_enabled(self, symbol=None, pside=None): if pside is None: return self.is_enabled(symbol, "long") or self.is_enabled(symbol, "short") + if symbol is None: + return any([self.is_enabled(symbol, pside) for symbol in self.live_configs]) return ( symbol in self.live_configs and self.live_configs[symbol][pside]["wallet_exposure_limit"] > 0.0 @@ -1875,27 +1866,29 @@ async def start_bot(self, debug_mode=False): if not debug_mode: await self.run_execution_loop() - def calc_noisiness(self): - if not hasattr(self, "noisiness"): - self.noisiness = {} - n = int(round(self.config["live"]["ohlcv_rolling_window"])) - for symbol in self.eligible_symbols: + def calc_noisiness(self, pside, eligible_symbols=None): + if eligible_symbols is None: + eligible_symbols = self.eligible_symbols + noisiness = {} + n = int(round(self.config["bot"][pside]["filter_rolling_window"])) + for symbol in eligible_symbols: if symbol in self.ohlcvs_1m and self.ohlcvs_1m[symbol]: ohlcvs_1m = [v for v in self.ohlcvs_1m[symbol].values()[-n:]] - self.noisiness[symbol] = np.mean([(x[2] - x[3]) / x[4] for x in ohlcvs_1m]) + noisiness[symbol] = np.mean([(x[2] - x[3]) / x[4] for x in ohlcvs_1m]) else: - self.noisiness[symbol] = 0.0 + noisiness[symbol] = 0.0 + return noisiness - def calc_volumes(self): - if not hasattr(self, "volumes"): - self.volumes = {} - n = int(round(self.config["live"]["ohlcv_rolling_window"])) + def calc_volumes(self, pside): + n = int(round(self.config["bot"][pside]["filter_rolling_window"])) + volumes = {} for symbol in self.ohlcvs_1m: if self.ohlcvs_1m[symbol] and len(self.ohlcvs_1m[symbol]) > 0: ohlcvs_1m = [v for v in self.ohlcvs_1m[symbol].values()[-n:]] - self.volumes[symbol] = sum([x[4] * x[5] for x in ohlcvs_1m]) + volumes[symbol] = sum([x[4] * x[5] for x in ohlcvs_1m]) else: - self.volumes[symbol] = 0.0 + volumes[symbol] = 0.0 + return volumes async def execute_multiple(self, orders: [dict], type_: str, max_n_executions: int): if not orders: From 3a9694dda7d9ff42ba387acd856cd817f0c1e291 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Sat, 28 Sep 2024 10:31:18 -0400 Subject: [PATCH 18/32] update docs to new filter parameters --- docs/configuration.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 3b4ae8343..ebfc97094 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -108,7 +108,16 @@ If a position is stuck, bot will use profits made on other positions to realize - `unstuck_threshold`: - if a position is bigger than a threshold, consider it stuck and activate unstucking. - `if wallet_exposure / wallet_exposure_limit > unstuck_threshold: unstucking enabled` - - e.g. if a position size is $500 and max allowed position size is $1000, then position is 50% full. If unstuck_threshold==0.45, then unstuck the position until its size is $450. + - e.g. if a position size is $500 and max allowed position size is $1000, then position is 50% full. If unstuck_threshold==0.45, then unstuck the position until its size is $450. 
+ +### Filter Parameters + +Coins selected for trading are filtered by volume and noisiness. First, filter coins by volume, dropping x% of the lowest volume coins, then sort the eligible coins by noisiness and select the top noisiest coins for trading. + +- `filter_relative_volume_clip_pct`: Volume filter: disapprove the lowest relative volume coins. E.g. `filter_relative_volume_clip_pct=0.1`: drop 10% lowest volume coins. Set to zero to allow all. +- `filter_rolling_window`: number of minutes to look into the past to compute volume and noisiness, used for dynamic coin selection in forager mode. + - noisiness is normalized relative range of 1m ohlcvs: `mean((high - low) / close)` + - in forager mode, bot will select coins with highest noisiness for opening positions ## Live Trading Settings - `approved_coins`: list of coins approved for trading. If empty, all coins are approved. @@ -136,12 +145,8 @@ If a position is stuck, bot will use profits made on other positions to realize - `max_n_cancellations_per_batch`: will cancel n open orders per execution - `max_n_creations_per_batch`: will create n new orders per execution - `minimum_coin_age_days`: disallow coins younger than a given number of days -- `ohlcv_rolling_window`: number of minutes to look into the past to compute volume and noisiness, used for dynamic coin selection in forager mode. - - noisiness is normalized relative range of 1m ohlcvs: `mean((high - low) / close)` - - in forager mode, bot will select coins with highest noisiness for opening positions - `pnls_max_lookback_days`: how far into the past to fetch pnl history - `price_distance_threshold`: minimum distance to current price action required for EMA based limit orders -- `relative_volume_filter_clip_pct`: Volume filter: disapprove the lowest relative volume coins. Default 0.1 == 10%. Set to zero to allow all. 
- `time_in_force`: default is good-till-cancelled
- `user`: fetch API key/secret from api-keys.json

From 10f98ebf33d5d9b66ffef5dac34b7e7c16396830 Mon Sep 17 00:00:00 2001
From: Eirik Narjord
Date: Sun, 29 Sep 2024 10:55:51 -0400
Subject: [PATCH 19/32] n_positions is not greater than n_eligible

---
 src/passivbot.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/passivbot.py b/src/passivbot.py
index a96027621..2cad78ea2 100644
--- a/src/passivbot.py
+++ b/src/passivbot.py
@@ -140,7 +140,6 @@ def __init__(self, config: dict):
             3000.0, self.config["live"]["execution_delay_seconds"] * 1000
         )
         self.quote = "USDT"
-        self.forager_mode = self.is_forager_mode()
 
         self.minimum_market_age_millis = (
             self.config["live"]["minimum_coin_age_days"] * 24 * 60 * 60 * 1000
@@ -258,8 +257,10 @@ def set_live_configs(self):
             "filter_rolling_window",
             "filter_relative_volume_clip_pct",
         }
-        self.config["bot"]["long"]["n_positions"] = round(self.config["bot"]["long"]["n_positions"])
-        self.config["bot"]["short"]["n_positions"] = round(self.config["bot"]["short"]["n_positions"])
+        for pside in ["long", "short"]:
+            self.config["bot"][pside]["n_positions"] = min(
+                len(self.eligible_symbols), int(round(self.config["bot"][pside]["n_positions"]))
+            )
         for symbol in self.markets_dict:
             self.live_configs[symbol] = deepcopy(self.config["bot"])
             self.live_configs[symbol]["leverage"] = self.config["live"]["leverage"]
@@ -668,7 +669,7 @@ async def init_flags(self):
             if not self.markets_dict[symbol]["active"]:
                 self.forced_modes[pside][symbol] = "tp_only"
 
-        if self.forager_mode and self.minimum_market_age_millis > 0:
+        if self.is_forager_mode() and self.minimum_market_age_millis > 0:
             if not hasattr(self, "first_timestamps"):
                 self.first_timestamps = await get_first_ohlcv_timestamps(
                     cc=self.cca, symbols=sorted(self.eligible_symbols)
@@ -679,7 +680,7 @@ async def init_flags(self):
             self.first_timestamps = None
 
     def is_old_enough(self, symbol):
-        if self.forager_mode and self.minimum_market_age_millis > 0:
+        if self.is_forager_mode() and self.minimum_market_age_millis > 0:
             if symbol in self.first_timestamps:
                 return utc_ms() - self.first_timestamps[symbol] > self.minimum_market_age_millis
             else:
@@ -873,7 +874,7 @@ def effective_min_cost_is_low_enough(self, pside, symbol):
             WE_limit = self.live_configs[symbol][pside]["wallet_exposure_limit"]
             assert WE_limit > 0.0
         except:
-            if self.forager_mode:
+            if self.is_forager_mode(pside):
                 WE_limit = (
                     self.config["bot"][pside]["total_wallet_exposure_limit"]
                     / self.config["bot"][pside]["n_positions"]
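With n_positions now capped by the number of eligible symbols, the fallback branch of effective_min_cost_is_low_enough splits the total wallet exposure limit evenly across position slots. A worked Python example using the long-side values from the template config (the eligible-symbol count is made up for illustration):

    n_eligible_symbols = 40                               # hypothetical
    n_positions = min(n_eligible_symbols, round(10.776))  # -> 11
    total_wallet_exposure_limit = 0.97499
    WE_limit = total_wallet_exposure_limit / n_positions
    print(round(WE_limit, 5))                             # -> 0.08864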
"filter_rolling_window": 60, - "filter_relative_volume_clip_pct": 0.95, + "filter_relative_volume_clip_pct": 0.5, + "filter_rolling_window": 60.0, "n_positions": 7.6679, "total_wallet_exposure_limit": 0.0, "unstuck_close_pct": 0.052781, @@ -86,8 +86,8 @@ "long_entry_trailing_grid_ratio": [-1.0, 1.0], "long_entry_trailing_retracement_pct": [0.0, 0.1], "long_entry_trailing_threshold_pct": [-0.1, 0.1], - "long_filter_rolling_window": [10.0, 1440.0], "long_filter_relative_volume_clip_pct": [0.0, 1.0], + "long_filter_rolling_window": [10.0, 1440.0], "long_n_positions": [1.0, 20.0], "long_total_wallet_exposure_limit": [0.0, 5.0], "long_unstuck_close_pct": [0.001, 0.1], @@ -111,8 +111,8 @@ "short_entry_trailing_grid_ratio": [-1.0, 1.0], "short_entry_trailing_retracement_pct": [0.0, 0.1], "short_entry_trailing_threshold_pct": [-0.1, 0.1], - "short_filter_rolling_window": [10.0, 1440.0], "short_filter_relative_volume_clip_pct": [0.0, 1.0], + "short_filter_rolling_window": [10.0, 1440.0], "short_n_positions": [1.0, 20.0], "short_total_wallet_exposure_limit": [0.0, 5.0], "short_unstuck_close_pct": [0.001, 0.1], From a29502ea1a48eed11865c53fe01d3dc3a869bacd Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:11:27 -0400 Subject: [PATCH 21/32] use shared memory to avoid data duplication during multiprocessing --- passivbot-rust/src/backtest.rs | 8 ++++---- passivbot-rust/src/python.rs | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/passivbot-rust/src/backtest.rs b/passivbot-rust/src/backtest.rs index fe8ad44b8..f8032c01a 100644 --- a/passivbot-rust/src/backtest.rs +++ b/passivbot-rust/src/backtest.rs @@ -116,8 +116,8 @@ pub struct RollingVolumeSum { prev_k_short: usize, } -pub struct Backtest { - hlcvs: Array3, // 3D array: (n_timesteps, n_coins, 4) +pub struct Backtest<'a> { + hlcvs: &'a Array3, bot_params_pair: BotParamsPair, exchange_params_list: Vec, backtest_params: BacktestParams, @@ -145,9 +145,9 @@ pub struct Backtest { volume_indices_buffer: Option>, } -impl Backtest { +impl<'a> Backtest<'a> { pub fn new( - hlcvs: Array3, + hlcvs: &'a Array3, bot_params_pair: BotParamsPair, exchange_params_list: Vec, backtest_params: &BacktestParams, diff --git a/passivbot-rust/src/python.rs b/passivbot-rust/src/python.rs index dc4983e7f..dd88e86bc 100644 --- a/passivbot-rust/src/python.rs +++ b/passivbot-rust/src/python.rs @@ -11,6 +11,8 @@ use crate::types::{ Analysis, BacktestParams, BotParams, BotParamsPair, EMABands, ExchangeParams, Order, OrderBook, Position, StateParams, TrailingPriceBundle, }; +use memmap::MmapOptions; +use ndarray::ShapeBuilder; use ndarray::{Array1, Array2, Array3, Array4, ArrayBase, ArrayD}; use numpy::{ IntoPyArray, PyArray1, PyArray2, PyArray3, PyArray4, PyReadonlyArray2, PyReadonlyArray3, @@ -20,15 +22,40 @@ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyDict, PyList}; use pyo3::wrap_pyfunction; +use std::{fs::File, slice}; #[pyfunction] pub fn run_backtest( - hlcvs: PyReadonlyArray3, + shared_memory_file: &str, + hlcvs_shape: (usize, usize, usize), + hlcvs_dtype: &str, bot_params_pair_dict: &PyDict, exchange_params_list: &PyAny, backtest_params_dict: &PyDict, ) -> PyResult<(Py>, Py>, Py)> { - let hlcvs_rust = hlcvs.as_array(); + // Open the memory-mapped file + let file = File::open(shared_memory_file) + .map_err(|e| PyValueError::new_err(format!("Unable to open shared memory file: {}", e)))?; + + let mmap = unsafe { + MmapOptions::new() + .map(&file) + .map_err(|e| 
PyValueError::new_err(format!("Unable to map file: {}", e)))? + }; + + // Create an ndarray view of the memory-mapped file + let hlcvs_rust = unsafe { + match hlcvs_dtype { + " { + let data = slice::from_raw_parts( + mmap.as_ptr() as *const f64, + hlcvs_shape.0 * hlcvs_shape.1 * hlcvs_shape.2, + ); + Array3::::from_shape_vec(hlcvs_shape.into_shape(), data.to_vec()).unwrap() + } + _ => return Err(PyValueError::new_err("Unsupported dtype for HLCV data")), + } + }; let bot_params_pair = bot_params_pair_from_dict(bot_params_pair_dict)?; let exchange_params = { @@ -55,7 +82,7 @@ pub fn run_backtest( let backtest_params = backtest_params_from_dict(backtest_params_dict)?; let mut backtest = Backtest::new( - hlcvs_rust.to_owned(), + &hlcvs_rust, bot_params_pair, exchange_params, &backtest_params, From d27d688f7b586bb6555e65d8009e8c9733b19ecf Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:11:47 -0400 Subject: [PATCH 22/32] new dependency memmap --- passivbot-rust/Cargo.lock | 33 +++++++++++++++++++++++++++++++++ passivbot-rust/Cargo.toml | 1 + 2 files changed, 34 insertions(+) diff --git a/passivbot-rust/Cargo.lock b/passivbot-rust/Cargo.lock index a71c50a4b..d393ee37b 100644 --- a/passivbot-rust/Cargo.lock +++ b/passivbot-rust/Cargo.lock @@ -58,6 +58,16 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "memoffset" version = "0.9.1" @@ -155,6 +165,7 @@ dependencies = [ name = "passivbot_rust" version = "0.1.0" dependencies = [ + "memmap", "ndarray", "numpy", "pyo3", @@ -309,6 +320,28 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-targets" version = "0.52.5" diff --git a/passivbot-rust/Cargo.toml b/passivbot-rust/Cargo.toml index 8a4163dad..e5ad785dd 100644 --- a/passivbot-rust/Cargo.toml +++ b/passivbot-rust/Cargo.toml @@ -11,3 +11,4 @@ crate-type = ["cdylib"] pyo3 = { version = "0.21.2", features = ["extension-module"] } ndarray = "0.15.6" numpy = "0.21.0" +memmap = "0.7.0" \ No newline at end of file From c3a8b43da3c7979e43d0a4b6425ab5292dc44725 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:12:15 -0400 Subject: [PATCH 23/32] default to template config if config path not passed --- src/backtest.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/backtest.py b/src/backtest.py index 2a2b19bc3..a23d40970 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -222,7 +222,9 @@ async def main(): datefmt="%Y-%m-%dT%H:%M:%S", ) parser = 
argparse.ArgumentParser(prog="backtest", description="run forager backtest") - parser.add_argument("config_path", type=str, default=None, help="path to hjson passivbot config") + parser.add_argument( + "config_path", type=str, default=None, nargs="?", help="path to json passivbot config" + ) template_config = get_template_live_config("v7") del template_config["optimize"] keep_live_keys = { @@ -235,7 +237,12 @@ async def main(): del template_config["live"][key] add_arguments_recursively(parser, template_config) args = parser.parse_args() - config = load_config("configs/template.hjson" if args.config_path is None else args.config_path) + if args.config_path is None: + logging.info(f"loading default template config configs/template.json") + config = load_config("configs/template.json") + else: + logging.info(f"loading config {args.config_path}") + config = load_config(args.config_path) update_config_with_args(config, args) config = format_config(config) symbols, hlcvs, mss, results_path = await prepare_hlcvs_mss(config) From 00d3f8a50197a0c4d985fd1aca4222a387752ca9 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 11:12:40 -0400 Subject: [PATCH 24/32] use shared memory for parallel optimizing --- src/optimize.py | 70 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 0fae3a202..914d8e9c4 100644 --- a/src/optimize.py +++ b/src/optimize.py @@ -5,6 +5,7 @@ import argparse import multiprocessing import subprocess +import mmap from multiprocessing import shared_memory from backtest import ( prepare_hlcvs_mss, @@ -37,6 +38,22 @@ import json import pprint from deap import base, creator, tools, algorithms +from contextlib import contextmanager +import tempfile + + +def create_shared_memory_file(hlcvs): + temp_file = tempfile.NamedTemporaryFile(delete=False) + shared_memory_file = temp_file.name + + try: + with open(shared_memory_file, "wb") as f: + f.write(hlcvs.tobytes()) + except IOError as e: + print(f"Error writing to shared memory file: {e}") + raise + + return shared_memory_file def mutPolynomialBoundedWrapper(individual, eta, low, up, indpb): @@ -144,18 +161,24 @@ def config_to_individual(config): return individual +@contextmanager +def managed_mmap(filename, dtype, shape): + try: + mmap = np.memmap(filename, dtype=dtype, mode="r", shape=shape) + yield mmap + finally: + del mmap + + class Evaluator: - def __init__(self, hlcvs, config, mss): - self.hlcvs = hlcvs - self.shared_hlcvs = shared_memory.SharedMemory(create=True, size=self.hlcvs.nbytes) - self.shared_hlcvs_np = np.ndarray( - self.hlcvs.shape, dtype=self.hlcvs.dtype, buffer=self.shared_hlcvs.buf - ) - np.copyto(self.shared_hlcvs_np, self.hlcvs) - del self.hlcvs + def __init__(self, shared_memory_file, hlcvs_shape, hlcvs_dtype, config, mss): + self.shared_memory_file = shared_memory_file + self.hlcvs_shape = hlcvs_shape + self.hlcvs_dtype = hlcvs_dtype + self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) + self.shared_hlcvs_np = self.mmap_context.__enter__() self.config = config - _, self.exchange_params, self.backtest_params = prep_backtest_args(config, mss) def evaluate(self, individual): @@ -164,7 +187,9 @@ def evaluate(self, individual): config, [], exchange_params=self.exchange_params, backtest_params=self.backtest_params ) fills, equities, analysis = pbr.run_backtest( - self.shared_hlcvs_np, + self.shared_memory_file, + self.shared_hlcvs_np.shape, + 
self.shared_hlcvs_np.dtype.str, bot_params, self.exchange_params, self.backtest_params, @@ -193,10 +218,22 @@ def calc_fitness(self, analysis): w_1 = modifier - analysis[self.config["optimize"]["scoring"][1]] return w_0, w_1 - def cleanup(self): - # Close and unlink the shared memory - self.shared_hlcvs.close() - self.shared_hlcvs.unlink() + def __del__(self): + if hasattr(self, "mmap_context"): + self.mmap_context.__exit__(None, None, None) + + def __getstate__(self): + # This method is called when pickling. We exclude mmap_context and shared_hlcvs_np + state = self.__dict__.copy() + del state["mmap_context"] + del state["shared_hlcvs_np"] + return state + + def __setstate__(self, state): + # This method is called when unpickling. We recreate mmap_context and shared_hlcvs_np + self.__dict__.update(state) + self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) + self.shared_hlcvs_np = self.mmap_context.__enter__() def add_extra_options(parser): @@ -283,7 +320,8 @@ async def main(): f"optimize_results/{date_fname}_{coins_fname}_{hash_snippet}_all_results.txt" ) try: - evaluator = Evaluator(hlcvs, config, mss) + shared_memory_file = create_shared_memory_file(hlcvs) + evaluator = Evaluator(shared_memory_file, hlcvs.shape, hlcvs.dtype, config, mss) creator.create("FitnessMulti", base.Fitness, weights=(-1.0, -1.0)) # Minimize both objectives creator.create("Individual", list, fitness=creator.FitnessMulti) @@ -398,7 +436,7 @@ def create_individual(): finally: # Close the pool logging.info(f"attempting clean shutdown...") - evaluator.cleanup() + os.unlink(shared_memory_file) sys.exit(0) # pool.close() # pool.join() From 5bc025140b6df94298b591bdf2db9cb3a623386f Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 12:14:13 -0400 Subject: [PATCH 25/32] adapt to shared memory --- src/backtest.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/backtest.py b/src/backtest.py index a23d40970..ad00312a7 100644 --- a/src/backtest.py +++ b/src/backtest.py @@ -28,6 +28,22 @@ import logging from main import manage_rust_compilation +import tempfile +from contextlib import contextmanager + + +@contextmanager +def create_shared_memory_file(hlcvs): + temp_file = tempfile.NamedTemporaryFile(delete=False) + shared_memory_file = temp_file.name + try: + with open(shared_memory_file, "wb") as f: + f.write(hlcvs.tobytes()) + yield shared_memory_file + finally: + os.unlink(shared_memory_file) + + plt.rcParams["figure.figsize"] = [29, 18] @@ -167,7 +183,17 @@ def run_backtest(hlcvs, mss, config: dict): bot_params, exchange_params, backtest_params = prep_backtest_args(config, mss) print(f"Starting backtest...") sts = utc_ms() - fills, equities, analysis = pbr.run_backtest(hlcvs, bot_params, exchange_params, backtest_params) + + with create_shared_memory_file(hlcvs) as shared_memory_file: + fills, equities, analysis = pbr.run_backtest( + shared_memory_file, + hlcvs.shape, + hlcvs.dtype.str, + bot_params, + exchange_params, + backtest_params, + ) + print(f"seconds elapsed for backtest: {(utc_ms() - sts) / 1000:.4f}") return fills, equities, analysis From 04ad09818d37bb10269f2ba8bd0d30d9334e2150 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Tue, 1 Oct 2024 12:16:11 -0400 Subject: [PATCH 26/32] rewrite cleanup --- src/optimize.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/optimize.py b/src/optimize.py index 914d8e9c4..509b9756f 100644 --- 
a/src/optimize.py +++ b/src/optimize.py @@ -32,7 +32,6 @@ from main import manage_rust_compilation import numpy as np from uuid import uuid4 -import signal import logging import traceback import json @@ -136,11 +135,6 @@ def cxSimulatedBinaryBoundedWrapper(ind1, ind2, eta, low, up): return ind1, ind2 -def signal_handler(signal, frame): - print("\nOptimization interrupted by user. Exiting gracefully...") - sys.exit(0) - - def individual_to_config(individual, template=None): if template is None: template = get_template_live_config("v7") @@ -163,11 +157,18 @@ def config_to_individual(config): @contextmanager def managed_mmap(filename, dtype, shape): + mmap = None try: mmap = np.memmap(filename, dtype=dtype, mode="r", shape=shape) yield mmap + except FileNotFoundError: + if shutdown_event.is_set(): + yield None + else: + raise finally: - del mmap + if mmap is not None: + del mmap class Evaluator: @@ -230,10 +231,11 @@ def __getstate__(self): return state def __setstate__(self, state): - # This method is called when unpickling. We recreate mmap_context and shared_hlcvs_np self.__dict__.update(state) self.mmap_context = managed_mmap(self.shared_memory_file, self.hlcvs_dtype, self.hlcvs_shape) self.shared_hlcvs_np = self.mmap_context.__enter__() + if self.shared_hlcvs_np is None: + print("Warning: Unable to recreate shared memory mapping during unpickling.") def add_extra_options(parser): @@ -295,7 +297,6 @@ async def main(): add_arguments_recursively(parser, template_config) add_extra_options(parser) args = parser.parse_args() - signal.signal(signal.SIGINT, signal_handler) logging.basicConfig( format="%(asctime)s %(levelname)-8s %(message)s", level=logging.INFO, @@ -419,6 +420,7 @@ def create_individual(): print(logbook) logging.info(f"Optimization complete.") + try: logging.info(f"Extracting best config...") result = subprocess.run( @@ -430,16 +432,25 @@ def create_individual(): print(result.stdout) except Exception as e: logging.error(f"failed to extract best config {e}") - ######## except Exception as e: + logging.error(f"An error occurred: {e}") traceback.print_exc() finally: - # Close the pool - logging.info(f"attempting clean shutdown...") - os.unlink(shared_memory_file) + if "pool" in locals(): + logging.info("Closing and terminating the process pool...") + pool.close() + pool.terminate() + pool.join() + + if shared_memory_file and os.path.exists(shared_memory_file): + logging.info(f"Removing shared memory file: {shared_memory_file}") + try: + os.unlink(shared_memory_file) + except Exception as e: + logging.error(f"Error removing shared memory file: {e}") + + logging.info("Cleanup complete. 
Exiting.") sys.exit(0) - # pool.close() - # pool.join() if __name__ == "__main__": From 55dd211e1cae61fcbd45b1c5a48220e1e1421510 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 11:46:41 -0400 Subject: [PATCH 27/32] log loading config with -lc --- src/passivbot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/passivbot.py b/src/passivbot.py index 2cad78ea2..3a26d3498 100644 --- a/src/passivbot.py +++ b/src/passivbot.py @@ -277,6 +277,9 @@ def set_live_configs(self): if symbol in self.flags and self.flags[symbol].live_config_path is not None: try: loaded = load_config(self.flags[symbol].live_config_path) + logging.info( + f"successfully loaded {self.flags[symbol].live_config_path} for {symbol}" + ) for pside in loaded["bot"]: for k, v in loaded["bot"][pside].items(): if k not in skip: From cb6c81d3ceaa9cf37a0570d34797acea6f7089f3 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:14:13 -0400 Subject: [PATCH 28/32] compile rust if never compiled --- src/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.py b/src/main.py index 84bb62b06..a89bf113b 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,4 @@ import asyncio -from passivbot import main import os import time import subprocess @@ -98,4 +97,5 @@ def manage_rust_compilation(): if __name__ == "__main__": manage_rust_compilation() + from passivbot import main asyncio.run(main()) From 0fb5c609d7049087734f24f42d7e360d2538a515 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:57:13 -0400 Subject: [PATCH 29/32] add prints --- src/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main.py b/src/main.py index a89bf113b..046431b03 100644 --- a/src/main.py +++ b/src/main.py @@ -26,6 +26,7 @@ def check_compilation_needed(): # Find the most recently modified compiled extension compiled_files = [path for path in COMPILED_EXTENSION_PATHS if os.path.exists(path)] if not compiled_files: + print(f"No Rust extension found. Compiling...") return True # No extension found, compilation needed compiled_time = max(os.path.getmtime(path) for path in compiled_files) @@ -36,6 +37,7 @@ def check_compilation_needed(): if file.endswith(".rs"): file_path = os.path.join(root, file) if os.path.getmtime(file_path) > compiled_time: + print(f"Rust extension found, but out of date. 
Recompiling...") return True # A source file is newer, compilation needed return False # No compilation needed @@ -98,4 +100,5 @@ def manage_rust_compilation(): if __name__ == "__main__": manage_rust_compilation() from passivbot import main + asyncio.run(main()) From 757db2deec7258e8c22808024feaa2fa2fadf004 Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 12:57:33 -0400 Subject: [PATCH 30/32] print full path in recursive_config_update --- src/procedures.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/procedures.py b/src/procedures.py index 248bf7435..5f8d1e7d8 100644 --- a/src/procedures.py +++ b/src/procedures.py @@ -1246,15 +1246,22 @@ def add_arguments_recursively(parser, config, prefix="", acronyms=set()): acronyms.add(acronym) -def recursive_config_update(config, key, value): +def recursive_config_update(config, key, value, path=None): + if path is None: + path = [] + if key in config: if value != config[key]: - print(f"changed {key} {config[key]} -> {value}") + full_path = ".".join(path + [key]) + print(f"changed {full_path} {config[key]} -> {value}") config[key] = value return True + key_split = key.split("_") if key_split[0] in config: - return recursive_config_update(config[key_split[0]], "_".join(key_split[1:]), value) + new_path = path + [key_split[0]] + return recursive_config_update(config[key_split[0]], "_".join(key_split[1:]), value, new_path) + return False From 23c0e408e755b373cf5bdb45300dbaf558a4473b Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 14:18:36 -0400 Subject: [PATCH 31/32] use npy instead of csv; cleanup code --- src/downloader.py | 1310 +++------------------------------------------ 1 file changed, 69 insertions(+), 1241 deletions(-) diff --git a/src/downloader.py b/src/downloader.py index 0c8686745..3ce5454d4 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -36,938 +36,6 @@ from pure_funcs import ts_to_date, ts_to_date_utc, date_to_ts2, get_dummy_settings, get_day, numpyize -class Downloader: - """ - Downloader class for tick data. Fetches data from specified time until now or specified time. 
- """ - - def __init__(self, config: dict): - self.fetch_delay_seconds = 0.75 - self.config = config - # use binance data for bybit - self.config["exchange"] = ( - "binance" - if self.config["exchange"] in ["bybit", "bitget", "okx", "kucoin"] - else self.config["exchange"] - ) - self.spot = "spot" in config and config["spot"] - self.tick_filepath = os.path.join( - config["caches_dirpath"], f"{config['session_name']}_ticks_cache.npy" - ) - try: - self.start_time = int( - parser.parse(self.config["start_date"]) - .replace(tzinfo=datetime.timezone.utc) - .timestamp() - * 1000 - ) - except Exception: - raise Exception(f"Unrecognized date format for start time {config['start_date']}") - try: - self.end_time = int( - parser.parse(self.config["end_date"]) - .replace(tzinfo=datetime.timezone.utc) - .timestamp() - * 1000 - ) - if self.end_time > utc_ms(): - raise Exception(f"End date later than current time {config['end_date']}") - except Exception: - raise Exception(f"Unrecognized date format for end time {config['end_date']}") - if self.config["exchange"] == "binance": - if self.spot: - self.daily_base_url = "https://data.binance.vision/data/spot/daily/aggTrades/" - self.monthly_base_url = "https://data.binance.vision/data/spot/monthly/aggTrades/" - else: - market_type = "cm" if config["inverse"] else "um" - self.daily_base_url = ( - f"https://data.binance.vision/data/futures/{market_type}/daily/aggTrades/" - ) - self.monthly_base_url = ( - f"https://data.binance.vision/data/futures/{market_type}/monthly/aggTrades/" - ) - elif self.config["exchange"] == "bybit": - self.daily_base_url = "https://public.bybit.com/trading/" - else: - raise Exception(f"unknown exchange {config['exchange']}") - if "historical_data_path" in self.config and self.config["historical_data_path"]: - self.filepath = make_get_filepath( - os.path.join( - self.config["historical_data_path"], - "historical_data", - self.config["exchange"], - f"agg_trades_{'spot' if self.spot else 'futures'}", - self.config["symbol"], - "", - ) - ) - else: - self.filepath = make_get_filepath( - os.path.join( - "historical_data", - self.config["exchange"], - f"agg_trades_{'spot' if self.spot else 'futures'}", - self.config["symbol"], - "", - ) - ) - - def validate_dataframe(self, df: pd.DataFrame) -> Tuple[bool, pd.DataFrame, pd.DataFrame]: - """ - Validates a dataframe and detects gaps in it. Also detects missing trades in the beginning and end. - @param df: Dataframe to check for gaps. - @return: A tuple with following result: if missing values present, the cleaned dataframe, a dataframe with start and end of gaps. 
- """ - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - missing_end_frame = df["trade_id"][df["trade_id"].diff() != 1] - gaps = pd.DataFrame() - gaps["start"] = df.iloc[missing_end_frame[1:].index - 1]["trade_id"].tolist() - gaps["end"] = missing_end_frame[1:].tolist() - missing_ids = df["trade_id"].iloc[0] % 100000 - if missing_ids != 0: - gaps = gaps.append( - { - "start": df["trade_id"].iloc[0] - missing_ids, - "end": df["trade_id"].iloc[0] - 1, - }, - ignore_index=True, - ) - missing_ids = df["trade_id"].iloc[-1] % 100000 - if missing_ids != 99999: - gaps = gaps.append( - { - "start": df["trade_id"].iloc[-1], - "end": df["trade_id"].iloc[-1] + (100000 - missing_ids - 1), - }, - ignore_index=True, - ) - if gaps.empty: - return False, df, gaps - else: - gaps["start"] = gaps["start"].astype(np.int64) - gaps["end"] = gaps["end"].astype(np.int64) - gaps.sort_values("start", inplace=True) - gaps.reset_index(drop=True, inplace=True) - gaps["start"] = gaps["start"].replace(0, 1) - return True, df, gaps - - def read_dataframe(self, path: str) -> pd.DataFrame: - """ - Reads a dataframe with correct data types. - @param path: The path to the dataframe. - @return: The read dataframe. - """ - try: - df = pd.read_csv( - path, - dtype={ - "trade_id": np.int64, - "price": np.float64, - "qty": np.float64, - "timestamp": np.int64, - "is_buyer_maker": np.int8, - }, - ) - except ValueError as e: - df = pd.DataFrame() - print_(["Error in reading dataframe", e]) - return df - - def save_dataframe(self, df: pd.DataFrame, filename: str, missing: bool, verified: bool) -> str: - """ - Saves a processed dataframe. Creates the name based on first and last trade id and first and last timestamp. - Deletes dataframes that are obsolete. For example, when gaps were filled. - @param df: The dataframe to save. - @param filename: The current name of the dataframe. - @param missing: If the dataframe had gaps. - @return: - """ - if verified: - new_name = f'{df["trade_id"].iloc[0]}_{df["trade_id"].iloc[-1]}_{df["timestamp"].iloc[0]}_{df["timestamp"].iloc[-1]}_verified.csv' - else: - new_name = f'{df["trade_id"].iloc[0]}_{df["trade_id"].iloc[-1]}_{df["timestamp"].iloc[0]}_{df["timestamp"].iloc[-1]}.csv' - if new_name != filename: - print_( - [ - "Saving file", - new_name, - ts_to_date(int(new_name.split("_")[2]) / 1000), - ] - ) - df.to_csv(os.path.join(self.filepath, new_name), index=False) - new_name = "" - try: - os.remove(os.path.join(self.filepath, filename)) - print_(["Removed file", filename]) - except: - pass - elif missing: - print_(["Replacing file", filename]) - df.to_csv(os.path.join(self.filepath, filename), index=False) - else: - new_name = "" - return new_name - - def transform_ticks(self, ticks: list) -> pd.DataFrame: - """ - Transforms tick data into a cleaned dataframe with correct data types. - @param ticks: List of tick dictionaries. - @return: Clean dataframe with correct data types. 
- """ - df = pd.DataFrame(ticks) - if not df.empty: - df["trade_id"] = df["trade_id"].astype(np.int64) - df["price"] = df["price"].astype(np.float64) - df["qty"] = df["qty"].astype(np.float64) - df["timestamp"] = df["timestamp"].astype(np.int64) - df["is_buyer_maker"] = df["is_buyer_maker"].astype(np.int8) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - return df - - def get_filenames(self) -> list: - """ - Returns a sorted list of all file names in the directory. - @return: Sorted list of file names. - """ - return sorted( - [f for f in os.listdir(self.filepath) if f.endswith(".csv")], - key=lambda x: int(eval(x[: x.find("_")].replace(".cs", "").replace("v", ""))), - ) - - def new_id( - self, - first_timestamp, - last_timestamp, - first_trade_id, - length, - start_time, - prev_div, - ): - """ - Calculates a new id based on several parameters. Uses a weighted approach for more stability. - @param first_timestamp: First timestamp in current result. - @param last_timestamp: Last timestamp in current result. - @param first_trade_id: First trade id in current result. - @param length: The amount of trades in the current result. - @param start_time: The time to look for. - @param prev_div: Previous results of this function. - @return: Estimated trade id. - """ - div = int((last_timestamp - first_timestamp) / length) - prev_div.append(div) - forward = int((first_timestamp - start_time) / np.mean(prev_div)) - return max(1, int(first_trade_id - forward)), prev_div, forward - - async def find_time(self, start_time) -> pd.DataFrame: - """ - Finds the trades according to the time. - Uses different approaches for exchanges depending if time based fetching is supported. - If time based searching is supported, directly fetch the data. - If time based searching is not supported, start with current trades and move closer to start time based on estimation. - @param start_time: Time to look for. - @return: Dataframe with first trade later or equal to start time. 
- """ - try: - ticks = await self.bot.fetch_ticks_time(start_time) - return self.transform_ticks(ticks) - except: - print_(["Finding id for start time..."]) - ticks = await self.bot.fetch_ticks() - df = self.transform_ticks(ticks) - highest_id = df["trade_id"].iloc[-1] - prev_div = [] - first_ts = df["timestamp"].iloc[0] - last_ts = df["timestamp"].iloc[-1] - first_id = df["trade_id"].iloc[0] - length = len(df) - while not start_time >= first_ts or not start_time <= last_ts: - loop_start = time() - nw_id, prev_div, forward = self.new_id( - first_ts, last_ts, first_id, length, start_time, prev_div - ) - print_( - [ - "Current time span from", - df["timestamp"].iloc[0], - "to", - df["timestamp"].iloc[-1], - "with earliest trade id", - df["trade_id"].iloc[0], - "estimating distance of", - forward, - "trades", - ] - ) - if nw_id > highest_id: - nw_id = highest_id - try: - ticks = await self.bot.fetch_ticks(from_id=int(nw_id), do_print=False) - df = self.transform_ticks(ticks) - if not df.empty: - first_ts = df["timestamp"].iloc[0] - last_ts = df["timestamp"].iloc[-1] - first_id = df["trade_id"].iloc[0] - length = len(df) - if nw_id == 1 and first_ts >= start_time: - break - except Exception: - print("Failed to fetch or transform...") - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - print_(["Found id for start time!"]) - return df[df["timestamp"] >= start_time] - - def get_zip(self, base_url, symbol, date): - """ - Fetches a full day of trades from the Binance repository. - @param symbol: Symbol to fetch. - @param date: Day to download. - @return: Dataframe with full day. - """ - print_(["Fetching", symbol, date]) - url = f"{base_url}{symbol.upper()}/{symbol.upper()}-aggTrades-{date}.zip" - print(url) - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - column_names = [ - "trade_id", - "price", - "qty", - "first", - "last", - "timestamp", - "is_buyer_maker", - ] - if self.spot: - column_names.append("best_match") - try: - resp = urlopen(url) - file_tmp = BytesIO() - with tqdm.wrapattr( - open(os.devnull, "wb"), "write", miniters=1, total=getattr(resp, "length", None) - ) as fout: - for chunk in resp: - fout.write(chunk) - file_tmp.write(chunk) - - with zipfile.ZipFile(file_tmp) as my_zip_file: - for contained_file in my_zip_file.namelist(): - tf = pd.read_csv(my_zip_file.open(contained_file), names=column_names) - if tf.trade_id.iloc[0] == "agg_trade_id": - # catch cases where header is included as first row - print("header in first row: attempting fix...") - tf = tf.iloc[1:].reset_index() - tf.is_buyer_maker = tf.is_buyer_maker == "true" - tf.drop( - errors="ignore", - columns=["index"], - inplace=True, - ) - tf.drop( - errors="ignore", - columns=["first", "last", "best_match"], - inplace=True, - ) - tf["trade_id"] = tf["trade_id"].astype(np.int64) - tf["price"] = tf["price"].astype(np.float64) - tf["qty"] = tf["qty"].astype(np.float64) - tf["timestamp"] = tf["timestamp"].astype(np.int64) - tf["is_buyer_maker"] = tf["is_buyer_maker"].astype(np.int8) - tf.sort_values("trade_id", inplace=True) - tf.drop_duplicates("trade_id", inplace=True) - tf.reset_index(drop=True, inplace=True) - if df.empty: - df = tf - else: - df = pd.concat([df, tf]) - except Exception as e: - print("Failed to fetch", date, e) - return df - - async def find_df_enclosing_timestamp(self, timestamp, guessed_chunk=None): - if guessed_chunk is not None: - if guessed_chunk[0]["timestamp"] < timestamp < guessed_chunk[-1]["timestamp"]: - print_(["found id"]) - 
return self.transform_ticks(guessed_chunk) - else: - guessed_chunk = sorted( - await self.bot.fetch_ticks(do_print=False), key=lambda x: x["trade_id"] - ) - return await self.find_df_enclosing_timestamp(timestamp, guessed_chunk) - - if timestamp < guessed_chunk[0]["timestamp"]: - guessed_id = guessed_chunk[0]["trade_id"] - len(guessed_chunk) * ( - guessed_chunk[0]["timestamp"] - timestamp - ) / (guessed_chunk[-1]["timestamp"] - guessed_chunk[0]["timestamp"]) - else: - guessed_id = guessed_chunk[-1]["trade_id"] + len(guessed_chunk) * ( - timestamp - guessed_chunk[-1]["timestamp"] - ) / (guessed_chunk[-1]["timestamp"] - guessed_chunk[0]["timestamp"]) - guessed_id = int(guessed_id - len(guessed_chunk) / 2) - guessed_chunk = sorted( - await self.bot.fetch_ticks(guessed_id, do_print=False), - key=lambda x: x["trade_id"], - ) - print_( - [ - f"guessed_id {guessed_id} earliest ts {ts_to_date(guessed_chunk[0]['timestamp'] / 1000)[:19]} last ts {ts_to_date(guessed_chunk[-1]['timestamp'] / 1000)[:19]} target ts {ts_to_date(timestamp / 1000)[:19]}" - ] - ) - return await self.find_df_enclosing_timestamp(timestamp, guessed_chunk) - - def deduce_trade_ids(self, daily_ticks, df_for_id_matching): - for idx in [0, -1]: - match = daily_ticks[ - (daily_ticks.timestamp == df_for_id_matching.timestamp.iloc[idx]) - & (daily_ticks.price == df_for_id_matching.price.iloc[idx]) - & (daily_ticks.qty == df_for_id_matching.qty.iloc[idx]) - ] - if len(match) == 1: - id_at_match = df_for_id_matching.trade_id.iloc[idx] - return np.arange( - id_at_match - match.index[0], - id_at_match - match.index[0] + len(daily_ticks), - ) - # trade_ids = np.arange(id_at_match, id_at_match + len(daily_ticks.loc[match.index:])) - return match, id_at_match - raise Exception("unable to make trade ids") - - async def get_csv_gz_old(self, base_url, symbol, date, df_for_id_matching): - """ - Fetches a full day of trades from the Bybit repository. - @param symbol: Symbol to fetch. - @param date: Day to download. - @return: Dataframe with full day. - """ - print_(["Fetching", symbol, date]) - url = f"{base_url}{symbol.upper()}/{symbol.upper()}{date}.csv.gz" - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - try: - resp = urlopen(url) - with gzip.open(BytesIO(resp.read())) as f: - ff = pd.read_csv(f) - trade_ids = np.zeros(len(ff)).astype(np.int64) - tf = pd.DataFrame( - { - "trade_id": trade_ids, - "price": ff.price.astype(np.float64), - "qty": ff["size"].astype(np.float64), - "timestamp": (ff.timestamp * 1000).astype(np.int64), - "is_buyer_maker": (ff.side == "Sell").astype(np.int8), - } - ) - tf["trade_id"] = self.deduce_trade_ids(tf, df_for_id_matching) - tf.sort_values("timestamp", inplace=True) - tf.reset_index(drop=True, inplace=True) - del ff - df = tf - except Exception as e: - print("Failed to fetch", date, e) - return df - - async def download_ticks(self): - """ - Searches for previously downloaded files and fills gaps in them if necessary. - Downloads any missing data based on the specified time frame. 
- @return: - """ - if self.config["exchange"] == "binance": - if self.spot: - self.bot = await create_binance_bot_spot(get_dummy_settings(self.config)) - else: - self.bot = await create_binance_bot(get_dummy_settings(self.config)) - elif self.config["exchange"] == "bybit": - self.bot = await create_bybit_bot(get_dummy_settings(self.config)) - else: - print(self.config["exchange"], "not found") - return - - filenames = self.get_filenames() - mod_files = [] - highest_id = 0 - for f in filenames: - verified = False - try: - first_time = int(f.split("_")[2]) - last_time = int(f.split("_")[3].split(".")[0]) - if len(f.split("_")) > 4: - verified = True - except: - first_time = sys.maxsize - last_time = sys.maxsize - if ( - not verified - and last_time >= self.start_time - and (self.end_time == -1 or (first_time <= self.end_time)) - or last_time == sys.maxsize - ): - print_(["Validating file", f, ts_to_date(first_time / 1000)]) - df = self.read_dataframe(os.path.join(self.filepath, f)) - missing, df, gaps = self.validate_dataframe(df) - exists = False - if gaps.empty: - first_id = df["trade_id"].iloc[0] - self.save_dataframe(df, f, missing, True) - else: - first_id = ( - df["trade_id"].iloc[0] - if df["trade_id"].iloc[0] < gaps["start"].iloc[0] - else gaps["start"].iloc[0] - ) - if not gaps.empty and ( - f != filenames[-1] or str(first_id - first_id % 100000) not in f - ): - last_id = df["trade_id"].iloc[-1] - for i in filenames: - tmp_first_id = int(i.split("_")[0]) - tmp_last_id = int(i.split("_")[1].replace(".csv", "")) - if ( - (first_id - first_id % 100000) == tmp_first_id - and ( - (first_id - first_id % 100000 + 99999) == tmp_last_id - or (highest_id == tmp_first_id or highest_id == tmp_last_id) - or highest_id > last_id - ) - and first_id != 1 - and i != f - ): - exists = True - break - if missing and df["timestamp"].iloc[-1] > self.start_time and not exists: - current_time = df["timestamp"].iloc[-1] - for i in gaps.index: - print_( - [ - "Filling gaps from id", - gaps["start"].iloc[i], - "to id", - gaps["end"].iloc[i], - ] - ) - current_id = gaps["start"].iloc[i] - while current_id < gaps["end"].iloc[i] and utc_ms() - current_time > 10000: - loop_start = time() - try: - fetched_new_trades = await self.bot.fetch_ticks(int(current_id)) - tf = self.transform_ticks(fetched_new_trades) - if tf.empty: - print_(["Response empty. No new trades, exiting..."]) - await asyncio.sleep( - max( - 0.0, - self.fetch_delay_seconds - time() + loop_start, - ) - ) - break - if current_id == tf["trade_id"].iloc[-1]: - print_(["Same trade ID again. 
No new trades, exiting..."]) - await asyncio.sleep( - max( - 0.0, - self.fetch_delay_seconds - time() + loop_start, - ) - ) - break - current_id = tf["trade_id"].iloc[-1] - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df = df[ - df["trade_id"] - <= gaps["end"].iloc[i] - gaps["end"].iloc[i] % 100000 + 99999 - ] - df.reset_index(drop=True, inplace=True) - current_time = df["timestamp"].iloc[-1] - except Exception: - print_(["Failed to fetch or transform..."]) - await asyncio.sleep( - max(0.0, self.fetch_delay_seconds - time() + loop_start) - ) - if not df.empty: - if df["trade_id"].iloc[-1] > highest_id: - highest_id = df["trade_id"].iloc[-1] - if not exists: - tf = df[df["trade_id"].mod(100000) == 0] - if len(tf) > 1: - df = df[: tf.index[-1]] - nf = self.save_dataframe(df, f, missing, verified) - mod_files.append(nf) - elif df["trade_id"].iloc[0] != 1: - os.remove(os.path.join(self.filepath, f)) - print_(["Removed file fragment", f]) - - chunk_gaps = [] - filenames = self.get_filenames() - prev_last_id = 0 - prev_last_time = self.start_time - for f in filenames: - first_id = int(f.split("_")[0]) - last_id = int(f.split("_")[1]) - first_time = int(f.split("_")[2]) - last_time = int(f.split("_")[3].split(".")[0]) - if ( - first_id - 1 != prev_last_id - and f not in mod_files - and first_time >= prev_last_time - and first_time >= self.start_time - and not prev_last_time > self.end_time - ): - chunk_gaps.append((prev_last_time, first_time, prev_last_id, first_id - 1)) - if first_time >= self.start_time or last_time >= self.start_time: - prev_last_id = last_id - prev_last_time = last_time - - if len(filenames) < 1: - chunk_gaps.append((self.start_time, self.end_time, 0, 0)) - if prev_last_time < self.end_time: - chunk_gaps.append((prev_last_time, self.end_time, prev_last_id, 0)) - - seen = set() - chunk_gaps_dedup = [] - for elm in chunk_gaps: - if elm not in seen: - chunk_gaps_dedup.append(elm) - seen.add(elm) - chunk_gaps = chunk_gaps_dedup - - for gaps in chunk_gaps: - start_time, end_time, start_id, end_id = gaps - df = pd.DataFrame() - - current_id = start_id + 1 - current_time = start_time - - if self.config["exchange"] == "binance": - fetched_new_trades = await self.bot.fetch_ticks(1) - tf = self.transform_ticks(fetched_new_trades) - earliest = tf["timestamp"].iloc[0] - - if earliest > start_time: - start_time = earliest - current_time = start_time - - tmp = pd.date_range( - start=datetime.datetime.fromtimestamp( - start_time / 1000, datetime.timezone.utc - ).date(), - end=datetime.datetime.fromtimestamp( - end_time / 1000, datetime.timezone.utc - ).date(), - freq="D", - ).to_pydatetime() - days = [date.strftime("%Y-%m-%d") for date in tmp] - df = pd.DataFrame(columns=["trade_id", "price", "qty", "timestamp", "is_buyer_maker"]) - - months_done = set() - months_failed = set() - for day in days: - month = day[:7] - if month in months_done: - continue - if month in months_failed: - tf = self.get_zip(self.daily_base_url, self.config["symbol"], day) - if tf.empty: - print_(["failed to fetch daily", day]) - continue - else: - tf = self.get_zip(self.monthly_base_url, self.config["symbol"], month) - if tf.empty: - print_(["failed to fetch monthly", month]) - months_failed.add(month) - tf = self.get_zip(self.daily_base_url, self.config["symbol"], day) - else: - months_done.add(month) - tf = tf[tf["timestamp"] >= start_time] - tf = tf[tf["timestamp"] <= end_time] - if start_id != 0: - tf = tf[tf["trade_id"] > start_id] - if 
end_id != 0: - tf = tf[tf["trade_id"] <= end_id] - if df.empty: - df = tf - else: - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - - if not df.empty and ( - (df["trade_id"].iloc[0] % 100000 == 0 and len(df) >= 100000) - or df["trade_id"].iloc[0] % 100000 != 0 - ): - for index, row in df[df["trade_id"] % 100000 == 0].iterrows(): - if index != 0: - self.save_dataframe( - df[ - (df["trade_id"] >= row["trade_id"] - 1000000) - & (df["trade_id"] < row["trade_id"]) - ], - "", - True, - False, - ) - df = df[df["trade_id"] >= row["trade_id"]] - if not df.empty: - start_id = df["trade_id"].iloc[0] - 1 - start_time = df["timestamp"].iloc[0] - current_time = df["timestamp"].iloc[-1] - current_id = df["trade_id"].iloc[-1] + 1 - if start_id == 0: - df = await self.find_time(start_time) - current_id = df["trade_id"].iloc[-1] + 1 - current_time = df["timestamp"].iloc[-1] - - end_id = sys.maxsize if end_id == 0 else end_id - 1 - - if current_id <= end_id and current_time <= end_time and utc_ms() - current_time > 10000: - print_( - [ - "Downloading from", - ts_to_date(float(current_time) / 1000), - "to", - ts_to_date(float(end_time) / 1000), - ] - ) - - while ( - current_id <= end_id and current_time <= end_time and utc_ms() - current_time > 10000 - ): - loop_start = time() - fetched_new_trades = await self.bot.fetch_ticks(int(current_id)) - tf = self.transform_ticks(fetched_new_trades) - if tf.empty: - print_(["Response empty. No new trades, exiting..."]) - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - break - if current_id == tf["trade_id"].iloc[-1]: - print_(["Same trade ID again. No new trades, exiting..."]) - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - break - df = pd.concat([df, tf]) - df.sort_values("trade_id", inplace=True) - df.drop_duplicates("trade_id", inplace=True) - df.reset_index(drop=True, inplace=True) - current_time = tf["timestamp"].iloc[-1] - current_id = tf["trade_id"].iloc[-1] + 1 - tf = df[df["trade_id"].mod(100000) == 0] - if not tf.empty and len(df) > 1: - if df["trade_id"].iloc[0] % 100000 == 0 and len(tf) > 1: - self.save_dataframe(df[: tf.index[-1]], "", True, False) - df = df[tf.index[-1] :] - elif df["trade_id"].iloc[0] % 100000 != 0 and len(tf) == 1: - self.save_dataframe(df[: tf.index[-1]], "", True, False) - df = df[tf.index[-1] :] - await asyncio.sleep(max(0.0, self.fetch_delay_seconds - time() + loop_start)) - if not df.empty: - df = df[df["timestamp"] >= start_time] - if start_id != 0 and not df.empty: - df = df[df["trade_id"] > start_id] - elif end_id != sys.maxsize and not df.empty: - df = df[df["trade_id"] <= end_id] - elif end_time != sys.maxsize and not df.empty: - df = df[df["timestamp"] <= end_time] - if not df.empty: - self.save_dataframe(df, "", True, False) - - try: - await self.bot.session.close() - except: - pass - - async def prepare_files(self): - """ - Takes downloaded data and prepares a numpy array for use in backtesting. 
- @return: - """ - filenames = [ - f - for f in self.get_filenames() - if int(f.split("_")[3].split(".")[0]) >= self.start_time - and int(f.split("_")[2]) <= self.end_time - ] - left_overs = pd.DataFrame() - sample_size_ms = 1000 - current_index = 0 - - try: - first_frame = pd.read_csv( - os.path.join(self.filepath, filenames[0]), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - first_frame = first_frame[ - (first_frame["timestamp"] >= self.start_time) - & (first_frame["timestamp"] <= self.end_time) - ] - earliest_time = first_frame.timestamp.iloc[0] // sample_size_ms * sample_size_ms - except Exception as e: - print_(["Error in determining earliest time", e]) - earliest_time = self.start_time - - try: - last_frame = pd.read_csv( - os.path.join(self.filepath, filenames[-1]), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - last_frame = last_frame[ - (last_frame["timestamp"] >= self.start_time) - & (last_frame["timestamp"] <= self.end_time) - ] - latest_time = last_frame.timestamp.iloc[-1] // sample_size_ms * sample_size_ms - except Exception as e: - print_(["Error in determining latest time", e]) - latest_time = self.end_time - - array = np.zeros( - (int((latest_time - earliest_time) / sample_size_ms + 1), 3), - dtype=np.float64, - ) - - for f in filenames: - chunk = pd.read_csv( - os.path.join(self.filepath, f), - dtype={ - "price": np.float64, - "is_buyer_maker": np.float64, - "timestamp": np.float64, - "qty": np.float64, - }, - usecols=["price", "is_buyer_maker", "timestamp", "qty"], - ) - - chunk = pd.concat([left_overs, chunk]) - chunk.sort_values("timestamp", inplace=True) - chunk = chunk[ - (chunk["timestamp"] >= self.start_time) & (chunk["timestamp"] <= self.end_time) - ] - - cut_off = ( - chunk.timestamp.iloc[-1] // sample_size_ms * sample_size_ms - 1 - (1 * sample_size_ms) - ) - - left_overs = chunk[chunk["timestamp"] > cut_off] - chunk = chunk[chunk["timestamp"] <= cut_off] - - sampled_ticks = calc_samples(chunk[["timestamp", "qty", "price"]].values) - if current_index != 0 and array[current_index - 1, 0] + 1000 != sampled_ticks[0, 0]: - size = int((sampled_ticks[0, 0] - array[current_index - 1, 0]) / sample_size_ms) - 1 - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - sampled_ticks[0, 0], - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : current_index + len(tmp)] = tmp - current_index += len(tmp) - array[current_index : current_index + len(sampled_ticks)] = sampled_ticks - current_index += len(sampled_ticks) - - print( - "\rloaded chunk of data", - f, - ts_to_date(float(f.split("_")[2]) / 1000), - end=" ", - ) - print("\n") - - # Fill in anything left over - if not left_overs.empty: - sampled_ticks = calc_samples(left_overs[["timestamp", "qty", "price"]].values) - if current_index != 0 and array[current_index - 1, 0] + 1000 != sampled_ticks[0, 0]: - size = int((sampled_ticks[0, 0] - array[current_index - 1, 0]) / sample_size_ms) - 1 - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - sampled_ticks[0, 0], - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : 
current_index + len(tmp)] = tmp - current_index += len(tmp) - array[current_index : current_index + len(sampled_ticks)] = sampled_ticks - current_index += len(sampled_ticks) - - # Fill the gap at the end with the latest price - # Should not be necessary anymore - if current_index + 1 < len(array): - size = len(array) - current_index - tmp = np.zeros((size, 3), dtype=np.float64) - tmp[:, 0] = np.arange( - array[current_index - 1, 0] + sample_size_ms, - array[current_index - 1, 0] + ((size + 1) * sample_size_ms), - sample_size_ms, - dtype=np.float64, - ) - tmp[:, 2] = array[current_index - 1, 2] - array[current_index : current_index + len(tmp)] = tmp - current_index += len(tmp) - - print_( - [ - "Saving single file with", - len(array), - " ticks to", - self.tick_filepath, - "...", - ] - ) - np.save(self.tick_filepath, array) - print_(["Saved single file!"]) - - async def get_sampled_ticks(self) -> np.ndarray: - """ - Function for direct use in the backtester. Checks if the numpy arrays exist and if so loads them. - If they do not exist or if their length doesn't match, download the missing data and create them. - @return: numpy array. - """ - if os.path.exists(self.tick_filepath): - print_(["Loading cached tick data from", self.tick_filepath]) - tick_data = np.load(self.tick_filepath) - return tick_data - await self.download_ticks() - await self.prepare_files() - tick_data = np.load(self.tick_filepath) - return tick_data - - -def get_zip(url: str): - col_names = ["timestamp", "open", "high", "low", "close", "volume"] - try: - resp = urlopen(url) - file_tmp = BytesIO() - with tqdm.wrapattr( - open(os.devnull, "wb"), "write", miniters=1, total=getattr(resp, "length", None) - ) as fout: - for chunk in resp: - fout.write(chunk) - file_tmp.write(chunk) - dfs = [] - with zipfile.ZipFile(file_tmp) as my_zip_file: - for contained_file in my_zip_file.namelist(): - df = pd.read_csv(my_zip_file.open(contained_file)) - df.columns = col_names + [str(i) for i in range(len(df.columns) - len(col_names))] - dfs.append(df[col_names]) - return pd.concat(dfs).sort_values("timestamp").reset_index() - except Exception as e: - print(e) - - async def fetch_zips(url): try: async with aiohttp.ClientSession() as session: @@ -1017,15 +85,6 @@ def get_first_ohlcv_ts(symbol: str, spot=False) -> int: return 0 -def findall(string, pattern): - """Yields all the positions of - the pattern in the string""" - i = string.find(pattern) - while i != -1: - yield i - i = string.find(pattern, i + 1) - - def get_days_in_between(start_day, end_day): date_format = "%Y-%m-%d" start_date = datetime.datetime.strptime(start_day, date_format) @@ -1059,8 +118,9 @@ async def download_ohlcvs_bybit_sub( start_date, end_date = get_day(start_date), get_day(end_date) assert date_to_ts2(end_date) >= date_to_ts2(start_date), "end_date is older than start_date" dirpath = make_get_filepath(f"historical_data/ohlcvs_bybit{'_spot' if spot else ''}/{symbol}/") + convert_csv_to_npy(dirpath) ideal_days = get_days_in_between(start_date, end_date) - days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".csv" in filename] + days_done = [filename[:-4] for filename in os.listdir(dirpath) if ".npy" in filename] days_to_get = [day for day in ideal_days if day not in days_done] dfs = {} if len(days_to_get) > 0: @@ -1083,15 +143,15 @@ async def download_ohlcvs_bybit_sub( for day, df in sorted(dfs_.items()): if day in days_done: continue - filepath = f"{dirpath}{day}.csv" - df.to_csv(filepath) + filepath = f"{dirpath}{day}.npy" + dump_ohlcv_data(df, 
filepath) dumped.append(day) if not download_only: dfs.update(dfs_) if not download_only: for day in ideal_days: - if os.path.exists(f"{dirpath}{day}.csv"): - dfs[day] = pd.read_csv(f"{dirpath}{day}.csv") + if os.path.exists(f"{dirpath}{day}.npy"): + dfs[day] = load_ohlcv_data(f"{dirpath}{day}.npy") if len(dfs) == 0: return pd.DataFrame(columns=["timestamp", "open", "high", "low", "close", "volume"]) df = pd.concat(dfs.values()).sort_values("timestamp").reset_index() @@ -1153,14 +213,16 @@ def convert_to_ohlcv(df, spot, interval=60000): for x in ["open", "high", "low", "close"]: ohlcvs[x] = ohlcvs[x].fillna(closes) ohlcvs["volume"] = ohlcvs["volume"].fillna(0.0) - return ohlcvs + ohlcvs.loc[:, "timestamp"] = ohlcvs.index.values + columns = ["timestamp", "open", "high", "low", "close", "volume"] + return ohlcvs[columns] async def download_single_ohlcvs_binance(url: str, fpath: str): try: print(f"fetching {url}") csv = await get_zip_binance(url) - csv.to_csv(fpath) + dump_ohlcv_data(csv, fpath) except Exception as e: print(f"failed to download {url} {e}") @@ -1175,6 +237,7 @@ async def download_ohlcvs_binance( start_tss=None, ) -> pd.DataFrame: dirpath = make_get_filepath(f"historical_data/ohlcvs_{'spot' if spot else 'futures'}/{symbol}/") + convert_csv_to_npy(dirpath) base_url = "https://data.binance.vision/data/" base_url += "spot/" if spot else f"futures/{'cm' if inverse else 'um'}/" col_names = ["timestamp", "open", "high", "low", "close", "volume"] @@ -1192,7 +255,7 @@ async def download_ohlcvs_binance( months = [m for m in months if m != month_now] # do months async - months_filepaths = {month: os.path.join(dirpath, month + ".csv") for month in months} + months_filepaths = {month: os.path.join(dirpath, month + ".npy") for month in months} missing_months = {k: v for k, v in months_filepaths.items() if not os.path.exists(v)} await asyncio.gather( *[ @@ -1205,11 +268,11 @@ async def download_ohlcvs_binance( months_done = sorted([x for x in os.listdir(dirpath) if x[:-4] in months_filepaths]) # do days async - days_filepaths = {day: os.path.join(dirpath, day + ".csv") for day in days} + days_filepaths = {day: os.path.join(dirpath, day + ".npy") for day in days} missing_days = { k: v for k, v in days_filepaths.items() - if not os.path.exists(v) and k[:7] + ".csv" not in months_done + if not os.path.exists(v) and k[:7] + ".npy" not in months_done } await asyncio.gather( *[ @@ -1224,17 +287,17 @@ async def download_ohlcvs_binance( # delete days contained in months fnames = os.listdir(dirpath) for fname in fnames: - if fname.endswith(".csv") and len(fname) == 14: - if fname[:7] + ".csv" in fnames: + if fname.endswith(".npy") and len(fname) == 14: + if fname[:7] + ".npy" in fnames: print("deleting", os.path.join(dirpath, fname)) os.remove(os.path.join(dirpath, fname)) if not download_only: fnames = os.listdir(dirpath) dfs = [ - pd.read_csv(os.path.join(dirpath, fpath)) + load_ohlcv_data(os.path.join(dirpath, fpath)) for fpath in months_done + days_done - if fpath in fnames + if fpath in fnames and fpath.endswith(".npy") ] try: df = pd.concat(dfs)[col_names].sort_values("timestamp") @@ -1269,7 +332,7 @@ def count_longest_identical_data(hlc, symbol, verbose=True): return longest_consecutive -def attempt_gap_fix_hlcs(df, symbol=None): +def attempt_gap_fix_hlcvs(df, symbol=None): interval = 60 * 1000 max_hours = 12 max_gap = interval * 60 * max_hours @@ -1300,7 +363,7 @@ async def load_hlcvs(symbol, start_date, end_date, base_dir="backtests", exchang df = await download_ohlcvs_binance(symbol, False, 
start_date, end_date, False) elif exchange == "bybit": df = await download_ohlcvs_bybit(symbol, start_date, end_date) - df = attempt_gap_fix_hlcs(df, symbol=symbol) + df = attempt_gap_fix_hlcvs(df, symbol=symbol) else: raise Exception(f"downloading ohlcvs from exchange {exchange} not supported") if len(df) == 0: @@ -1358,7 +421,7 @@ async def prepare_hlcvs(config: dict): continue assert ( np.diff(data[:, 0]) == interval_ms - ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcs + ).all(), f"gaps in hlcv data {symbol}" # verify integrous 1m hlcvs hlcvsd[symbol] = data symbols = sorted(hlcvsd.keys()) if len(symbols) > 1: @@ -1367,135 +430,6 @@ async def prepare_hlcvs(config: dict): return symbols, timestamps, unified_data -async def load_hlc_cache( - symbol, - inverse, - start_date, - end_date, - base_dir="backtests", - spot=False, - exchange="binance", - start_tss=None, - minimum_coin_age_days=None, -): - cache_fname = ( - f"{ts_to_date_utc(date_to_ts2(start_date))[:10]}_" - + f"{ts_to_date_utc(date_to_ts2(end_date))[:10]}_ohlcv_cache.npy" - ) - - filepath = make_get_filepath( - os.path.join(base_dir, exchange + ("_spot" if spot else ""), symbol, "caches", cache_fname) - ) - if os.path.exists(filepath): - data = np.load(filepath) - else: - if exchange == "bybit": - df = await download_ohlcvs_bybit(symbol, start_date, end_date, spot, download_only=False) - df = attempt_gap_fix_hlcs(df) - else: - df = await download_ohlcvs_binance( - symbol, inverse, start_date, end_date, spot, start_tss=start_tss - ) - if len(df) == 0: - return pd.DataFrame() - df = df[df.timestamp >= date_to_ts2(start_date)] - df = df[df.timestamp <= date_to_ts2(end_date)] - data = df[["timestamp", "high", "low", "close"]].values - np.save(filepath, data) - try: - count_longest_identical_data(data, symbol) - except Exception as e: - print("error checking integrity", e) - if minimum_coin_age_days: - if start_tss and symbol in start_tss: - first_ts = start_tss[symbol] - else: - first_ts = (await get_first_ohlcv_timestamps(symbols=[symbol]))[symbol] - new_start_ts = max(first_ts + 1000 * 60 * 60 * 24 * minimum_coin_age_days, data[0][0]) - if new_start_ts != data[0][0]: - print( - f"changing start date for {symbol} {ts_to_date_utc(data[0][0])} -> {ts_to_date_utc(new_start_ts)}" - ) - data = data[data[:, 0] >= new_start_ts] - return data - - -async def prepare_multsymbol_data( - symbols, start_date, end_date, base_dir, exchange -) -> (float, np.ndarray): - """ - returns first timestamp and hlc data in the form - [ - [ - [sym0_high0, sym0_low0, sym0_close0], - [sym0_high1, sym0_low1, sym0_close1], - ... - ], - [ - [sym1_high0, sym1_low0, sym1_close0], - [sym1_high1, sym1_low1, sym1_close1], - ... - ], - ... 
- ] - """ - if end_date in ["today", "now", ""]: - end_date = ts_to_date_utc(utc_ms())[:10] - hlcs = [] - interval = 60000.0 - for symbol in symbols: - data = await load_hlc_cache(symbol, False, start_date, end_date, base_dir, False, exchange) - assert ( - np.diff(data[:, 0]) == interval - ).all(), f"gaps in hlc data {symbol}" # verify integrous 1m hlcs - dft = pd.DataFrame( - data, columns=["timestamp"] + [f"{symbol}_{key}" for key in ["high", "low", "close"]] - ) - hlcs.append(dft) - - tss = np.arange( - min([x.timestamp.iloc[0] for x in hlcs]), - max([x.timestamp.iloc[-1] for x in hlcs]) + interval, - interval, - ) - df = pd.concat([x.set_index("timestamp").reindex(tss) for x in hlcs], axis=1, join="outer") - df = df.fillna(0.0) - return df.index[0], np.array([df.values[:, i : i + 3] for i in range(0, len(symbols) * 3, 3)]) - - -def pad_hlcs(hlcs, timestamps): - start_timestamp = timestamps[0] - interval = 60000 - num_timestamps = len(timestamps) - - # Initialize the padded_hlcs array with NaNs - padded_hlcs = np.full((num_timestamps, 3), np.nan) - - # Calculate the indices for where the hlcs data should be placed in the padded array - hlcs_start_idx = int((hlcs[0, 0] - start_timestamp) // interval) - hlcs_end_idx = int((hlcs[-1, 0] - start_timestamp) // interval) - - # Fill the hlcs data into the padded array - padded_indices = ((hlcs[:, 0] - start_timestamp) // interval).astype(int) - padded_hlcs[padded_indices, :] = hlcs[:, 1:] - - # Frontfill - front_fill_value = hlcs[0, 3] - padded_hlcs[:hlcs_start_idx, :] = front_fill_value - - # Backfill - back_fill_value = hlcs[-1, 3] - padded_hlcs[hlcs_end_idx + 1 :, :] = back_fill_value - - # Forward fill remaining NaNs using numpy's `np.nan_to_num` and `np.fmax.accumulate` - nan_mask = np.isnan(padded_hlcs[:, 0]) - idx = np.where(~nan_mask, np.arange(num_timestamps), 0) - np.maximum.accumulate(idx, out=idx) - padded_hlcs = padded_hlcs[idx] - - return padded_hlcs - - def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): # Find the global start and end timestamps @@ -1537,149 +471,49 @@ def unify_hlcv_data(hlcv_list) -> (np.ndarray, np.ndarray): return timestamps, unified_array -async def prepare_hlcs_forager(config: dict): - """ - returns - [timestamp], - [ - [ - [sym0_high0, sym0_low0, sym0_close0], - [sym1_high0, sym1_low0, sym1_close0], - [sym2_high0, sym2_low0, sym2_close0], - ], - [ - [sym0_high1, sym0_low1, sym0_close1], - [sym1_high1, sym1_low1, sym1_close1], - [sym2_high1, sym2_low1, sym2_close1], - ] - ] - """ - symbols = config["backtest"]["symbols"] - start_date = config["backtest"]["start_date"] - end_date = config["backtest"]["end_date"] - base_dir = config["backtest"]["base_dir"] - exchange = config["backtest"]["exchange"] - minimum_coin_age_days = config["live"]["minimum_coin_age_days"] - if end_date in ["today", "now", ""]: - end_date = ts_to_date_utc(utc_ms())[:10] - hlcsd = {} - interval_ms = 60000 - start_tss = None - if exchange == "binance": - start_tss = await get_first_ohlcv_timestamps(cc=ccxt.binanceusdm(), symbols=symbols) - for symbol in symbols: - data = await load_hlc_cache( - symbol, - False, - start_date, - end_date, - base_dir, - False, - exchange, - start_tss=start_tss, - minimum_coin_age_days=minimum_coin_age_days, - ) - if len(data) == 0: - continue - assert ( - np.diff(data[:, 0]) == interval_ms - ).all(), f"gaps in hlc data {symbol}" # verify integrous 1m hlcs - hlcsd[symbol] = data - # hlcsd is {symbol: array([[timestamp, high, low, close]])} - first_timestamp = min([x[0][0] for x in hlcsd.values()]) 
- last_timestamp = max([x[-1][0] for x in hlcsd.values()]) - timestamps = np.arange(first_timestamp, last_timestamp + interval_ms, interval_ms) - - unified_data = [] - for symbol, data in hlcsd.items(): - padded_hlcs = pad_hlcs(data, timestamps) - unified_data.append(padded_hlcs) - - return timestamps, np.array(unified_data).transpose(1, 0, 2) +def convert_csv_to_npy(filepath): + if not os.path.exists(filepath): + return False + if os.path.isdir(filepath): + for fp in os.listdir(filepath): + convert_csv_to_npy(os.path.join(filepath, fp)) + return False + if filepath.endswith(".csv"): + columns = ["timestamp", "open", "high", "low", "close", "volume"] + npy_filepath = filepath.replace(".csv", ".npy") + csv_data = pd.read_csv(filepath)[columns] + dump_ohlcv_data(csv_data, npy_filepath) + os.remove(filepath) + print(f"successfully converted {filepath} to {npy_filepath}") + return True + + +def dump_ohlcv_data(data, filepath): + npy_filepath = filepath.replace(".csv", ".npy") + columns = ["timestamp", "open", "high", "low", "close", "volume"] + if isinstance(data, pd.DataFrame): + to_dump = data[columns].astype(float).values + elif isinstance(data, np.ndarray): + to_dump = data + else: + raise Exception(f"unknown file type {filepath} dump_ohlcv_data") + np.save(npy_filepath, to_dump) -def format_hlcs_forager(hlcsd: dict): - interval_ms = 60000 - first_timestamp = min([x[0][0] for x in hlcsd.values()]) - last_timestamp = max([x[-1][0] for x in hlcsd.values()]) - timestamps = np.arange(first_timestamp, last_timestamp + interval_ms, interval_ms) - - unified_data = [] - for symbol, data in hlcsd.items(): - padded_hlcs = pad_hlcs(numpyize(data), timestamps) - unified_data.append(padded_hlcs) - - return timestamps, np.array(unified_data).transpose(1, 0, 2) - - -def calc_noisiness(timestamps, hlcs, timeframe="15m"): - """ - Takes 1m hlcs and timestamps as input and calculates noisiness aggregated over a specified timeframe. - - Args: - timestamps (np.array): Array of timestamps. - hlcs (np.array): 3D array of shape (time, symbols, [high, low, close]). - timeframe (str): Aggregation timeframe ("15m", "5m", "1h", "4h", "1d"). - - Returns: - np.array: 2D array with adjusted timestamps and noisiness values for each symbol per new timeframe. 
- """ - - if timeframe == "15m": - n_mins, tf = 15, 1000 * 60 * 15 - elif timeframe == "5m": - n_mins, tf = 5, 1000 * 60 * 5 - elif timeframe == "1h": - n_mins, tf = 60, 1000 * 60 * 60 - elif timeframe == "4h": - n_mins, tf = 60 * 4, 1000 * 60 * 60 * 4 - elif timeframe == "1d": - n_mins, tf = 60 * 24, 1000 * 60 * 60 * 24 +def load_ohlcv_data(filepath): + npy_filepath = filepath.replace(".csv", ".npy") + columns = ["timestamp", "open", "high", "low", "close", "volume"] + if os.path.exists(npy_filepath): + loaded_data = np.load(npy_filepath, allow_pickle=True) else: - raise Exception(f"unsupported timeframe: {timeframe}") - - # Calculate the first and last timestamp for the new adjusted timeframe - first_ts = timestamps[0] // tf * tf - last_ts = timestamps[-1] // tf * tf - new_timestamps = np.arange(first_ts, last_ts + tf, tf) - - # Number of symbols and the number of new timeframes - num_symbols = hlcs.shape[1] - num_periods = len(new_timestamps) - - # Initialize the noisiness array - noisiness = np.zeros((num_periods - 1, num_symbols)) - - start_idx = timestamps[0] // tf * tf - for i in range(start_idx, len(hlcs) + tf, n_mins): - slice_ = hlcs[max(0, i - n_mins) : i] - high = slice_.max() - low = slice_.min() - - # Process each symbol - for symbol_index in range(num_symbols): - # Aggregate high, low, and close for each timeframe - for i in range(1, num_periods): - # Determine indices in the original array that fall into the current timeframe bucket - mask = (timestamps >= new_timestamps[i - 1]) & (timestamps < new_timestamps[i]) - if np.any(mask): - highs = hlcs[mask, symbol_index, 0] - lows = hlcs[mask, symbol_index, 1] - closes = hlcs[mask, symbol_index, 2] - - # Compute high max, low min, and the last close in the interval - period_high = np.max(highs) - period_low = np.min(lows) - period_close = closes[-1] - - # Calculate noisiness - if period_close == 0.0: - noisiness[i - 1, symbol_index] = 0.0 - else: - noisiness[i - 1, symbol_index] = (period_high - period_low) / period_close - - # Return adjusted timestamps (excluding the last since it doesn't complete the interval) and noisiness values - return new_timestamps[:-1], noisiness + print(f"loading {filepath}") + csv_data = pd.read_csv(filepath)[columns] + print(f"dumping {npy_filepath}") + dump_ohlcv_data(csv_data, npy_filepath) + print(f"removing {filepath}") + os.remove(filepath) + loaded_data = csv_data.values + return pd.DataFrame(loaded_data, columns=columns) async def main(): @@ -1696,20 +530,14 @@ async def main(): args = parser.parse_args() config = prepare_backtest_config(args) - if config["ohlcv"]: - data = await load_hlc_cache( - config["symbol"], - config["inverse"], - config["start_date"], - config["end_date"], - spot=config["spot"], - exchange=config["exchange"], - ) - else: - downloader = Downloader(config) - await downloader.download_ticks() - if not args.download_only: - await downloader.prepare_files() + data = await load_hlc_cache( + config["symbol"], + config["inverse"], + config["start_date"], + config["end_date"], + spot=config["spot"], + exchange=config["exchange"], + ) if __name__ == "__main__": From bba64290b2b39a66435016b28ba653358d2650cb Mon Sep 17 00:00:00 2001 From: Eirik Narjord Date: Wed, 2 Oct 2024 14:21:38 -0400 Subject: [PATCH 32/32] bug fix: init_markets_dict needs param verbose --- src/exchanges/binance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/exchanges/binance.py b/src/exchanges/binance.py index f708bd8e0..b0e2a0718 100644 --- a/src/exchanges/binance.py +++ 
b/src/exchanges/binance.py @@ -80,9 +80,9 @@ async def print_new_user_suggestion(self): print(front_pad + "#" * (max_len + 2) + back_pad) print("\n\n") - async def init_markets_dict(self): + async def init_markets_dict(self, verbose=True): await self.print_new_user_suggestion() - await super().init_markets_dict() + await super().init_markets_dict(verbose=verbose) def set_market_specific_settings(self): super().set_market_specific_settings()
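
Two changes in this series carry most of the behavioral weight, and both are easier to see outside diff context. (Patch 28's change is simpler still: `from passivbot import main` is deferred until after `manage_rust_compilation()` has run, so the Rust extension is guaranteed to exist before any module that needs it is imported.)

Patch 30 threads an accumulated `path` through `recursive_config_update` so that changes to nested keys are logged with their full dotted path instead of only the leaf name. A minimal sketch of the same walk, assuming a plain nested-dict config and underscore-joined keys (`recursive_update` here is illustrative, not the repo function):

    from typing import Any

    def recursive_update(config: dict, key: str, value: Any, path: tuple = ()) -> bool:
        # Direct hit: report the full dotted path before mutating the leaf.
        if key in config:
            if config[key] != value:
                print(f"changed {'.'.join(path + (key,))} {config[key]} -> {value}")
                config[key] = value
            return True
        # Otherwise peel off the first underscore-separated segment and descend.
        head, _, tail = key.partition("_")
        if head in config and isinstance(config[head], dict):
            return recursive_update(config[head], tail, value, path + (head,))
        return False

    cfg = {"bot": {"long": {"n_positions": 10}}}
    recursive_update(cfg, "bot_long_n_positions", 7)  # changed bot.long.n_positions 10 -> 7

Patch 31 swaps the per-day OHLCV cache format from csv to npy: frames are dumped as plain float arrays with a fixed column order and rebuilt as DataFrames on load, so backtests no longer re-parse csv text. A round-trip sketch of that storage contract (simplified; the repo's `dump_ohlcv_data` and `load_ohlcv_data` above additionally convert and delete legacy csv files, and the filename below is only an example of the per-day naming):

    import os

    import numpy as np
    import pandas as pd

    # The fixed column order is the format contract; .npy files carry no header.
    OHLCV_COLUMNS = ["timestamp", "open", "high", "low", "close", "volume"]

    def dump_ohlcv(df: pd.DataFrame, path: str) -> None:
        # float64 throughout; timestamps stay as epoch milliseconds.
        np.save(path, df[OHLCV_COLUMNS].astype(float).values)

    def load_ohlcv(path: str) -> pd.DataFrame:
        # Rebuild the frame; column names are implied by the fixed order.
        return pd.DataFrame(np.load(path), columns=OHLCV_COLUMNS)

    df = pd.DataFrame([[1717200000000, 1.0, 1.2, 0.9, 1.1, 42.0]], columns=OHLCV_COLUMNS)
    dump_ohlcv(df, "2024-06-01.npy")
    assert (load_ohlcv("2024-06-01.npy").values == df.astype(float).values).all()
    os.remove("2024-06-01.npy")

The trade-off is that npy files are schema-less: the reader and writer must agree on column order forever, which is why patch 31 pins the same six-column list in `dump_ohlcv_data`, `load_ohlcv_data`, and `convert_csv_to_npy`.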