Merge pull request #2 from aoki-h-jp/feature/1.0.0/downloading
Feature/1.0.0/downloading
aoki-h-jp authored Aug 25, 2023
2 parents 21765be + 5dda4cf commit 6dc7836
Showing 8 changed files with 292 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yaml
@@ -19,7 +19,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pipenv
-pip install git+https://github.com/aoki-h-jp/binance-bulk-downloader
+pip install git+https://github.com/aoki-h-jp/bybit-bulk-downloader
pipenv install --dev
- name: Run pytest
2 changes: 1 addition & 1 deletion .gitignore
@@ -36,7 +36,7 @@ MANIFEST
pip-log.txt
pip-delete-this-directory.txt

-# Unit test / coverage reports
+# Unit tests / coverage reports
htmlcov/
.tox/
.nox/
78 changes: 78 additions & 0 deletions README.md
@@ -1,2 +1,80 @@
# bybit-bulk-downloader
[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-3110/)
[![Format code](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/Formatter.yml/badge.svg)](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/Formatter.yml)
[![Run pytest on all branches](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/pytest.yaml/badge.svg)](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/pytest.yaml)

## Python library for bulk downloading Bybit historical data
A Python library to efficiently and concurrently download historical data files from Bybit. Supports all asset types (spot, USDT Perpetual, Inverse Perpetual & Inverse Futures).

## Installation

```bash
pip install git+https://github.com/aoki-h-jp/bybit-bulk-downloader
```

## Usage
### Download all kline_for_metatrader4 data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='kline_for_metatrader4')
downloader.run_download()
```

### Download all premium_index data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='premium_index')
downloader.run_download()
```

### Download all spot_index data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='spot_index')
downloader.run_download()
```

### Download all trading data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='trading')
downloader.run_download()
```

## pytest

```bash
python -m pytest
```
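
A single data type can also be selected with pytest's standard `-k` filter; for example, assuming the parametrized test IDs generated by `tests/test_download.py`:

```bash
python -m pytest -k trading
```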

## Available data types
✅: Implemented and tested. ❌: Not available on Bybit.

### by data_type

| data_type             | spot | futures |
| :-------------------- | :--: | :-----: |
| kline_for_metatrader4 |      |         |
| premium_index         |      |         |
| spot_index            |      |         |
| trading               |      |         |
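
The destination directory defaults to the current directory and can be changed with the `destination_dir` argument of `BybitBulkDownloader` (see `downloader.py` below); a minimal sketch, using a hypothetical path:

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

# Save the downloaded files under ./bybit (a hypothetical path)
downloader = BybitBulkDownloader(destination_dir="./bybit", data_type="trading")
downloader.run_download()
```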

## If you want to report a bug or request a feature
Please create an issue on this repository!

## Disclaimer
This project is for educational purposes only. You should not construe any such information or other material as legal,
tax, investment, financial, or other advice. Nothing contained here constitutes a solicitation, recommendation,
endorsement, or offer by me or any third party service provider to buy or sell any securities or other financial
instruments in this or in any other jurisdiction in which such solicitation or offer would be unlawful under the
securities laws of such jurisdiction.

Under no circumstances will I be held responsible or liable in any way for any claims, damages, losses, expenses, costs,
or liabilities whatsoever, including, without limitation, any direct or indirect damages for loss of profits.
1 change: 1 addition & 0 deletions bybit_bulk_downloader/__init__.py
@@ -1,3 +1,4 @@
"""
bybit bulk downloader: Python library to efficiently and concurrently download historical data files from Bybit. Supports all asset types (spot, USDT Perpetual, Inverse Perpetual & Inverse Futures).
"""
+import bybit_bulk_downloader.downloader
122 changes: 122 additions & 0 deletions bybit_bulk_downloader/downloader.py
@@ -0,0 +1,122 @@
"""
bybit_bulk_downloader
"""
# import standard libraries
import gzip
import os
import shutil
from concurrent.futures import ThreadPoolExecutor

# import third-party libraries
import requests
from bs4 import BeautifulSoup
from rich import print
from rich.progress import track


class BybitBulkDownloader:
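    """
    Bulk downloader for Bybit public historical data (https://public.bybit.com/).
    """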
_CHUNK_SIZE = 20
_BYBIT_DATA_DOWNLOAD_BASE_URL = "https://public.bybit.com/"
_DATA_TYPE = ("kline_for_metatrader4", "premium_index", "spot_index", "trading")

def __init__(self, destination_dir=".", data_type="trading"):
"""
:param destination_dir: Directory to save the downloaded data.
:param data_type: Data type to download. Available data types are: "kline_for_metatrader4", "premium_index", "spot_index", "trading".
"""
self._destination_dir = destination_dir
self._data_type = data_type
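        # Note: supported data_type values are the entries of the _DATA_TYPE tuple above.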

def _get_url_from_bybit(self):
"""
Get the URL of the data to download from Bybit.
:return: list of URLs to download.
"""
url = "https://public.bybit.com/" + self._data_type + "/"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
symbol_list = []
for link in soup.find_all("a"):
link_sym = link.get("href")
if self._data_type == "kline_for_metatrader4":
soup_year = BeautifulSoup(
requests.get(url + link.get("href")).text, "html.parser"
)
for link_year in soup_year.find_all("a"):
link_sym += link_year.get("href")
symbol_list.append(link_sym)
else:
symbol_list.append(link_sym)
download_list = []
for sym in track(symbol_list, description="Listing files"):
soup_sym = BeautifulSoup(requests.get(url + sym).text, "html.parser")
for link in soup_sym.find_all("a"):
download_list.append(url + sym + link.get("href"))

return download_list

@staticmethod
def make_chunks(lst, n) -> list:
"""
Make chunks
:param lst: Raw list
:param n: size of chunk
:return: list of chunks
"""
return [lst[i : i + n] for i in range(0, len(lst), n)]

def _download(self, url):
"""
Execute the download.
:param url: URL
:return: None
"""
print(f"Downloading: {url}")
        # Build the local path: <destination_dir>/bybit_data/<rest of the URL path>
        parts = url.split("/")
        parts.insert(3, "bybit_data")
        prefix_start = 3  # skip the scheme and host components of the URL
        filepath = os.path.join(str(self._destination_dir), *parts[prefix_start:])
        filedir = os.path.dirname(filepath)
        # Create the directory if it does not already exist
        os.makedirs(filedir, exist_ok=True)

print(f"[green]Downloading: {filepath}[/green]")
        # Stream the response body so large archives are written in chunks
        response = requests.get(url, stream=True)
with open(filepath, "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)

        # Decompress the file
        with gzip.open(filepath, mode="rb") as gzip_file:
            with open(filepath.replace(".gz", ""), mode="wb") as decompressed_file:
                shutil.copyfileobj(gzip_file, decompressed_file)
        print(f"[green]Unzipped: {filepath}[/green]")

# Delete the compressed file
os.remove(filepath)
print(f"[green]Deleted: {filepath}[/green]")

def run_download(self):
"""
Execute download concurrently.
:return: None
"""
print(
f"[bold blue]Downloading {self._data_type} data from Bybit...[/bold blue]"
)
for prefix_chunk in track(
self.make_chunks(self._get_url_from_bybit(), self._CHUNK_SIZE),
description="Downloading",
):
with ThreadPoolExecutor() as executor:
executor.map(self._download, prefix_chunk)
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@
name="bybit-bulk-downloader",
version="1.0.0",
description=" Python library to efficiently and concurrently download historical data files from Binance. Supports all asset types (spot, USDT-M, COIN-M, options) and all data frequencies.",
install_requires=["requests", "rich", "pytest"],
install_requires=["requests", "rich", "pytest", "bs4"],
author="aoki-h-jp",
author_email="[email protected]",
license="MIT",
Empty file added tests/__init__.py
88 changes: 88 additions & 0 deletions tests/test_download.py
@@ -0,0 +1,88 @@
# import standard libraries
import os

# import third party libraries
import pytest

# import my libraries
from bybit_bulk_downloader.downloader import BybitBulkDownloader

BYBIT_DATA = "bybit_data"
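# (mirrors the "bybit_data" directory that BybitBulkDownloader._download() inserts into each saved path)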


def dynamic_test_params():
"""
Generate params for tests
:return:
"""
for data_type in BybitBulkDownloader._DATA_TYPE:
yield pytest.param(data_type)


@pytest.mark.parametrize("data_type", dynamic_test_params())
def test_download(tmpdir, data_type):
"""
Test download
:param tmpdir:
:param data_type: data type
:return:
"""
downloader = BybitBulkDownloader(
destination_dir=tmpdir,
data_type=data_type,
)
if data_type == "kline_for_metatrader4":
single_download_url = "https://public.bybit.com/kline_for_metatrader4/ADAUSDT/2022/ADAUSDT_15_2022-09-01_2022-09-30.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"kline_for_metatrader4/ADAUSDT/2022/ADAUSDT_15_2022-09-01_2022-09-30.csv",
)
)

elif data_type == "premium_index":
single_download_url = "https://public.bybit.com/premium_index/ADAUSD/ADAUSD2022-03-24_premium_index.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"premium_index/ADAUSD/ADAUSD2022-03-24_premium_index.csv",
)
)

elif data_type == "spot_index":
single_download_url = "https://public.bybit.com/spot_index/ADAUSD/ADAUSD2022-03-24_index_price.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir, BYBIT_DATA, "spot_index/ADAUSD/ADAUSD2022-03-24_index_price.csv"
)
)

elif data_type == "trading":
single_download_url = "https://public.bybit.com/trading/10000LADYSUSDT/10000LADYSUSDT2023-05-11.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"trading/10000LADYSUSDT/10000LADYSUSDT2023-05-11.csv",
)
)

else:
raise ValueError("Invalid data type: {}".format(data_type))
