Merge pull request #2 from aoki-h-jp/feature/1.0.0/downloading
Feature/1.0.0/downloading
aoki-h-jp authored Aug 25, 2023
2 parents 21765be + 5dda4cf commit 6dc7836
Showing 8 changed files with 292 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yaml
@@ -19,7 +19,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install pipenv
-pip install git+https://github.com/aoki-h-jp/binance-bulk-downloader
+pip install git+https://github.com/aoki-h-jp/bybit-bulk-downloader
pipenv install --dev
- name: Run pytest
2 changes: 1 addition & 1 deletion .gitignore
@@ -36,7 +36,7 @@ MANIFEST
pip-log.txt
pip-delete-this-directory.txt

-# Unit test / coverage reports
+# Unit tests / coverage reports
htmlcov/
.tox/
.nox/
78 changes: 78 additions & 0 deletions README.md
@@ -1,2 +1,80 @@
# bybit-bulk-downloader
[![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)](https://www.python.org/downloads/release/python-3110/)
[![Format code](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/Formatter.yml/badge.svg)](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/Formatter.yml)
[![Run pytest on all branches](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/pytest.yaml/badge.svg)](https://github.com/aoki-h-jp/bybit-bulk-downloader/actions/workflows/pytest.yaml)

## Python library for bulk downloading Bybit historical data
A Python library to efficiently and concurrently download historical data files from Bybit. Supports all asset types (spot, USDT Perpetual, Inverse Perpetual & Inverse Futures).

## Installation

```bash
pip install git+https://github.com/aoki-h-jp/bybit-bulk-downloader
```

## Usage
### Download all kline_for_metatrader4 data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='kline_for_metatrader4')
downloader.run_download()
```

### Download all premium_index data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='premium_index')
downloader.run_download()
```

### Download all spot_index data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='spot_index')
downloader.run_download()
```

### Download all trading data

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

downloader = BybitBulkDownloader(data_type='trading')
downloader.run_download()
```

## pytest

```bash
python -m pytest
```
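
A single data type can also be selected with pytest's standard `-k` filter; for example, assuming the parametrized test IDs generated by `tests/test_download.py`:

```bash
python -m pytest -k trading
```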

## Available data types
✅: Implemented and tested. ❌: Not available on Bybit.

### by data_type

| data_type             | spot | futures |
| :-------------------- | :--: | :-----: |
| kline_for_metatrader4 |      |         |
| premium_index         |      |         |
| spot_index            |      |         |
| trading               |      |         |
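
The destination directory defaults to the current directory and can be changed with the `destination_dir` argument of `BybitBulkDownloader` (see `downloader.py` below); a minimal sketch, using a hypothetical path:

```python
from bybit_bulk_downloader.downloader import BybitBulkDownloader

# Save the downloaded files under ./bybit (a hypothetical path)
downloader = BybitBulkDownloader(destination_dir="./bybit", data_type="trading")
downloader.run_download()
```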

## If you want to report a bug or request a feature
Please create an issue on this repository!

## Disclaimer
This project is for educational purposes only. You should not construe any such information or other material as legal,
tax, investment, financial, or other advice. Nothing contained here constitutes a solicitation, recommendation,
endorsement, or offer by me or any third party service provider to buy or sell any securities or other financial
instruments in this or in any other jurisdiction in which such solicitation or offer would be unlawful under the
securities laws of such jurisdiction.

Under no circumstances will I be held responsible or liable in any way for any claims, damages, losses, expenses, costs,
or liabilities whatsoever, including, without limitation, any direct or indirect damages for loss of profits.
1 change: 1 addition & 0 deletions bybit_bulk_downloader/__init__.py
@@ -1,3 +1,4 @@
"""
bybit bulk downloader: Python library to efficiently and concurrently download historical data files from Bybit. Supports all asset types (spot, USDT Perpetual, Inverse Perpetual & Inverse Futures).
"""
+import bybit_bulk_downloader.downloader
122 changes: 122 additions & 0 deletions bybit_bulk_downloader/downloader.py
@@ -0,0 +1,122 @@
"""
bybit_bulk_downloader
"""
# import standard libraries
import gzip
import os
import shutil
from concurrent.futures import ThreadPoolExecutor

# import third-party libraries
import requests
from bs4 import BeautifulSoup
from rich import print
from rich.progress import track


class BybitBulkDownloader:
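    """
    Bulk downloader for Bybit public historical data (https://public.bybit.com/).
    """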
_CHUNK_SIZE = 20
_BYBIT_DATA_DOWNLOAD_BASE_URL = "https://public.bybit.com/"
_DATA_TYPE = ("kline_for_metatrader4", "premium_index", "spot_index", "trading")

def __init__(self, destination_dir=".", data_type="trading"):
"""
:param destination_dir: Directory to save the downloaded data.
:param data_type: Data type to download. Available data types are: "kline_for_metatrader4", "premium_index", "spot_index", "trading".
"""
self._destination_dir = destination_dir
self._data_type = data_type
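        # Note: supported data_type values are the entries of the _DATA_TYPE tuple above.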

def _get_url_from_bybit(self):
"""
Get the URL of the data to download from Bybit.
:return: list of URLs to download.
"""
url = "https://public.bybit.com/" + self._data_type + "/"
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
symbol_list = []
for link in soup.find_all("a"):
link_sym = link.get("href")
if self._data_type == "kline_for_metatrader4":
soup_year = BeautifulSoup(
requests.get(url + link.get("href")).text, "html.parser"
)
for link_year in soup_year.find_all("a"):
link_sym += link_year.get("href")
symbol_list.append(link_sym)
else:
symbol_list.append(link_sym)
download_list = []
for sym in track(symbol_list, description="Listing files"):
soup_sym = BeautifulSoup(requests.get(url + sym).text, "html.parser")
for link in soup_sym.find_all("a"):
download_list.append(url + sym + link.get("href"))

return download_list

@staticmethod
def make_chunks(lst, n) -> list:
"""
Make chunks
:param lst: Raw list
:param n: size of chunk
:return: list of chunks
"""
return [lst[i : i + n] for i in range(0, len(lst), n)]

def _download(self, url):
"""
Execute the download.
:param url: URL
:return: None
"""
print(f"Downloading: {url}")
        # Build the local path: <destination_dir>/bybit_data/<rest of the URL path>
        parts = url.split("/")
        parts.insert(3, "bybit_data")
        prefix_start = 3  # skip the scheme and host components of the URL
        filepath = os.path.join(str(self._destination_dir), *parts[prefix_start:])
        filedir = os.path.dirname(filepath)
        # Create the directory if it does not already exist
        os.makedirs(filedir, exist_ok=True)

print(f"[green]Downloading: {filepath}[/green]")
        # Stream the response body so large archives are written in chunks
        response = requests.get(url, stream=True)
with open(filepath, "wb") as file:
for chunk in response.iter_content(chunk_size=8192):
file.write(chunk)

        # Decompress the file
        with gzip.open(filepath, mode="rb") as gzip_file:
            with open(filepath.replace(".gz", ""), mode="wb") as decompressed_file:
                shutil.copyfileobj(gzip_file, decompressed_file)
        print(f"[green]Unzipped: {filepath}[/green]")

# Delete the compressed file
os.remove(filepath)
print(f"[green]Deleted: {filepath}[/green]")

def run_download(self):
"""
Execute download concurrently.
:return: None
"""
print(
f"[bold blue]Downloading {self._data_type} data from Bybit...[/bold blue]"
)
for prefix_chunk in track(
self.make_chunks(self._get_url_from_bybit(), self._CHUNK_SIZE),
description="Downloading",
):
with ThreadPoolExecutor() as executor:
executor.map(self._download, prefix_chunk)
2 changes: 1 addition & 1 deletion setup.py
@@ -4,7 +4,7 @@
name="bybit-bulk-downloader",
version="1.0.0",
description=" Python library to efficiently and concurrently download historical data files from Binance. Supports all asset types (spot, USDT-M, COIN-M, options) and all data frequencies.",
install_requires=["requests", "rich", "pytest"],
install_requires=["requests", "rich", "pytest", "bs4"],
author="aoki-h-jp",
author_email="[email protected]",
license="MIT",
Empty file added tests/__init__.py
88 changes: 88 additions & 0 deletions tests/test_download.py
@@ -0,0 +1,88 @@
# import standard libraries
import os

# import third party libraries
import pytest

# import my libraries
from bybit_bulk_downloader.downloader import BybitBulkDownloader

BYBIT_DATA = "bybit_data"
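# (mirrors the "bybit_data" directory that BybitBulkDownloader._download() inserts into each saved path)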


def dynamic_test_params():
"""
Generate params for tests
:return:
"""
for data_type in BybitBulkDownloader._DATA_TYPE:
yield pytest.param(data_type)


@pytest.mark.parametrize("data_type", dynamic_test_params())
def test_download(tmpdir, data_type):
"""
Test download
:param tmpdir:
:param data_type: data type
:return:
"""
downloader = BybitBulkDownloader(
destination_dir=tmpdir,
data_type=data_type,
)
if data_type == "kline_for_metatrader4":
single_download_url = "https://public.bybit.com/kline_for_metatrader4/ADAUSDT/2022/ADAUSDT_15_2022-09-01_2022-09-30.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"kline_for_metatrader4/ADAUSDT/2022/ADAUSDT_15_2022-09-01_2022-09-30.csv",
)
)

elif data_type == "premium_index":
single_download_url = "https://public.bybit.com/premium_index/ADAUSD/ADAUSD2022-03-24_premium_index.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"premium_index/ADAUSD/ADAUSD2022-03-24_premium_index.csv",
)
)

elif data_type == "spot_index":
single_download_url = "https://public.bybit.com/spot_index/ADAUSD/ADAUSD2022-03-24_index_price.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir, BYBIT_DATA, "spot_index/ADAUSD/ADAUSD2022-03-24_index_price.csv"
)
)

elif data_type == "trading":
single_download_url = "https://public.bybit.com/trading/10000LADYSUSDT/10000LADYSUSDT2023-05-11.csv.gz"
downloader._download(single_download_url)
        # The test passes if the decompressed csv exists in the destination dir.
assert os.path.exists(
os.path.join(
tmpdir,
BYBIT_DATA,
"trading/10000LADYSUSDT/10000LADYSUSDT2023-05-11.csv",
)
)

else:
raise ValueError("Invalid data type: {}".format(data_type))
