Skip to content

Commit

Permalink
Merge pull request #11 from aoki-h-jp/feature/1.0.1/example-liquidation
Browse files Browse the repository at this point in the history
Feature/1.0.1/example liquidation
  • Loading branch information
aoki-h-jp authored Sep 6, 2023
2 parents f307044 + 344587d commit feadb22
Show file tree
Hide file tree
Showing 11 changed files with 157 additions and 133 deletions.
9 changes: 3 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,11 @@ Download the following data from binance (USDT-M), bybit (linear) in one go.
| liquidationSnapshot |||

## /feature
### preprocessing
### preprocessing.py
Perform preprocessing for downloaded data.

### information_correlation
Calculate and Visualize information correlation (IC). High IC (>0.05) indicates an important feature.

### engineering
### engineering.py
Perform feature engineering for preprocessed data.

### evaluation
### evaluation.py
Evaluate the performance of the engineered features.
1 change: 1 addition & 0 deletions crypto_features/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
crypto_features
"""
import crypto_features.download
import crypto_features.example
import crypto_features.feature
Empty file.
5 changes: 5 additions & 0 deletions crypto_features/example/download_fundingrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from crypto_features.download.download_fundingrate import (
BinanceFundingRateDownload, BybitFundingRateDownload)

# Example: run the funding-rate downloaders for Binance and Bybit.
# These calls sit at module top level, so they execute as soon as this
# file is run (or imported).
BinanceFundingRateDownload().run_download()
BybitFundingRateDownload().run_download()
6 changes: 6 additions & 0 deletions crypto_features/example/download_klines.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from crypto_features.download.download_klines import (BinanceKlinesDownload,
BybitKlinesDownload)

# Example: download klines data from Binance and Bybit.
# These calls sit at module top level, so they execute as soon as this
# file is run (or imported).
BinanceKlinesDownload().run_download()
BybitKlinesDownload().run_download()
4 changes: 4 additions & 0 deletions crypto_features/example/download_liquidationsnapshot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from crypto_features.download.download_liquidationsnapshot import \
BinanceLiquidationSnapshotDownload

# Example: run the Binance liquidation-snapshot downloader.
# Only the Binance downloader is imported by this example.
BinanceLiquidationSnapshotDownload().run_download()
54 changes: 34 additions & 20 deletions crypto_features/feature/engineering.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
"""
Feature engineering module
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from .information_correlation import InformationCorrelation


class FeatureEngineering:
def __init__(self, **kwargs):
Expand Down Expand Up @@ -194,16 +191,18 @@ def count_liquidation(self, count_minutes: int) -> pd.Series:
:param count_minutes: minutes to count liquidation
"""

def count_buy_sell_in_last_1min(current_time, df):
subset = df[
(df.index < current_time)
& (df.index >= current_time - pd.Timedelta(minutes=count_minutes))
]
buy_count = subset[subset["side"] == "BUY"].shape[0]
sell_count = subset[subset["side"] == "SELL"].shape[0]
return buy_count - sell_count

return self._liquidationSnapshot.index.to_series().apply(
lambda x: len(
self._liquidationSnapshot[
(self._liquidationSnapshot.index < x)
& (
self._liquidationSnapshot.index
> x - pd.Timedelta(minutes=count_minutes)
)
]
)
lambda x: count_buy_sell_in_last_1min(x, self._liquidationSnapshot)
)

def count_quote_liquidation(self, count_minutes: int) -> pd.Series:
Expand All @@ -216,15 +215,30 @@ def count_quote_liquidation(self, count_minutes: int) -> pd.Series:
self._liquidationSnapshot["price"]
* self._liquidationSnapshot["original_quantity"]
)
return self._liquidationSnapshot.index.to_series().apply(
lambda x: self._liquidationSnapshot[
(self._liquidationSnapshot.index < x)
& (
self._liquidationSnapshot.index
> x - pd.Timedelta(minutes=count_minutes)
)
]["amount"].sum()
times = self._liquidationSnapshot.index.values
amounts = self._liquidationSnapshot["amount"].values
sides = self._liquidationSnapshot["side"].values

def np_adjusted_sum_amount_in_last_1min(idx):
current_time = times[idx]
mask = (times < current_time) & (
times >= current_time - pd.Timedelta(minutes=count_minutes)
)
adjusted_amounts = np.where(
sides[mask] == "BUY", amounts[mask], -amounts[mask]
)
return adjusted_amounts.sum()

se = pd.Series(
np.array(
[
np_adjusted_sum_amount_in_last_1min(i)
for i in range(len(self._liquidationSnapshot))
]
),
index=times,
)
return se

def mean_liquidation(self, count_minutes: int) -> pd.Series:
"""
Expand Down
90 changes: 88 additions & 2 deletions crypto_features/feature/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""
Evaluation of the features
"""
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from .information_correlation import InformationCorrelation
from rich import print
from scipy.stats import pearsonr
from sklearn.linear_model import LinearRegression


class EvaluationFeature:
Expand Down Expand Up @@ -52,3 +56,85 @@ def visualize_histogram(self, return_minutes: int):
plt.tight_layout()
plt.savefig(f"feature_vs_return_{return_minutes}.png")
plt.close()

def format_array(self, return_minutes=1):
    """
    Build aligned feature / forward-return arrays.

    The forward return is the percentage change of ``close`` over the next
    ``return_minutes`` rows, rounded to 4 decimals. It is stored on
    ``self._klines`` in the column ``close_chg_pct_after_{return_minutes}min``
    (``self._klines["close"]`` is also cast to float in place).

    :param return_minutes: number of rows ahead used for the return.
    :return: tuple ``(feature_arr, return_arr)`` of equal length, restricted
        to index values present in both the feature and the klines.
    :raises AssertionError: if the two filtered arrays differ in length.
    """
    klines = self._klines
    feature = self._feature

    close_chg_pct_header = f"close_chg_pct_after_{return_minutes}min"
    klines["close"] = klines["close"].astype(float)

    # Back-fill gaps explicitly and pass fill_method=None: the
    # ``fill_method="bfill"`` keyword is deprecated in recent pandas, while
    # this spelling yields the same numbers on old and new versions.
    forward_return = (
        klines["close"]
        .bfill()
        .pct_change(return_minutes, fill_method=None)
        # Shift back so each row holds the change *after* that row.
        .shift(-return_minutes)
    )
    # Rows without a defined return (tail rows, division by zero) become 0.
    forward_return = forward_return.fillna(0).replace([np.inf, -np.inf], 0)
    klines[close_chg_pct_header] = forward_return.astype(float).round(4)

    # Keep only timestamps present on both sides.
    feature_arr = feature[feature.index.isin(klines.index)].values
    return_arr = klines[klines.index.isin(feature.index)][
        close_chg_pct_header
    ].values

    assert len(feature_arr) == len(
        return_arr
    ), f"len(feature_arr)={len(feature_arr)}, len(return_arr)={len(return_arr)}"

    return feature_arr, return_arr

def information_correlation(self, return_minutes=1, **kwargs):
    """
    Calculate and visualize the information correlation.

    Computes Pearson's correlation between the feature and the forward
    return, fits a linear regression of return on feature, and saves a
    scatter plot with the fitted line under ``information_correlation/``.

    :param return_minutes: The return minutes.
    :param kwargs: optional ``save_name``; when truthy it is appended to the
        output file name as ``_{save_name}`` before the ``.png`` suffix.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("information_correlation", exist_ok=True)

    feature = self._feature

    # Use this class's own format_array: the former InformationCorrelation
    # helper is no longer available in this module.
    feature_arr, klines_arr = self.format_array(return_minutes)
    print("[green] Start calculating the information correlation... [/green]")

    # Pearson's correlation coefficient
    rho, pval = pearsonr(feature_arr, klines_arr)
    print(f"rho={rho}, pval={pval}")

    # scikit-learn expects 2-D inputs, hence the reshape to column vectors.
    lr = LinearRegression()
    lr.fit(feature_arr.reshape(-1, 1), klines_arr.reshape(-1, 1))
    print(f"coef={lr.coef_[0][0]}, intercept={lr.intercept_[0]}")

    # Visualize (returns are scaled by 100 to be shown in percent)
    plt.scatter(feature_arr, klines_arr * 100)
    plt.plot(
        feature_arr,
        lr.predict(feature_arr.reshape(-1, 1)) * 100,
        color="red",
        linewidth=1,
        linestyle="-.",
    )
    plt.xlabel(feature.name)
    plt.ylabel(f"close_chg_pct_after_{return_minutes}min [%]")
    plt.title(
        f"rho={round(rho, 3)}, pval={round(pval, 3)}\ncoef={round(lr.coef_[0][0], 3)}, intercept={round(lr.intercept_[0], 3)}\n{feature.name} vs close_chg_pct_after_{return_minutes}min"
    )
    plt.tight_layout()
    save_dir = f"information_correlation/{feature.name}_vs_close_chg_pct_after_{return_minutes}min.png"
    if kwargs.get("save_name", False):
        save_dir = save_dir.replace(".png", f"_{kwargs['save_name']}.png")
    plt.savefig(save_dir)
    print(f"Saved: {save_dir}")
    plt.close()
104 changes: 0 additions & 104 deletions crypto_features/feature/information_correlation.py

This file was deleted.

15 changes: 15 additions & 0 deletions crypto_features/feature/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,15 @@ def _load_liquidationsnapshot_data(self, symbol) -> pd.DataFrame:

return df

def load_klines_data(self, *args, **kwargs):
    """
    Public entry point for loading klines data.

    Forwards all arguments unchanged to ``self._load_klines_data`` and
    returns whatever it returns, so the loaded data is not discarded.

    NOTE(review): the private loader's signature is not visible here; its
    sibling loaders take a ``symbol`` argument -- confirm and pass it.
    """
    return self._load_klines_data(*args, **kwargs)

def load_fundingrate_data(self, *args, **kwargs):
    """
    Public entry point for loading funding rate data.

    Forwards all arguments unchanged to ``self._load_fundingrate_data`` and
    returns whatever it returns, so the loaded data is not discarded.

    NOTE(review): the private loader's signature is not visible here; its
    sibling loaders take a ``symbol`` argument -- confirm and pass it.
    """
    return self._load_fundingrate_data(*args, **kwargs)

def load_liquidationsnapshot_data(self, *args, **kwargs) -> pd.DataFrame:
    """
    Public entry point for loading liquidation snapshot data.

    Forwards all arguments unchanged to
    ``self._load_liquidationsnapshot_data`` (which takes ``symbol``) and
    returns the DataFrame it produces instead of discarding it.

    :return: the DataFrame returned by the private loader.
    """
    return self._load_liquidationsnapshot_data(*args, **kwargs)


class PreprocessingBybit:
"""
Expand Down Expand Up @@ -258,3 +267,9 @@ def _load_fundingrate_data(self, symbol) -> pd.DataFrame:
df.set_index("timestamp_open", inplace=True)

return df

def load_klines_data(self, *args, **kwargs):
    """
    Public entry point for loading klines data.

    Forwards all arguments unchanged to ``self._load_klines_data`` and
    returns whatever it returns, so the loaded data is not discarded.

    NOTE(review): the private loader's signature is not visible here; the
    funding-rate loader of this class takes ``symbol`` -- confirm and pass it.
    """
    return self._load_klines_data(*args, **kwargs)

def load_fundingrate_data(self, *args, **kwargs) -> pd.DataFrame:
    """
    Public entry point for loading funding rate data.

    Forwards all arguments unchanged to ``self._load_fundingrate_data``
    (which takes ``symbol``) and returns the DataFrame it produces instead
    of discarding it.

    :return: the DataFrame returned by the private loader.
    """
    return self._load_fundingrate_data(*args, **kwargs)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="crypto-features",
version="1.0.0",
version="1.0.1",
description="Analyse, calculate and store features from historical and raw data of cryptocurrency.",
install_requires=[],
author="aoki-h-jp",
Expand Down

0 comments on commit feadb22

Please sign in to comment.