Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deduplicate_pairs_by_volume() #183

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
- Add: `wrangle.normalise_volume` to deal with different volume formats of Uniswap v2 and v3
- Add: Support for Coingecko data loading and metadata cross referencing. See `tradingstrategy.alternative_data.coingecko`.
- Add: `tradingstrategy.alternative_data.coingecko.categorise_pairs()` to tag trading pair data with their CoinGecko category
-
- Add: `deduplicate_pairs_by_volume()` to make it easier to construct trading pair baskets from open-ended universes

# 0.24.3

- Fix: Allow running without Jupyter notebook/IPython installed. Make sure you use `Client.create_live_client()` instead of `Client.create_jupyter_client()`
Expand Down
83 changes: 83 additions & 0 deletions scripts/meme-token-basket.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Create a basket of meme tokens on Ethereum mainnet.

- Use Coingecko labelling

- Cross-reference to Trading Strategy data

- Build a basket of available categorised (meme) tokens on Ethereum mainnet
"""

import logging
import os
import sys

from tradingstrategy.alternative_data.coingecko import CoingeckoUniverse, categorise_pairs
from tradingstrategy.chain import ChainId
from tradingstrategy.client import Client
from tradingstrategy.pair import PandasPairUniverse
from tradingstrategy.utils.token_filter import deduplicate_pairs_by_volume


def main():
    """Build and log a deduplicated basket of meme token pairs on Ethereum mainnet.

    - Load the Coingecko universe and the Trading Strategy pair universe

    - Tag the trading pairs with their Coingecko category and pick the ``Meme`` ones

    - Narrow the pairs down to Ethereum mainnet

    - Deduplicate so that each base token is represented by a single trading pair

    The API key is read from the ``TRADING_STRATEGY_API_KEY`` environment variable;
    a ``KeyError`` is raised if it is not set.
    """

    # Local import: only needed for previewing a handful of pairs below
    from itertools import islice

    logging.basicConfig(handlers=[logging.StreamHandler(sys.stdout)], level=logging.INFO)
    logger = logging.getLogger(__name__)

    chain_id = ChainId.ethereum
    category = "Meme"
    client = Client.create_live_client(api_key=os.environ['TRADING_STRATEGY_API_KEY'])

    coingecko_universe = CoingeckoUniverse.load()
    logger.info("Coingecko universe is %s", coingecko_universe)

    pairs_df = client.fetch_pair_universe().to_pandas()
    category_df = categorise_pairs(coingecko_universe, pairs_df)

    # Get all trading pairs that are memecoin, across all coingecko data
    mask = category_df["category"] == category
    meme_pair_ids = category_df[mask]["pair_id"]

    logger.info("Memecoin pairs across all chain %s", len(meme_pair_ids))

    # From these pair ids, see what trading pairs we have on Ethereum mainnet.
    # Take an explicit copy: the filtered frame is handed to code that may add
    # columns to it, and writing to a filtered selection of another DataFrame
    # triggers pandas' SettingWithCopyWarning.
    pairs_df = pairs_df[pairs_df["pair_id"].isin(meme_pair_ids) & (pairs_df["chain_id"] == chain_id.value)].copy()
    logger.info("Ethereum filtered memecoins %s", len(pairs_df))

    pairs_universe = PandasPairUniverse(pairs_df)

    logger.info("Example pairs:")
    # islice() stops after ten pairs instead of materialising the whole universe
    for pair in islice(pairs_universe.iterate_pairs(), 10):
        logger.info("   Pair: %s", pair)
    # SHIB - WETH (0x811beed0119b4afce20d2583eb608c6f7af1954f) at exchange 1 on ethereum>
    # SHIB - USDT (0x773dd321873fe70553acc295b1b49a104d968cc8) at exchange 1 on ethereum>
    # LEASH - WETH (0x874376be8231dad99aabf9ef0767b3cc054c60ee) at exchange 1 on ethereum>
    # LEASH - DAI (0x761d5dca312484036de12ba22b660a2e5b1aa211) at exchange 1 on ethereum>

    # Deduplicate trading pairs

    # - Choose the best pair with the best volume
    pairs_df = deduplicate_pairs_by_volume(pairs_df)
    pairs_universe = PandasPairUniverse(pairs_df)

    logger.info("Example of deduplicated pairs:")
    for pair in islice(pairs_universe.iterate_pairs(), 10):
        logger.info("   Pair: %s", pair)
    # INFO:__main__:   Pair: <Pair #37836 SHIB - WETH (0x24d3dd4a62e29770cf98810b09f89d3a90279e7a) at exchange 22 on ethereum>
    # INFO:__main__:Example of deduplicated pairs:
    # INFO:__main__:   Pair: <Pair #3018988 PEPE - WETH (0x11950d141ecb863f01007add7d1a342041227b58) at exchange 3681 on ethereum>
    # INFO:__main__:   Pair: <Pair #3047249 TURBO - WETH (0x8107fca5494375fc743a9fc4d4844353a1af3d94) at exchange 3681 on ethereum>
    # INFO:__main__:   Pair: <Pair #3842242 Neiro - WETH (0x15153da0e9e13cfc167b3d417d3721bf545479bb) at exchange 3681 on ethereum>
    # INFO:__main__:   Pair: <Pair #3376429 MEME - WETH (0x70cf99553471fe6c0d513ebfac8acc55ba02ab7b) at exchange 3681 on ethereum>

    logger.info(
        "Total %d pairs to trade on %s for category %s",
        len(pairs_df),
        chain_id.name,
        category,
    )

    print("All ok")


# Run the basket construction only when executed as a script, not on import
if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions tradingstrategy/alternative_data/coingecko.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,8 @@ def categorise_pairs(

See also :py:attr:`SAMPLE_CATEGORIES` for some example categories.

See also another example in :py:func:`tradingstrategy.utils.token_filter.deduplicate_pairs_by_volume`.

:param coingecko_universe:
Coingecko data bundle.

Expand Down
91 changes: 91 additions & 0 deletions tradingstrategy/utils/token_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,3 +634,94 @@ def add_base_quote_address_columns(pairs_df: pd.DataFrame) -> pd.DataFrame:
pairs_df["base_token_address"] = np.where(token0_is_base_token_mask, pairs_df["token0_address"], pairs_df["token1_address"])
pairs_df["quote_token_address"] = np.where(~token0_is_base_token_mask, pairs_df["token0_address"], pairs_df["token1_address"])
return pairs_df


def deduplicate_pairs_by_volume(pairs_df: pd.DataFrame) -> pd.DataFrame:
    """Deduplicate trading pairs so that each base token appears only once.

    - For each base token, we might have several trading pairs with different quote tokens (WETH, USDC)
      and different DEXes (uniswap-v2, uniswap-v3)

    - In this function, we deduplicate the incoming trading pairs so that we pick one with the best volume
      (30 days buy volume + sell volume, whatever was recorded at the time of creating the pairs dataset).

    - Note that a smarter method of doing this is to check trading fee + liquidity,
      but for that we may need to download the liquidity universe - this method
      using "some past" record of volume is simpler

    - The incoming DataFrame is not modified, a new DataFrame is returned

    Example:

    .. code-block:: python

        chain_id = ChainId.ethereum
        category = "Meme"
        client = Client.create_live_client(api_key=os.environ['TRADING_STRATEGY_API_KEY'])

        coingecko_universe = CoingeckoUniverse.load()
        logger.info("Coingecko universe is %s", coingecko_universe)

        pairs_df = client.fetch_pair_universe().to_pandas()
        category_df = categorise_pairs(coingecko_universe, pairs_df)

        # Get all trading pairs that are memecoin, across all coingecko data
        mask = category_df["category"] == category
        meme_pair_ids = category_df[mask]["pair_id"]

        logger.info("Memecoin pairs across all chain %s", len(meme_pair_ids))

        # From these pair ids, see what trading pairs we have on Ethereum mainnet
        pairs_df = pairs_df[pairs_df["pair_id"].isin(meme_pair_ids) & (pairs_df["chain_id"] == chain_id.value)]
        logger.info("Ethereum filtered memecoins %s", len(pairs_df))

        pairs_universe = PandasPairUniverse(pairs_df)

        logger.info("Example pairs:")
        for pair in list(pairs_universe.iterate_pairs())[0:10]:
            logger.info("   Pair: %s", pair)
        # SHIB - WETH (0x811beed0119b4afce20d2583eb608c6f7af1954f) at exchange 1 on ethereum>
        # SHIB - USDT (0x773dd321873fe70553acc295b1b49a104d968cc8) at exchange 1 on ethereum>
        # LEASH - WETH (0x874376be8231dad99aabf9ef0767b3cc054c60ee) at exchange 1 on ethereum>
        # LEASH - DAI (0x761d5dca312484036de12ba22b660a2e5b1aa211) at exchange 1 on ethereum>

        # Deduplicate trading pairs

        # - Choose the best pair with the best volume
        pairs_df = deduplicate_pairs_by_volume(pairs_df)
        pairs_universe = PandasPairUniverse(pairs_df)

        logger.info("Example of deduplicated pairs:")
        for pair in list(pairs_universe.iterate_pairs())[0:10]:
            logger.info("   Pair: %s", pair)
        # INFO:__main__:   Pair: <Pair #37836 SHIB - WETH (0x24d3dd4a62e29770cf98810b09f89d3a90279e7a) at exchange 22 on ethereum>
        # INFO:__main__:Example of deduplicated pairs:
        # INFO:__main__:   Pair: <Pair #3018988 PEPE - WETH (0x11950d141ecb863f01007add7d1a342041227b58) at exchange 3681 on ethereum>
        # INFO:__main__:   Pair: <Pair #3047249 TURBO - WETH (0x8107fca5494375fc743a9fc4d4844353a1af3d94) at exchange 3681 on ethereum>
        # INFO:__main__:   Pair: <Pair #3842242 Neiro - WETH (0x15153da0e9e13cfc167b3d417d3721bf545479bb) at exchange 3681 on ethereum>
        # INFO:__main__:   Pair: <Pair #3376429 MEME - WETH (0x70cf99553471fe6c0d513ebfac8acc55ba02ab7b) at exchange 3681 on ethereum>

        logger.info(
            "Total %d pairs to trade on %s for category %s",
            len(pairs_df),
            chain_id.name,
            category,
        )

    :param pairs_df:
        Trading pairs. Must contain ``base_token_symbol``, ``buy_volume_30d``
        and ``sell_volume_30d`` columns.

    :return:
        New DataFrame with one row per base token symbol, sorted by volume
        (highest first), with an added ``volume`` column.
    """

    # Normalise volume.
    # assign() builds a new DataFrame, so the caller's frame does not gain
    # a surprise "volume" column and no SettingWithCopyWarning is raised
    # when the caller passes in a filtered selection.
    ranked_df = pairs_df.assign(volume=pairs_df["buy_volume_30d"] + pairs_df["sell_volume_30d"])

    # We sort by volume and then filter out.
    # Stable sort makes the result deterministic when volumes are tied.
    ranked_df = ranked_df.sort_values(by="volume", ascending=False, kind="stable")

    # After sorting, the first row seen for each base token is its highest volume pair.
    # NOTE(review): deduplication is by symbol, so distinct tokens sharing a ticker
    # collapse into one row - confirm this is intended instead of using the token address.
    return ranked_df.drop_duplicates(subset="base_token_symbol", keep="first")
Loading