Skip to content

Commit

Permalink
Cuda fix (#1595)
Browse files Browse the repository at this point in the history
* tpb arg mismatch messing with CUDA, works now

* update readme to include cubit install instructions for PoW

* fix tests
  • Loading branch information
ifrit98 authored Nov 28, 2023
1 parent 2de7bb5 commit 8941ae0
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 42 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ or using python
import bittensor
```

#### CUDA
If you anticipate using PoW registration for subnets or the faucet (only available on staging), please install `cubit` as well for your version of Python. You can find the Opentensor cubit implementation and instructions [here](https://github.com/opentensor/cubit).

For example, with Python 3.10:
```bash
pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl
```

# Wallets

Wallets are the core ownership and identity technology around which all functions on Bittensor are carried out. Bittensor wallets consist of a coldkey and a hotkey, where the coldkey may contain many hotkeys, while each hotkey can only belong to a single coldkey. Coldkeys store funds securely, and operate functions such as transfers and staking, while hotkeys are used for all online operations such as signing queries, running miners and validating.
Expand Down
2 changes: 1 addition & 1 deletion bittensor/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"update_interval": 50000,
"output_in_place": True,
"verbose": False,
"cuda": {"dev_id": [0], "use_cuda": False, "TPB": 256},
"cuda": {"dev_id": [0], "use_cuda": False, "tpb": 256},
},
"axon": {
"port": 8091,
Expand Down
20 changes: 10 additions & 10 deletions bittensor/commands/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class PowRegisterCommand:
- --pow_register.cuda.use_cuda (bool): Enables the use of CUDA for GPU-accelerated PoW calculations. Requires a CUDA-compatible GPU.
- --pow_register.cuda.no_cuda (bool): Disables the use of CUDA, defaulting to CPU-based calculations.
- --pow_register.cuda.dev_id (int): Specifies the CUDA device ID, useful for systems with multiple CUDA-compatible GPUs.
- --pow_register.cuda.TPB (int): Sets the number of Threads Per Block for CUDA operations, affecting the GPU calculation dynamics.
- --pow_register.cuda.tpb (int): Sets the number of Threads Per Block for CUDA operations, affecting the GPU calculation dynamics.
The command also supports additional wallet and subtensor arguments, enabling further customization of the registration process.
Expand Down Expand Up @@ -189,7 +189,7 @@ def run(cli):
wallet=wallet,
netuid=cli.config.netuid,
prompt=not cli.config.no_prompt,
TPB=cli.config.pow_register.cuda.get("TPB", None),
tpb=cli.config.pow_register.cuda.get("tpb", None),
update_interval=cli.config.pow_register.get("update_interval", None),
num_processes=cli.config.pow_register.get("num_processes", None),
cuda=cli.config.pow_register.cuda.get(
Expand Down Expand Up @@ -281,10 +281,10 @@ def add_args(parser: argparse.ArgumentParser):
required=False,
)
register_parser.add_argument(
"--pow_register.cuda.TPB",
"--cuda.TPB",
"--pow_register.cuda.tpb",
"--cuda.tpb",
type=int,
default=defaults.pow_register.cuda.TPB,
default=defaults.pow_register.cuda.tpb,
help="""Set the number of Threads Per Block for CUDA.""",
required=False,
)
Expand Down Expand Up @@ -342,7 +342,7 @@ class RunFaucetCommand:
- --faucet.cuda.use_cuda (bool): Activates the use of CUDA for GPU acceleration in the PoW process, suitable for CUDA-compatible GPUs.
- --faucet.cuda.no_cuda (bool): Disables the use of CUDA, opting for CPU-based calculations.
- --faucet.cuda.dev_id (int[]): Allows selection of specific CUDA device IDs for the operation, useful in multi-GPU setups.
- --faucet.cuda.TPB (int): Determines the number of Threads Per Block for CUDA operations, affecting GPU calculation efficiency.
- --faucet.cuda.tpb (int): Determines the number of Threads Per Block for CUDA operations, affecting GPU calculation efficiency.
These options provide flexibility in configuring the PoW process according to the user's hardware capabilities and preferences.
Expand All @@ -364,7 +364,7 @@ def run(cli):
subtensor.run_faucet(
wallet=wallet,
prompt=not cli.config.no_prompt,
TPB=cli.config.pow_register.cuda.get("TPB", None),
tpb=cli.config.pow_register.cuda.get("tpb", None),
update_interval=cli.config.pow_register.get("update_interval", None),
num_processes=cli.config.pow_register.get("num_processes", None),
cuda=cli.config.pow_register.cuda.get(
Expand Down Expand Up @@ -449,10 +449,10 @@ def add_args(parser: argparse.ArgumentParser):
required=False,
)
run_faucet_parser.add_argument(
"--faucet.cuda.TPB",
"--cuda.TPB",
"--faucet.cuda.tpb",
"--cuda.tpb",
type=int,
default=defaults.pow_register.cuda.TPB,
default=defaults.pow_register.cuda.tpb,
help="""Set the number of Threads Per Block for CUDA.""",
required=False,
)
Expand Down
12 changes: 6 additions & 6 deletions bittensor/extrinsics/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def register_extrinsic(
output_in_place: bool = True,
cuda: bool = False,
dev_id: Union[List[int], int] = 0,
TPB: int = 256,
tpb: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
log_verbose: bool = False,
Expand All @@ -61,7 +61,7 @@ def register_extrinsic(
If true, the wallet should be registered using CUDA device(s).
dev_id (Union[List[int], int]):
The CUDA device id to use, or a list of device ids.
TPB (int):
tpb (int):
The number of threads per block (CUDA).
num_processes (int):
The number of processes to use to register.
Expand Down Expand Up @@ -123,7 +123,7 @@ def register_extrinsic(
output_in_place,
cuda=cuda,
dev_id=dev_id,
TPB=TPB,
tpb=tpb,
num_processes=num_processes,
update_interval=update_interval,
log_verbose=log_verbose,
Expand Down Expand Up @@ -339,7 +339,7 @@ def run_faucet_extrinsic(
output_in_place: bool = True,
cuda: bool = False,
dev_id: Union[List[int], int] = 0,
TPB: int = 256,
tpb: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
log_verbose: bool = False,
Expand All @@ -362,7 +362,7 @@ def run_faucet_extrinsic(
If true, the wallet should be registered using CUDA device(s).
dev_id (Union[List[int], int]):
The CUDA device id to use, or a list of device ids.
TPB (int):
tpb (int):
The number of threads per block (CUDA).
num_processes (int):
The number of processes to use to register.
Expand Down Expand Up @@ -410,7 +410,7 @@ def run_faucet_extrinsic(
output_in_place,
cuda=cuda,
dev_id=dev_id,
TPB=TPB,
tpb=tpb,
num_processes=num_processes,
update_interval=update_interval,
log_verbose=log_verbose,
Expand Down
8 changes: 4 additions & 4 deletions bittensor/subtensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def register(
output_in_place: bool = True,
cuda: bool = False,
dev_id: Union[List[int], int] = 0,
TPB: int = 256,
tpb: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
log_verbose: bool = False,
Expand All @@ -478,7 +478,7 @@ def register(
output_in_place=output_in_place,
cuda=cuda,
dev_id=dev_id,
TPB=TPB,
tpb=tpb,
num_processes=num_processes,
update_interval=update_interval,
log_verbose=log_verbose,
Expand Down Expand Up @@ -512,7 +512,7 @@ def run_faucet(
output_in_place: bool = True,
cuda: bool = False,
dev_id: Union[List[int], int] = 0,
TPB: int = 256,
tpb: int = 256,
num_processes: Optional[int] = None,
update_interval: Optional[int] = None,
log_verbose: bool = False,
Expand All @@ -528,7 +528,7 @@ def run_faucet(
output_in_place=output_in_place,
cuda=cuda,
dev_id=dev_id,
TPB=TPB,
tpb=tpb,
num_processes=num_processes,
update_interval=update_interval,
log_verbose=log_verbose,
Expand Down
6 changes: 3 additions & 3 deletions bittensor/utils/_register_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
def solve_cuda(
nonce_start: np.int64,
update_interval: np.int64,
TPB: int,
tpb: int,
block_and_hotkey_hash_bytes: bytes,
difficulty: int,
limit: int,
Expand All @@ -26,7 +26,7 @@ def solve_cuda(
Starting nonce.
update_interval: int64
Number of nonces to solve before updating block information.
TPB: int
tpb: int
Threads per block.
block_and_hotkey_hash_bytes: bytes
Keccak(Bytes of the block hash + bytes of the hotkey) 64 bytes.
Expand Down Expand Up @@ -78,7 +78,7 @@ def _seal_meets_difficulty(seal: bytes, difficulty: int):
block_and_hotkey_hash_hex = binascii.hexlify(block_and_hotkey_hash_bytes)[:64]

solution = cubit.solve_cuda(
TPB,
tpb,
nonce_start,
update_interval,
upper_bytes,
Expand Down
28 changes: 14 additions & 14 deletions bittensor/utils/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def run(self):

class _CUDASolver(_SolverBase):
dev_id: int
TPB: int
tpb: int

def __init__(
self,
Expand All @@ -225,7 +225,7 @@ def __init__(
check_block,
limit,
dev_id: int,
TPB: int,
tpb: int,
):
super().__init__(
proc_num,
Expand All @@ -241,7 +241,7 @@ def __init__(
limit,
)
self.dev_id = dev_id
self.TPB = TPB
self.tpb = tpb

def run(self):
block_number: int = 0 # dummy value
Expand Down Expand Up @@ -269,7 +269,7 @@ def run(self):
self.limit,
block_number,
self.dev_id,
self.TPB,
self.tpb,
)
if solution is not None:
self.solution_queue.put(solution)
Expand All @@ -282,7 +282,7 @@ def run(self):
pass

# increase nonce by number of nonces processed
nonce_start += self.update_interval * self.TPB
nonce_start += self.update_interval * self.tpb
nonce_start = nonce_start % nonce_limit


Expand All @@ -294,13 +294,13 @@ def _solve_for_nonce_block_cuda(
limit: int,
block_number: int,
dev_id: int,
TPB: int,
tpb: int,
) -> Optional[POWSolution]:
"""Tries to solve the POW on a CUDA device for a block of nonces (nonce_start, nonce_start + update_interval * TPB"""
"""Tries to solve the POW on a CUDA device for a block of nonces (nonce_start, nonce_start + update_interval * tpb"""
solution, seal = solve_cuda(
nonce_start,
update_interval,
TPB,
tpb,
block_and_hotkey_hash_bytes,
difficulty,
limit,
Expand Down Expand Up @@ -794,7 +794,7 @@ def _solve_for_difficulty_fast_cuda(
netuid: int,
output_in_place: bool = True,
update_interval: int = 50_000,
TPB: int = 512,
tpb: int = 512,
dev_id: Union[List[int], int] = 0,
n_samples: int = 10,
alpha_: float = 0.80,
Expand All @@ -813,7 +813,7 @@ def _solve_for_difficulty_fast_cuda(
If true, prints the output in place, otherwise prints to new lines
update_interval: int
The number of nonces to try before checking for more blocks
TPB: int
tpb: int
The number of threads per block. CUDA param that should match the GPU capability
dev_id: Union[List[int], int]
The CUDA device IDs to execute the registration on, either a single device or a list of devices
Expand Down Expand Up @@ -868,7 +868,7 @@ def _solve_for_difficulty_fast_cuda(
check_block,
limit,
dev_id[i],
TPB,
tpb,
)
for i in range(num_processes)
]
Expand Down Expand Up @@ -967,7 +967,7 @@ def _solve_for_difficulty_fast_cuda(
if num_time > 0 and time_since_last > 0.0:
# create EWMA of the hash_rate to make measure more robust

hash_rate_ = (num_time * TPB * update_interval) / time_since_last
hash_rate_ = (num_time * tpb * update_interval) / time_since_last
hash_rates.append(hash_rate_)
hash_rates.pop(0) # remove the 0th data point
curr_stats.hash_rate = sum(
Expand All @@ -987,7 +987,7 @@ def _solve_for_difficulty_fast_cuda(
curr_stats.time_spent = time_since_last
new_time_spent_total = time_now - start_time_perpetual
curr_stats.hash_rate_perpetual = (
curr_stats.rounds_total * (TPB * update_interval)
curr_stats.rounds_total * (tpb * update_interval)
) / new_time_spent_total
curr_stats.time_spent_total = new_time_spent_total

Expand Down Expand Up @@ -1071,7 +1071,7 @@ def create_pow(
netuid=netuid,
output_in_place=output_in_place,
dev_id=dev_id,
TPB=tpb,
tpb=tpb,
update_interval=update_interval,
log_verbose=log_verbose,
)
Expand Down
8 changes: 4 additions & 4 deletions tests/unit_tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,13 +639,13 @@ def test_multi_cuda_run_updates_nonce_start(self):
class MockException(Exception):
pass

TPB: int = 512
tpb: int = 512
update_interval: int = 70_000
nonce_limit: int = int(math.pow(2, 64)) - 1

mock_solver_self = MagicMock(
spec=_CUDASolver,
TPB=TPB,
tpb=tpb,
dev_id=0,
update_interval=update_interval,
stopEvent=MagicMock(is_set=MagicMock(return_value=False)),
Expand Down Expand Up @@ -690,10 +690,10 @@ class MockException(Exception):
initial_nonce_start,
"nonce_start was not updated after iteration",
)
## Should incerase by the number of nonces tried == TPB * update_interval
## Should increase by the number of nonces tried == tpb * update_interval
self.assertEqual(
nonce_start_after_iteration,
(initial_nonce_start + update_interval * TPB) % nonce_limit,
(initial_nonce_start + update_interval * tpb) % nonce_limit,
"nonce_start was not updated by the correct amount",
)

Expand Down

0 comments on commit 8941ae0

Please sign in to comment.