diff --git a/.gitignore b/.gitignore index ebfa7c0a..0bdb20f8 100644 --- a/.gitignore +++ b/.gitignore @@ -82,6 +82,9 @@ target/ profile_default/ ipython_config.py +# wandb +wandb/ + # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: @@ -253,3 +256,6 @@ cert/ # wandb wandb/ + +# neural internet register api +neurons/register-api/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..8cf0c865 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - repo: https://github.com/alessandrojcm/commitlint-pre-commit-hook + rev: v9.20.0 + hooks: + - id: commitlint + stages: [commit-msg] + additional_dependencies: ['@commitlint/config-angular'] \ No newline at end of file diff --git a/cert/gen_ca.sh b/cert/gen_ca.sh index 5a6e7671..103ca052 100644 --- a/cert/gen_ca.sh +++ b/cert/gen_ca.sh @@ -25,8 +25,8 @@ echo "2.2 Use the server private key to generate a certificate generation reques openssl req -new -key server.key -out server.req -sha256 -subj "/C=US/ST=NY/CN=server.neuralinternet.ai/O=NI" echo "2.3 Use the certificate generation request and the CA cert to generate the server cert." -openssl x509 -req -in server.req -CA ca.cer -CAkey ca.key -CAcreateserial -set_serial 100 -days "$ca_cert_expire_days" -outform PEM -passin pass:"$pem_password" -out server.cer -sha256 -extensions v3_req -extfile <( -cat << EOF +# Create a temporary extensions file +cat << EOF > extfile.cnf [ v3_req ] subjectAltName = @alt_names @@ -35,13 +35,16 @@ IP.1 = 127.0.0.1 IP.2 = 0.0.0.0 IP.3 = "$local_ip" EOF -) +openssl x509 -req -in server.req -CA ca.cer -CAkey ca.key -CAcreateserial -set_serial 100 -days "$ca_cert_expire_days" -outform PEM -passin pass:"$pem_password" -out server.cer -sha256 -extensions v3_req -extfile extfile.cnf + +# Remove the temporary extensions file +rm extfile.cnf echo "2.4 Convert the cer to PEM CRT format" openssl x509 -inform PEM -in server.cer -out server.crt -echo "2.5 Clean up – now that the cert has been created, we no longer need the request" +echo "2.5 Clean up - now that the cert has been created, we no longer need the request" rm server.req #for frontend server @@ -52,8 +55,8 @@ echo "3.2 Use the client private key to generate a certificate generation reques openssl req -new -key client.key -out client.req -subj "/C=US/ST=NY/CN=client.neuralinternet.ai/O=NI" echo "3.3 Use the certificate generation request and the CA cert to generate the client cert." -openssl x509 -req -in client.req -CA ca.cer -CAkey ca.key -CAcreateserial -set_serial 101 -days "$ca_cert_expire_days" -outform PEM -out client.cer -passin pass:"$pem_password" -extensions v3_req -extfile <( -cat << EOF +# Create a temporary extensions file +cat << EOF > extfile.cnf [ v3_req ] subjectAltName = @alt_names @@ -62,7 +65,11 @@ IP.1 = 127.0.0.1 IP.2 = 0.0.0.0 IP.3 = "$local_ip" EOF -) + +openssl x509 -req -in client.req -CA ca.cer -CAkey ca.key -CAcreateserial -set_serial 101 -days "$ca_cert_expire_days" -outform PEM -out client.cer -passin pass:"$pem_password" -extensions v3_req -extfile extfile.cnf + +# Remove the temporary extensions file +rm extfile.cnf echo "3.4 Convert the client certificate and private key to pkcs#12 format for use by browsers." openssl pkcs12 -export -inkey client.key -in client.cer -out client.p12 -passout pass:"$pem_password" @@ -70,5 +77,5 @@ openssl pkcs12 -export -inkey client.key -in client.cer -out client.p12 -passout echo "3.5. Convert the cer to PEM CRT format" openssl x509 -inform PEM -in client.cer -out client.crt -echo "3.6. Clean up – now that the cert has been created, we no longer need the request." +echo "3.6. Clean up - now that the cert has been created, we no longer need the request." rm client.req \ No newline at end of file diff --git a/commitlint-config.js b/commitlint-config.js new file mode 100644 index 00000000..6eaf62bb --- /dev/null +++ b/commitlint-config.js @@ -0,0 +1 @@ +module.exports = {extends: ['@commitlint/config-conventional']} \ No newline at end of file diff --git a/compute/__init__.py b/compute/__init__.py index 2f2006be..12b9c2e8 100644 --- a/compute/__init__.py +++ b/compute/__init__.py @@ -18,9 +18,9 @@ import string # Define the version of the template module. -__version__ = "1.7.5" -__minimal_miner_version__ = "1.6.0" -__minimal_validator_version__ = "1.7.5" +__version__ = "1.8.0" +__minimal_miner_version__ = "1.8.0" +__minimal_validator_version__ = "1.8.0" version_split = __version__.split(".") __version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) diff --git a/compute/axon.py b/compute/axon.py index 820102d1..842134ee 100644 --- a/compute/axon.py +++ b/compute/axon.py @@ -18,29 +18,44 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER # DEALINGS IN THE SOFTWARE. +# Standard library import copy -import json +import time import uuid -from typing import Optional +from inspect import Signature +from typing import TYPE_CHECKING, Callable, Optional -import bittensor -import bittensor.utils.networking as net -import time +# Third-party import uvicorn -from bittensor import axon, subtensor -from bittensor.axon import FastAPIThreadedServer, AxonMiddleware from fastapi import FastAPI, APIRouter -from rich.prompt import Confirm from starlette.requests import Request +# Bittensor +import bittensor +from bittensor.core.axon import Axon as axon +from bittensor.core.axon import FastAPIThreadedServer, AxonMiddleware +from bittensor.core.subtensor import Subtensor as subtensor +from bittensor.core.config import Config +from bittensor.core.threadpool import PriorityThreadPoolExecutor +from bittensor.core.extrinsics.serving import do_serve_axon +from bittensor.utils.btlogging import logging +from bittensor.utils import format_error_message, networking as net +from bittensor.utils import ( format_error_message, networking as net, unlock_key, Certificate ) + +# Local from compute import __version_as_int__ -from compute.prometheus import prometheus_extrinsic from compute.utils.version import get_local_version - -def serve_extrinsic( - subtensor: "bittensor.subtensor", - wallet: "bittensor.wallet", +if TYPE_CHECKING: + from bittensor.core.axon import Axon + from bittensor.core.subtensor import Subtensor + from bittensor.core.types import AxonServeCallParams + from bittensor_wallet import Wallet + from bittensor.core.subtensor import Subtensor + +def custom_serve_extrinsic( + subtensor: "Subtensor", + wallet: "Wallet", ip: str, port: int, protocol: int, @@ -48,43 +63,32 @@ def serve_extrinsic( placeholder1: int = 0, placeholder2: int = 0, wait_for_inclusion: bool = False, - wait_for_finalization=True, - prompt: bool = False, + wait_for_finalization: bool = True, + certificate: Certificate | None = None, ) -> bool: - r"""Subscribes a bittensor endpoint to the subtensor chain. + """Subscribes a Bittensor endpoint to the subtensor chain. + Args: - wallet (bittensor.wallet): - bittensor wallet object. - ip (str): - endpoint host port i.e. 192.122.31.4 - port (int): - endpoint port number i.e. 9221 - protocol (int): - int representation of the protocol - netuid (int): - network uid to serve on. - placeholder1 (int): - placeholder for future use. - placeholder2 (int): - placeholder for future use. - wait_for_inclusion (bool): - if set, waits for the extrinsic to enter a block before returning true, - or returns false if the extrinsic fails to enter the block within the timeout. - wait_for_finalization (bool): - if set, waits for the extrinsic to be finalized on the chain before returning true, - or returns false if the extrinsic fails to be finalized within the timeout. - prompt (bool): - If true, the call waits for confirmation from the user before proceeding. + subtensor (bittensor.core.subtensor.Subtensor): Subtensor instance object. + wallet (bittensor_wallet.Wallet): Bittensor wallet object. + ip (str): Endpoint host port i.e., ``192.122.31.4``. + port (int): Endpoint port number i.e., ``9221``. + protocol (int): An ``int`` representation of the protocol. + netuid (int): The network uid to serve on. + placeholder1 (int): A placeholder for future use. + placeholder2 (int): A placeholder for future use. + wait_for_inclusion (bool): If set, waits for the extrinsic to enter a block before returning ``true``, or returns ``false`` if the extrinsic fails to enter the block within the timeout. + wait_for_finalization (bool): If set, waits for the extrinsic to be finalized on the chain before returning ``true``, or returns ``false`` if the extrinsic fails to be finalized within the timeout. + certificate (Certificate | None): An optional certificate object that can be used for secure communication. Returns: - success (bool): - flag is true if extrinsic was finalized or uncluded in the block. - If we did not wait for finalization / inclusion, the response is true. + success (bool): Flag is ``true`` if extrinsic was finalized or uncluded in the block. If we did not wait for finalization / inclusion, the response is ``true``. """ # Decrypt hotkey - wallet.hotkey - version = __version_as_int__ + if not (unlock := unlock_key(wallet, "hotkey")).success: + logging.error(unlock.message) + return False params: "AxonServeCallParams" = { - "version": version, + "version": __version_as_int__, "ip": net.ip_to_int(ip), "port": port, "ip_type": net.ip_version(ip), @@ -94,13 +98,14 @@ def serve_extrinsic( "protocol": protocol, "placeholder1": placeholder1, "placeholder2": placeholder2, + "certificate": certificate, } - bittensor.logging.debug("Checking axon ...") + logging.debug("Checking axon ...") neuron = subtensor.get_neuron_for_pubkey_and_subnet( wallet.hotkey.ss58_address, netuid=netuid ) neuron_up_to_date = not neuron.is_null and params == { - "version": neuron.axon_info.version, + "version": __version_as_int__, "ip": net.ip_to_int(neuron.axon_info.ip), "port": neuron.axon_info.port, "ip_type": neuron.axon_info.ip_type, @@ -115,26 +120,16 @@ def serve_extrinsic( output["coldkey"] = wallet.coldkeypub.ss58_address output["hotkey"] = wallet.hotkey.ss58_address if neuron_up_to_date: - bittensor.logging.debug( + logging.debug( f"Axon already served on: AxonInfo({wallet.hotkey.ss58_address},{ip}:{port}) " ) return True - if prompt: - output = params.copy() - output["coldkey"] = wallet.coldkeypub.ss58_address - output["hotkey"] = wallet.hotkey.ss58_address - if not Confirm.ask( - "Do you want to serve axon:\n [bold white]{}[/bold white]".format( - json.dumps(output, indent=4, sort_keys=True) - ) - ): - return False - - bittensor.logging.debug( - f"Serving axon with: AxonInfo({wallet.hotkey.ss58_address},{ip}:{port}) -> {subtensor.network}:{netuid}:{version}" + logging.debug( + f"Serving axon with: AxonInfo({wallet.hotkey.ss58_address},{ip}:{port}) -> {subtensor.network}:{netuid}" ) - success, error_message = subtensor._do_serve_axon( + success, error_message = do_serve_axon( + self=subtensor, wallet=wallet, call_params=params, wait_for_finalization=wait_for_finalization, @@ -142,17 +137,18 @@ def serve_extrinsic( ) if wait_for_inclusion or wait_for_finalization: - if success == True: - bittensor.logging.debug(f"Axon served.") + if success is True: + logging.debug( + f"Axon served with: AxonInfo({wallet.hotkey.ss58_address},{ip}:{port}) on {subtensor.network}:{netuid} " + ) return True else: - bittensor.logging.debug( - f"Axon failed to served with error: {error_message} " - ) + logging.error(f"Failed: {format_error_message(error_message)}") return False else: return True +bittensor.core.extrinsics.serving.serve_extrinsic = custom_serve_extrinsic class ComputeSubnetSubtensor(subtensor): def __init__( @@ -169,138 +165,79 @@ def __init__( log_verbose=log_verbose, ) - ################# - #### Serving #### - ################# - def serve( - self, - wallet: "bittensor.wallet", - ip: str, - port: int, - protocol: int, - netuid: int, - placeholder1: int = __version_as_int__, - placeholder2: int = 0, - wait_for_inclusion: bool = False, - wait_for_finalization=True, - prompt: bool = False, - ) -> bool: - serve_extrinsic( - self, - wallet, - ip, - port, - protocol, - netuid, - placeholder1, - placeholder2, - wait_for_inclusion, - wait_for_finalization, - ) - - def serve_prometheus( - self, - wallet: "bittensor.wallet", - port: int, - netuid: int, - wait_for_inclusion: bool = False, - wait_for_finalization: bool = True, - force_update: bool = False, - ) -> bool: - return prometheus_extrinsic( - self, - wallet=wallet, - port=port, - netuid=netuid, - wait_for_inclusion=wait_for_inclusion, - wait_for_finalization=wait_for_finalization, - force_update=force_update, - ) - class ComputeSubnetAxon(axon): def __init__( self, - wallet: "bittensor.wallet" = None, - config: Optional["bittensor.config"] = None, + wallet: Optional["Wallet"] = None, + config: Optional["Config"] = None, port: Optional[int] = None, ip: Optional[str] = None, external_ip: Optional[str] = None, external_port: Optional[int] = None, max_workers: Optional[int] = None, - ) -> "bittensor.axon": - r"""Creates a new bittensor.Axon object from passed arguments. + ): + """Creates a new bittensor.Axon object from passed arguments. + Args: - config (:obj:`Optional[bittensor.config]`, `optional`): - bittensor.axon.config() - wallet (:obj:`Optional[bittensor.wallet]`, `optional`): - bittensor wallet with hotkey and coldkeypub. - port (:type:`Optional[int]`, `optional`): - Binding port. - ip (:type:`Optional[str]`, `optional`): - Binding ip. - external_ip (:type:`Optional[str]`, `optional`): - The external ip of the server to broadcast to the network. - external_port (:type:`Optional[int]`, `optional`): - The external port of the server to broadcast to the network. - max_workers (:type:`Optional[int]`, `optional`): - Used to create the threadpool if not passed, specifies the number of active threads servicing requests. + config (:obj:`Optional[bittensor.core.config.Config]`): bittensor.Axon.config() + wallet (:obj:`Optional[bittensor_wallet.Wallet]`): bittensor wallet with hotkey and coldkeypub. + port (:type:`Optional[int]`): Binding port. + ip (:type:`Optional[str]`): Binding ip. + external_ip (:type:`Optional[str]`): The external ip of the server to broadcast to the network. + external_port (:type:`Optional[int]`): The external port of the server to broadcast to the network. + max_workers (:type:`Optional[int]`): Used to create the threadpool if not passed, specifies the number of active threads servicing requests. """ + # Build and check config. if config is None: config = axon.config() config = copy.deepcopy(config) - config.axon.ip = ip or config.axon.get("ip", bittensor.defaults.axon.ip) - config.axon.port = port or config.axon.get("port", bittensor.defaults.axon.port) - config.axon.external_ip = external_ip or config.axon.get( - "external_ip", bittensor.defaults.axon.external_ip - ) - config.axon.external_port = external_port or config.axon.get( - "external_port", bittensor.defaults.axon.external_port - ) - config.axon.max_workers = max_workers or config.axon.get( - "max_workers", bittensor.defaults.axon.max_workers - ) + config.axon.ip = ip or config.axon.ip + config.axon.port = port or config.axon.port + config.axon.external_ip = external_ip or config.axon.external_ip + config.axon.external_port = external_port or config.axon.external_port + config.axon.max_workers = max_workers or config.axon.max_workers axon.check_config(config) - self.config = config + self.config = config # type: ignore # Get wallet or use default. - self.wallet = wallet or bittensor.wallet() + self.wallet = wallet or Wallet(config=self.config) # Build axon objects. self.uuid = str(uuid.uuid1()) - self.ip = self.config.axon.ip - self.port = self.config.axon.port + self.ip = self.config.axon.ip # type: ignore + self.port = self.config.axon.port # type: ignore self.external_ip = ( - self.config.axon.external_ip - if self.config.axon.external_ip != None - else bittensor.utils.networking.get_external_ip() + self.config.axon.external_ip # type: ignore + if self.config.axon.external_ip is not None # type: ignore + else net.get_external_ip() ) self.external_port = ( - self.config.axon.external_port - if self.config.axon.external_port != None - else self.config.axon.port + self.config.axon.external_port # type: ignore + if self.config.axon.external_port is not None # type: ignore + else self.config.axon.port # type: ignore ) - self.full_address = str(self.config.axon.ip) + ":" + str(self.config.axon.port) + self.full_address = str(self.config.axon.ip) + ":" + str(self.config.axon.port) # type: ignore self.started = False - + # Build middleware - self.thread_pool = bittensor.PriorityThreadPoolExecutor( - max_workers=self.config.axon.max_workers + self.thread_pool = PriorityThreadPoolExecutor( + max_workers=self.config.axon.max_workers # type: ignore ) - self.nonces = {} - + self.nonces: dict[str, int] = {} + # Request default functions. - self.forward_class_types = {} - self.blacklist_fns = {} - self.priority_fns = {} - self.forward_fns = {} - self.verify_fns = {} - self.required_hash_fields = {} + self.forward_class_types: dict[str, list[Signature]] = {} + self.blacklist_fns: dict[str, Callable | None] = {} + self.priority_fns: dict[str, Callable | None] = {} + self.forward_fns: dict[str, Callable | None] = {} + self.verify_fns: dict[str, Callable | None] = {} + # Instantiate FastAPI self.app = FastAPI() - log_level = "trace" if bittensor.logging.__trace_on__ else "critical" + log_level = "trace" if logging.__trace_on__ else "critical" self.fast_config = uvicorn.Config( self.app, host="0.0.0.0", port=self.config.axon.port, log_level=log_level ) @@ -309,7 +246,8 @@ def __init__( self.app.include_router(self.router) # Build ourselves as the middleware. - self.app.add_middleware(ComputeSubnetAxonMiddleware, axon=self) + self.middleware_cls = ComputeSubnetAxonMiddleware + self.app.add_middleware(self.middleware_cls, axon=self) # Attach default forward. def ping(r: bittensor.Synapse) -> bittensor.Synapse: @@ -364,7 +302,7 @@ def __init__(self, app: "AxonMiddleware", axon: "bittensor.axon"): """ super().__init__(app, axon=axon) - async def preprocess(self, request: Request) -> bittensor.Synapse: + async def preprocess(self, request: Request) -> bittensor.core.synapse.Synapse: """ Performs the initial processing of the incoming request. This method is responsible for extracting relevant information from the request and setting up the Synapse object, which @@ -400,7 +338,7 @@ async def preprocess(self, request: Request) -> bittensor.Synapse: { "version": __version_as_int__, "uuid": str(self.axon.uuid), - "nonce": f"{time.monotonic_ns()}", + "nonce": time.monotonic_ns(), "status_message": "Success", "status_code": "100", "placeholder1": 1, @@ -410,7 +348,7 @@ async def preprocess(self, request: Request) -> bittensor.Synapse: # Fills the dendrite information into the synapse. synapse.dendrite.__dict__.update( - {"port": str(request.client.port), "ip": str(request.client.host)} + {"port": int(request.client.port), "ip": str(request.client.host)} ) # Signs the synapse from the axon side using the wallet hotkey. diff --git a/compute/prometheus.py b/compute/prometheus.py index b586eb42..74464242 100644 --- a/compute/prometheus.py +++ b/compute/prometheus.py @@ -21,10 +21,11 @@ import bittensor.utils.networking as net import compute +import inspect def prometheus_extrinsic( - subtensor: "bittensor.subtensor", + subtensor: "bittensor.core.subtensor.Subtensor.MockSubtensor", wallet: "bittensor.wallet", port: int, netuid: int, @@ -99,9 +100,8 @@ def prometheus_extrinsic( # Add netuid, not in prometheus_info call_params["netuid"] = netuid - bittensor.logging.info("Serving prometheus on: {}:{} ...".format(subtensor.network, netuid)) - success, err = subtensor._do_serve_prometheus( + success, err = subtensor.do_serve_prometheus( wallet=wallet, call_params=call_params, wait_for_finalization=wait_for_finalization, diff --git a/compute/protocol.py b/compute/protocol.py index 22ce3133..ae151c82 100644 --- a/compute/protocol.py +++ b/compute/protocol.py @@ -87,6 +87,7 @@ class Allocate(bt.Synapse): docker_action: dict = { "action": "", "ssh_key": "", + "key_type": "", } def deserialize(self) -> dict: diff --git a/compute/utils/subtensor.py b/compute/utils/subtensor.py index 8efac3cb..8c1106d5 100644 --- a/compute/utils/subtensor.py +++ b/compute/utils/subtensor.py @@ -20,7 +20,7 @@ from compute.utils.cache import ttl_cache -bt_blocktime = bt.__blocktime__ +bt_blocktime = bt.BLOCKTIME @ttl_cache(maxsize=1, ttl=bt_blocktime) diff --git a/neurons/Miner/allocate.py b/neurons/Miner/allocate.py index f72bd200..97e03424 100644 --- a/neurons/Miner/allocate.py +++ b/neurons/Miner/allocate.py @@ -37,6 +37,7 @@ def register_allocation(timeline, device_requirement, public_key, docker_require cpu_assignment = "0" ram_capacity = device_requirement["ram"]["capacity"] # e.g 5g hard_disk_capacity = device_requirement["hard_disk"]["capacity"] # e.g 100g + testing = device_requirement.get("testing", False) if not device_requirement["gpu"]: gpu_capacity = 0 else: @@ -47,7 +48,7 @@ def register_allocation(timeline, device_requirement, public_key, docker_require ram_usage = {"capacity": str(int(ram_capacity / 1073741824)) + "g"} hard_disk_usage = {"capacity": str(int(hard_disk_capacity / 1073741824)) + "g"} - run_status = run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, docker_requirement) + run_status = run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, docker_requirement, testing) if run_status["status"]: bt.logging.info("Successfully allocated container.") diff --git a/neurons/Miner/container.py b/neurons/Miner/container.py index 2995c242..02704d2d 100644 --- a/neurons/Miner/container.py +++ b/neurons/Miner/container.py @@ -28,7 +28,7 @@ from io import BytesIO import sys from docker.types import DeviceRequest - +from compute import __version_as_int__ parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.append(parent_dir) @@ -96,7 +96,7 @@ def kill_container(): return False # Run a new docker container with the given docker_name, image_name and device information -def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, docker_requirement: dict): +def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, docker_requirement: dict, testing: bool): try: client, containers = get_docker() # Configuration @@ -172,7 +172,7 @@ def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, # client.volumes.create(volume_name, driver = 'local', driver_opts={'size': hard_disk_capacity}) # Determine container name based on ssh key - container_to_run = container_name if docker_ssh_key else container_name_test + container_to_run = container_name_test if testing else container_name # Step 2: Run the Docker container device_requests = [DeviceRequest(count=-1, capabilities=[["gpu"]])] @@ -194,7 +194,7 @@ def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, # Check the status to determine if the container ran successfully if container.status == "created": bt.logging.info("Container was created successfully.") - info = {"username": "root", "password": password, "port": docker_ssh_port} + info = {"username": "root", "password": password, "port": docker_ssh_port, "version" : __version_as_int__} info_str = json.dumps(info) public_key = public_key.encode("utf-8") encrypted_info = rsa.encrypt_data(public_key, info_str) @@ -432,7 +432,7 @@ def unpause_container(): bt.logging.info(f"Error unpausing container {e}") return {"status": False} -def exchange_key_container(new_ssh_key: str): +def exchange_key_container(new_ssh_key: str, key_type: str = "user"): try: client, containers = get_docker() running_container = None @@ -443,7 +443,22 @@ def exchange_key_container(new_ssh_key: str): if running_container: # stop and remove the container by using the SIGTERM signal to PID 1 (init) process in the container if running_container.status == "running": - running_container.exec_run(cmd=f"bash -c \"echo '{new_ssh_key}' > /root/.ssh/authorized_keys & sync & sleep 1\"") + exist_key = running_container.exec_run(cmd="cat /root/.ssh/authorized_keys") + exist_key = exist_key.output.decode("utf-8").split("\n") + user_key = exist_key[0] + terminal_key = "" + if len(exist_key) > 1: + terminal_key = exist_key[1] + if key_type == "terminal": + terminal_key = new_ssh_key + elif key_type == "user": + user_key = new_ssh_key + else: + bt.logging.debug("Invalid key type to swap the SSH key") + return {"status": False} + key_list = user_key + "\n" + terminal_key + # bt.logging.debug(f"New SSH key: {key_list}") + running_container.exec_run(cmd=f"bash -c \"echo '{key_list}' > /root/.ssh/authorized_keys & sync & sleep 1\"") running_container.exec_run(cmd="kill -15 1") running_container.wait() running_container.restart() diff --git a/neurons/miner.py b/neurons/miner.py index 57b0656a..e53de45a 100644 --- a/neurons/miner.py +++ b/neurons/miner.py @@ -438,7 +438,8 @@ def allocate(self, synapse: Allocate) -> Allocate: if docker_action["action"] == "exchange_key": public_key = synapse.public_key new_ssh_key = docker_action["ssh_key"] - result = exchange_key_container(new_ssh_key) + key_type = docker_action["key_type"] + result = exchange_key_container(new_ssh_key, key_type) synapse.output = result elif docker_action["action"] == "restart": public_key = synapse.public_key diff --git a/neurons/miner_checker.py b/neurons/miner_checker.py index e6cc5e61..2feb516b 100644 --- a/neurons/miner_checker.py +++ b/neurons/miner_checker.py @@ -91,7 +91,7 @@ def miner_checking_thread(self, axon): allocation_status = False private_key, public_key = rsa.generate_key_pair() - device_requirement = {"cpu": {"count": 1}, "gpu": {}, "hard_disk": {"capacity": 1073741824}, "ram": {"capacity": 1073741824}} + device_requirement = {"cpu": {"count": 1}, "gpu": {}, "hard_disk": {"capacity": 1073741824}, "ram": {"capacity": 1073741824}, "testing": True} try: check_allocation = dendrite.query(axon, Allocate(timeline=30, device_requirement=device_requirement, checking=True,), timeout=30) diff --git a/neurons/register_api.py b/neurons/register_api.py index a129f8d3..8b2cb9cf 100644 --- a/neurons/register_api.py +++ b/neurons/register_api.py @@ -61,7 +61,7 @@ # Import FastAPI Libraries import uvicorn from fastapi import ( - FastAPI, + FastAPI, HTTPException, status, Request, WebSocket, @@ -72,9 +72,16 @@ from fastapi.exceptions import RequestValidationError from fastapi.concurrency import run_in_threadpool from pydantic import BaseModel, Field +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.status import HTTP_403_FORBIDDEN +from dotenv import load_dotenv from typing import Optional, Union, List +# Loads the .env file +load_dotenv() + # Constants +ENABLE_WHITELIST_IPS = False # False for disabling, True for enabling DEFAULT_SSL_MODE = 2 # 1 for client CERT optional, 2 for client CERT_REQUIRED DEFAULT_API_PORT = 8903 # default port for the API DATA_SYNC_PERIOD = 600 # metagraph resync time @@ -107,6 +114,22 @@ "5Fq5v71D4LX8Db1xsmRSy6udQThcZ8sFDqxQFwnUZ1BuqY5A"] MINER_BLACKLIST = [] +# IP Whitelist middleware +class IPWhitelistMiddleware(BaseHTTPMiddleware): + def __init__(self, app: FastAPI): + super().__init__(app) + self.whitelisted_ips = set(os.getenv("WHITELISTED_IPS", "").split(",")) + + async def dispatch(self, request: Request, call_next): + # Extracts the client's IP address + client_ip = request.client.host + if client_ip not in self.whitelisted_ips: + bt.logging.info(f"Access attempt from IP: {client_ip}") + raise HTTPException(status_code=HTTP_403_FORBIDDEN, detail="Access forbidden: IP not whitelisted") + + # Process the request and get the response + response = await call_next(request) + return response class UserConfig(BaseModel): netuid: str = Field(default="15") @@ -139,6 +162,7 @@ class Allocation(BaseModel): ssh_command: str = "" ssh_key: str = "" uuid_key: str = "" + miner_version: int = 0 class DockerRequirement(BaseModel): @@ -292,6 +316,8 @@ def __init__( load_dotenv() self._setup_routes() + if ENABLE_WHITELIST_IPS: + self.app.add_middleware(IPWhitelistMiddleware) self.process = None self.websocket_connection = None self.allocation_table = [] @@ -493,7 +519,7 @@ async def allocate_spec(requirements: DeviceRequirement, docker_requirement: Doc allocated.ssh_password = info["password"] allocated.uuid_key = info["uuid"] allocated.ssh_command = f"ssh {info['username']}@{result['ip']} -p {str(info['port'])}" - + allocated.miner_version = info.get("version", 0) update_allocation_db(result_hotkey, info, True) await self._update_allocation_wandb() bt.logging.info(f"API: Resource {result_hotkey} was successfully allocated") @@ -575,22 +601,13 @@ async def allocate_hotkey(hotkey: str, ssh_key: Optional[str] = None, uuid_key = str(uuid.uuid1()) private_key, public_key = rsa.generate_key_pair() - if ssh_key: - if docker_requirement is None: - docker_requirement = DockerRequirement() - docker_requirement.ssh_key = ssh_key - else: - docker_requirement.ssh_key = ssh_key + + if docker_requirement is None: + docker_requirement = DockerRequirement() + if ssh_key is None: + docker_requirement.ssh_key = "" else: - bt.logging.error(f"API: Allocation {hotkey} Failed : No ssh key") - return JSONResponse( - status_code=status.HTTP_404_NOT_FOUND, - content={ - "success": False, - "message": "Fail to allocate resource", - "err_detail": "No ssh key", - }, - ) + docker_requirement.ssh_key = ssh_key run_start = time.time() @@ -651,7 +668,7 @@ async def allocate_hotkey(hotkey: str, ssh_key: Optional[str] = None, allocated.ssh_password = info["password"] allocated.uuid_key = info["uuid"] allocated.ssh_command = f"ssh {info['username']}@{result['ip']} -p {str(info['port'])}" - + allocated.miner_version = info.get("version", 0) update_allocation_db(result_hotkey, info, True) await self._update_allocation_wandb() @@ -847,7 +864,7 @@ async def deallocate(hotkey: str, uuid_key: str, request: Request, notify_flag: }, } ) - async def check_miner_status(hotkey_list: List[str]) -> JSONResponse: + async def check_miner_status(hotkey_list: List[str], query_version: bool = False) -> JSONResponse: checking_list = [] for hotkey in hotkey_list: checking_result = { @@ -857,16 +874,20 @@ async def check_miner_status(hotkey_list: List[str]) -> JSONResponse: for axon in self.metagraph.axons: if axon.hotkey == hotkey: try: - register_response = await run_in_threadpool(self.dendrite.query, - axon, Allocate(timeline=1, checking=True, ), - timeout=60) - if register_response: - if register_response["status"] is True: - checking_result = {"hotkey": hotkey, "status": "Docker OFFLINE"} - else: - checking_result = {"hotkey": hotkey, "status": "Docker ONLINE"} + if query_version: + checking_result = {"hotkey": hotkey, "version": axon.version} else: - checking_result = {"hotkey": hotkey, "status": "Miner NO_RESPONSE"} + register_response = await run_in_threadpool(self.dendrite.query, + axon, Allocate(timeline=1, checking=True, ), + timeout=10) + await asyncio.sleep(0.1) + if register_response: + if register_response["status"] is True: + checking_result = {"hotkey": hotkey, "status": "Docker OFFLINE"} + else: + checking_result = {"hotkey": hotkey, "status": "Docker ONLINE"} + else: + checking_result = {"hotkey": hotkey, "status": "Miner NO_RESPONSE"} except Exception as e: bt.logging.error( f"API: An error occur during the : {e}" @@ -1206,7 +1227,7 @@ async def unpause_docker(hotkey: str, uuid_key: str) -> JSONResponse: "description": "An error occurred while exchanging docker key.", }, }) - async def exchange_docker_key(hotkey: str, uuid_key: str, ssh_key: str) -> JSONResponse: + async def exchange_docker_key(hotkey: str, uuid_key: str, ssh_key: str, key_type: str = "user") -> JSONResponse: # Instantiate the connection to the db db = ComputeDb() cursor = db.get_cursor() @@ -1234,13 +1255,14 @@ async def exchange_docker_key(hotkey: str, uuid_key: str, ssh_key: str) -> JSONR docker_action = { "action": "exchange_key", "ssh_key": ssh_key, + "key_type": key_type, } if uuid_key_db == uuid_key: index = self.metagraph.hotkeys.index(hotkey) axon = self.metagraph.axons[index] run_start = time.time() - allocate_class = Allocate(timeline=0, device_requirement={}, checking=False, public_key=regkey, + allocate_class = Allocate(timeline=1, device_requirement={}, checking=False, public_key=regkey, docker_change=True, docker_action=docker_action) response = await run_in_threadpool( self.dendrite.query, axon, allocate_class, timeout=60 @@ -1369,6 +1391,7 @@ async def list_allocations() -> JSONResponse: ) entry.uuid_key = info["uuid"] entry.ssh_key = info["ssh_key"] + entry.miner_version = info.get("version", 0) allocation_list.append(entry) except Exception as e: @@ -1712,12 +1735,8 @@ async def count_all_gpus() -> JSONResponse: # Iterate through the miner specs details and print the table for hotkey, details in specs_details.items(): if details : - gpu_miner = details["gpu"] - gpu_capacity = "{:.2f}".format( - (gpu_miner["capacity"] / 1024) - ) - gpu_name = str(gpu_miner["details"][0]["name"]).lower() - gpu_count = gpu_miner["count"] + gpu_miner = details.get("gpu", "") + gpu_count = gpu_miner.get("count", 0) GPU_COUNTS += gpu_count bt.logging.info(f"API: List resources successfully") return JSONResponse( diff --git a/neurons/validator.py b/neurons/validator.py index 854f796c..56307f35 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -245,16 +245,23 @@ def init_prometheus(self, force_update: bool = False): """ Register the prometheus information on metagraph. :return: bool - """ + """ + # extrinsic prometheus is removed at 8.2.1 + bt.logging.info("Extrinsic prometheus information on metagraph.") - success = self.subtensor.serve_prometheus( - wallet=self.wallet, - port=bt.defaults.axon.port, - netuid=self.config.netuid, - force_update=force_update, - ) + success = True + # TODO : remove all the related code from the code base + # self._subtensor.serve_prometheus( + # wallet=self.wallet, + # port=bt.core.settings.DEFAULTS.axon.port, + # netuid=self.config.netuid, + # force_update=force_update, + # ) if success: - bt.logging.success(prefix="Prometheus served", sufix=f"Current version: {get_local_version()}") + bt.logging.success( + prefix="Prometheus served", + suffix=f"Current version: {get_local_version()}" # Corrected keyword + ) else: bt.logging.error("Prometheus initialization failed") return success @@ -484,7 +491,7 @@ def filter_axon_version(self, dict_filtered_axons: dict): dict_filtered_axons_version = {} for uid, axon in dict_filtered_axons.items(): - if latest_version and latest_version <= axon.version < 600: + if latest_version and latest_version <= axon.version: dict_filtered_axons_version[uid] = axon return dict_filtered_axons_version @@ -537,7 +544,7 @@ def get_valid_queryable(self): neuron: bt.NeuronInfoLite = self.metagraph.neurons[uid] axon = self.metagraph.axons[uid] - if neuron.axon_info.ip != "0.0.0.0" and self.metagraph.total_stake[uid] < 1.024e3 and not self.is_blacklisted(neuron=neuron): + if neuron.axon_info.ip != "0.0.0.0" and not self.is_blacklisted(neuron=neuron): valid_queryable.append((uid, axon)) return valid_queryable @@ -923,7 +930,7 @@ def allocate_miner(self, axon, private_key, public_key): dendrite = bt.dendrite(wallet=self.wallet) # Define device requirements (customize as needed) - device_requirement = {"cpu": {"count": 1}, "gpu": {}, "hard_disk": {"capacity": 1073741824}, "ram": {"capacity": 1073741824}} + device_requirement = {"cpu": {"count": 1}, "gpu": {}, "hard_disk": {"capacity": 1073741824}, "ram": {"capacity": 1073741824}, "testing": True} device_requirement["gpu"] = {"count": 1, "capacity": 0, "type": ""} docker_requirement = { @@ -1058,7 +1065,7 @@ def set_weights(self): version_key=__version_as_int__, wait_for_inclusion=False, ) - if isinstance(result, bool) and result or isinstance(result, tuple) and result[0]: + if isinstance(result[0], bool) and result or isinstance(result, tuple) and result[0]: bt.logging.info(result) bt.logging.success("✅ Successfully set weights.") else: diff --git a/requirements.txt b/requirements.txt index f6608eb3..15d8b68b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,18 @@ at==0.0.3 -bittensor==6.9.4 +bittensor==8.5.0 black==23.7.0 -cryptography==42.0.0 +cryptography==43.0.1 docker==7.0.0 GPUtil==1.4.0 igpu==0.1.2 -numpy==1.26.3 +numpy==2.0.2 psutil==5.9.8 pyinstaller==6.4.0 -torch==2.1.2 -wandb==0.16.6 +wandb==0.19.4 pyfiglet==1.0.2 python-dotenv==1.0.1 requests==2.31.0 paramiko==3.4.1 blake3 ipwhois==1.3.0 +torch==2.5.1 \ No newline at end of file