From 7b4f6f62ae3764311e5bc405988682eea433a843 Mon Sep 17 00:00:00 2001 From: nautilus Date: Fri, 20 Dec 2024 12:52:53 +0100 Subject: [PATCH 1/4] fix: allocation bonus: multiplier instead of addition --- README.md | 32 ++++++++++++------------ neurons/Validator/calculate_pow_score.py | 11 ++++---- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 10d4231d..71b4c773 100644 --- a/README.md +++ b/README.md @@ -210,43 +210,43 @@ The score calculation function now determines a miner's performance primarily ba - NVIDIA RTX A5000: 0.36 - NVIDIA RTX A4500: 0.34 -**Scaling Factor**: Determine the highest GPU base score, multiply it by 8 (the maximum number of GPUs), and set this scenario as the 100-point baseline. A scaling factor is derived so that using eight of the top GPU models equals 100 points. +**Scaling Factor**: Determine the highest GPU base score, multiply it by 8 (the maximum number of GPUs), and set this scenario as the 100-point baseline. A scaling factor is derived so that using eight of the top GPU models equals 50 points. -**GPU Score**: Multiply the chosen GPU’s base score by the number of GPUs (up to 8) and by the scaling factor to find the miner’s GPU score (0–100). +**GPU Score**: Multiply the chosen GPU’s base score by the number of GPUs (up to 8) and by the scaling factor to find the miner’s GPU score (0–50). -**Allocation Bonus**: If a miner has allocated machine resources, add 100 points to the GPU score, allowing a maximum score of up to 200. +**Allocation Bonus**: If a miner has allocated machine resources, the GPU score is multiplied by 2, allowing a maximum score of up to 100. 
**Total Score**:

-- Score (not allocated) = GPU Score (0–100)
-- Score (allocated) = GPU Score + 100 (up to 200)
+- Score (not allocated) = GPU Score (0–50)
+- Score (allocated) = GPU Score * 2 (up to 100)
 
 ### Example 1: Miner A's Total Score
 
 - **GPU**: NVIDIA H200 (Base Score: 3.90)
 - **Number of GPUs**: 8
-- **Allocation**: True
+- **Allocation**: False
 
 Step-by-step calculation:
 
-1. Highest scenario: 3.90 * 8 = 31.2
-2. Scaling factor: 100 / 31.2 ≈ 3.2051
-3. GPU Score: 3.90 * 8 * 3.2051 ≈ 100
-4. Allocation Bonus: 100 + 100 = 200
+1. Highest scenario: 3.90 * 8 = 31.2
+2. Scaling factor: 50 / 31.2 ≈ 1.6026
+3. GPU Score: 3.90 * 8 * 1.6026 ≈ 50
+4. Allocation Bonus: 0
 
-Total Score = 200
+Total Score = 50
 
 ### Example 2: Miner B's Total Score
 
 - **GPU**: NVIDIA RTX 4090 (Base Score: 0.69)
 - **Number of GPUs**: 2
-- **Allocation**: False
+- **Allocation**: True
 
 Step-by-step calculation:
 
-1. Scaling factor (same as above): 3.2051
-2. GPU Score: 0.69 * 2 * 3.2051 ≈ 4.42
-3. No allocation bonus applied.
+1. Scaling factor (same as above): 1.6026
+2. GPU Score: 0.69 * 2 * 1.6026 ≈ 2.21
+3. Allocation Bonus: 2.21 * 2 = 4.42
 
-Total Score = 4.42
+Total Score = 4.42
 
 ## Resource Allocation Mechanism
 
diff --git a/neurons/Validator/calculate_pow_score.py b/neurons/Validator/calculate_pow_score.py
index 667e8f01..7f7b2a84 100644
--- a/neurons/Validator/calculate_pow_score.py
+++ b/neurons/Validator/calculate_pow_score.py
@@ -39,7 +39,7 @@ def calc_score_pog(gpu_specs, hotkey, allocated_hotkeys, config_data, mock=False
         # Get the GPU with the maximum score
         max_gpu = max(gpu_scores, key=gpu_scores.get)
         max_score = gpu_scores[max_gpu]*8
-        score_factor = 100/max_score
+        score_factor = 50/max_score
 
         gpu_name = gpu_specs.get("gpu_name")
         num_gpus = min(gpu_specs.get("num_gpus"), 8)
@@ -47,15 +47,16 @@ def calc_score_pog(gpu_specs, hotkey, allocated_hotkeys, config_data, mock=False
         # Get GPU score
         score = gpu_scores.get(gpu_name) * num_gpus * score_factor
 
-        # Add allocation score, i.e. 
max un-allocated score = 100 + # Add allocation score, multiplier = 2 if hotkey in allocated_hotkeys: - score += 100 + score = score * 2 # Logging score - bt.logging.info(f"Score - {hotkey}: {score:.2f}/200") + bt.logging.info(f"Score - {hotkey}: {score:.2f}/100") # Normalize the score - normalized_score = normalize(score, 0, 200) + normalized_score = normalize(score, 0, 100) + return normalized_score except Exception as e: bt.logging.error(f"An error occurred while calculating score for the following hotkey - {hotkey}: {e}") From 249ef9ba2a581f2304119d203b0a079890750841 Mon Sep 17 00:00:00 2001 From: nautilus Date: Fri, 20 Dec 2024 13:00:05 +0100 Subject: [PATCH 2/4] chore: bump version --- compute/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compute/__init__.py b/compute/__init__.py index f883f7d4..81c060bb 100644 --- a/compute/__init__.py +++ b/compute/__init__.py @@ -18,9 +18,9 @@ import string # Define the version of the template module. -__version__ = "1.6.0" +__version__ = "1.6.1" __minimal_miner_version__ = "1.6.0" -__minimal_validator_version__ = "1.6.0" +__minimal_validator_version__ = "1.6.1" version_split = __version__.split(".") __version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) From a3b3ba9308c455ee948d8d733bb5fd62d1d48e5a Mon Sep 17 00:00:00 2001 From: nautilus Date: Mon, 23 Dec 2024 16:02:17 +0100 Subject: [PATCH 3/4] fix: different name for test-allocation container --- compute/__init__.py | 2 +- neurons/Miner/container.py | 34 ++++++++++++++++++++++++++++++++-- neurons/miner.py | 2 ++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/compute/__init__.py b/compute/__init__.py index 81c060bb..27537ede 100644 --- a/compute/__init__.py +++ b/compute/__init__.py @@ -20,7 +20,7 @@ # Define the version of the template module. 
__version__ = "1.6.1" __minimal_miner_version__ = "1.6.0" -__minimal_validator_version__ = "1.6.1" +__minimal_validator_version__ = "1.6.0" version_split = __version__.split(".") __version_as_int__ = (100 * int(version_split[0])) + (10 * int(version_split[1])) + (1 * int(version_split[2])) diff --git a/neurons/Miner/container.py b/neurons/Miner/container.py index 52ff3d86..930b779b 100644 --- a/neurons/Miner/container.py +++ b/neurons/Miner/container.py @@ -38,6 +38,7 @@ image_name = "ssh-image" # Docker image name container_name = "ssh-container" # Docker container name +container_name_test = "ssh-test-container" volume_name = "ssh-volume" # Docker volumne name volume_path = "/tmp" # Path inside the container where the volume will be mounted ssh_port = 4444 # Port to map SSH service on the host @@ -56,7 +57,7 @@ def kill_container(): client, containers = get_docker() running_container = None for container in containers: - if container_name in container.name: + if container.name == container_name: running_container = container break if running_container: @@ -76,6 +77,31 @@ def kill_container(): bt.logging.info(f"Error killing container {e}") return False +# Kill the currently running test container +def kill_test_container(): + try: + client, containers = get_docker() + running_container = None + for container in containers: + if container.name == container_name_test: + running_container = container + break + if running_container: + # stop and remove the container by using the SIGTERM signal to PID 1 (init) process in the container + if running_container.status == "running": + running_container.exec_run(cmd="kill -15 1") + running_container.wait() + # running_container.stop() + running_container.remove() + # Remove all dangling images + client.images.prune(filters={"dangling": True}) + bt.logging.info("Test container was killed successfully") + else: + bt.logging.info("No running container.") + return True + except Exception as e: + bt.logging.info(f"Error killing 
container {e}") + return False # Run a new docker container with the given docker_name, image_name and device information def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, docker_requirement: dict): @@ -150,13 +176,17 @@ def run_container(cpu_usage, ram_usage, hard_disk_usage, gpu_usage, public_key, # Create the Docker volume with the specified size # client.volumes.create(volume_name, driver = 'local', driver_opts={'size': hard_disk_capacity}) + # Determine container name based on ssh key + container_to_run = container_name if docker_ssh_key else container_name_test + + # Step 2: Run the Docker container device_requests = [DeviceRequest(count=-1, capabilities=[["gpu"]])] # if gpu_usage["capacity"] == 0: # device_requests = [] container = client.containers.run( image=image_name, - name=container_name, + name=container_to_run, detach=True, device_requests=device_requests, environment=["NVIDIA_VISIBLE_DEVICES=all"], diff --git a/neurons/miner.py b/neurons/miner.py index 57b0656a..a8986797 100644 --- a/neurons/miner.py +++ b/neurons/miner.py @@ -61,6 +61,7 @@ build_sample_container, check_container, kill_container, + kill_test_container, restart_container, exchange_key_container, pause_container, @@ -218,6 +219,7 @@ def __check_alloaction_errors(self): bt.logging.info( "Container is already running without allocated. Killing the container." ) + kill_test_container() def init_axon(self): # Step 6: Build and link miner functions to the axon. 
From 061cf2799a7a899f2221aeadcc319973a07a69de Mon Sep 17 00:00:00 2001 From: nautilus Date: Mon, 23 Dec 2024 16:03:48 +0100 Subject: [PATCH 4/4] fix: asyncio timeout for pog worker --- neurons/validator.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/neurons/validator.py b/neurons/validator.py index 2057569d..feced9ad 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -643,8 +643,13 @@ async def worker(): break hotkey = axon.hotkey try: - result = await asyncio.get_event_loop().run_in_executor( - self.executor, self.test_miner_gpu, axon, self.config_data + # Set a timeout for the GPU test + timeout = 300 # e.g., 5 minutes + result = await asyncio.wait_for( + asyncio.get_event_loop().run_in_executor( + self.executor, self.test_miner_gpu, axon, self.config_data + ), + timeout=timeout ) if result[1] is not None and result[2] > 0: async with results_lock: @@ -655,6 +660,16 @@ async def worker(): update_pog_stats(self.db, hotkey, result[1], result[2]) else: raise RuntimeError("GPU test failed") + except asyncio.TimeoutError: + bt.logging.warning(f"⏳ Timeout while testing {hotkey}. Retrying...") + retry_counts[hotkey] += 1 + if retry_counts[hotkey] < retry_limit: + bt.logging.info(f"🔄 {hotkey}: Retrying miner -> (Attempt {retry_counts[hotkey]})") + await asyncio.sleep(retry_interval) + await queue.put(axon) + else: + bt.logging.info(f"❌ {hotkey}: Miner failed after {retry_limit} attempts (Timeout).") + update_pog_stats(self.db, hotkey, None, None) except Exception as e: bt.logging.trace(f"Exception in worker for {hotkey}: {e}") retry_counts[hotkey] += 1 @@ -668,7 +683,6 @@ async def worker(): finally: queue.task_done() - # Number of concurrent workers # Determine a safe default number of workers cpu_cores = os.cpu_count() or 1