Merge pull request #259 from neuralinternet/feature/CSN-399-consensus…

…-scoring-mechanism-wandb feature/csn 399 consensus scoring mechanism wandb
neuralinternet · Jan 21, 2025 · 767eb72 · 767eb72
2 parents 9204d98 + e1cd033
commit 767eb72
Show file tree

Hide file tree

Showing 12 changed files with 580 additions and 175 deletions.
diff --git a/.gitignore b/.gitignore
@@ -251,5 +251,8 @@ test_speed_file.dat
 # cert
 cert/
 
+# wandb
+wandb/
+
 # neural internet register api 
-neurons/register-api/
+neurons/register-api/
diff --git a/compute/__init__.py b/compute/__init__.py
@@ -18,7 +18,7 @@
 import string
 
 # Define the version of the template module.
-__version__ = "1.7.3"
+__version__ = "1.7.4"
 __minimal_miner_version__ = "1.6.0"
 __minimal_validator_version__ = "1.7.3"
 

diff --git a/compute/utils/db.py b/compute/utils/db.py
@@ -53,6 +53,21 @@ def init(self):
                 )
             """
             )
+            cursor.execute(
+                """
+                CREATE TABLE IF NOT EXISTS stats (
+                    uid INTEGER PRIMARY KEY,
+                    hotkey TEXT NOT NULL,
+                    gpu_specs TEXT,
+                    score REAL,
+                    allocated BOOLEAN,
+                    own_score BOOLEAN,
+                    reliability_score REAL,  -- Optional reliability score
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (hotkey) REFERENCES miner_details (hotkey) ON DELETE CASCADE
+                )
+                """
+            )
 
             self.conn.commit()
         except Exception as e:

diff --git a/compute/wandb/wandb.py b/compute/wandb/wandb.py
@@ -2,11 +2,13 @@
 import wandb
 import pathlib
 import os
-import json
 import hashlib
+import json
+from collections import Counter
 
 from dotenv import load_dotenv
 from compute.utils.db import ComputeDb
+from neurons.Validator.database.pog import retrieve_stats, write_stats
 from neurons.Validator.script import get_perf_info
 
 PUBLIC_WANDB_NAME = "opencompute"
@@ -193,39 +195,38 @@ def update_stats(self, stats: dict):
 
     def update_allocated_hotkeys(self, hotkey_list):
         """
-        This function updates the allocated hotkeys on validator side.
-        It's useless to alter this information as it needs to be signed by a valid validator hotkey.
+        This function updates the allocated hotkeys on the validator side and syncs the allocation with the database.
         """
         self.api.flush()
 
-        # Step 1: Read penalized hotkeys from the file (penalized_hotkeys.json in the root directory)
-        penalized_hotkeys = []
-        try:
-            with open("penalized_hotkeys.json", 'r') as file:
-                penalized_hotkeys_data = json.load(file)
-                penalized_hotkeys = [entry["hotkey"] for entry in penalized_hotkeys_data]  # Extract hotkeys
-        except FileNotFoundError:
-            bt.logging.trace("Penalized hotkeys file not found.")
-        except json.JSONDecodeError:
-            bt.logging.trace("Error decoding JSON from penalized hotkeys file.")
+        # Retrieve current stats from the database
+        stats = retrieve_stats(self.db)
 
-        # Update the configuration with the new keys
-        # update_dict = {
-        #         "allocated_hotkeys": hotkey_list
-        #     }
+        # Update the `allocated` field in stats based on `hotkey_list`
+        for uid, data in stats.items():
+            hotkey = data.get("hotkey")
+            if hotkey in hotkey_list:
+                data["allocated"] = True  # Mark as allocated if the hotkey is in the list
+            else:
+                data["allocated"] = False  # Mark as not allocated if the hotkey is not in the list
 
+        # Write the updated stats back to the database
+        write_stats(self.db, stats)
+
+        # Prepare the update dictionary for the configuration
         update_dict = {
-                "allocated_hotkeys": hotkey_list,               # Update allocated hotkeys
-                "penalized_hotkeys_checklist": penalized_hotkeys  # Add penalized hotkeys to the config
-                }
+            "allocated_hotkeys": hotkey_list,  # Update allocated hotkeys
+            "stats": stats  # Updated stats with allocation status
+        }
         self.run.config.update(update_dict, allow_val_change=True)
 
-        # Track allocated hotkeys over time
+        # Log the allocated hotkeys for tracking
         self.run.log({"allocated_hotkeys": self.run.config["allocated_hotkeys"]})
 
         # Sign the run
         self.sign_run()
 
+
     def update_penalized_hotkeys_checklist(self, hotkey_list):
         """
         This function updates the penalized hotkeys checklist on validator side.
@@ -327,7 +328,119 @@ def get_allocated_hotkeys(self, valid_validator_hotkeys, flag):
                 bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}")
 
         return allocated_keys_list
-
+
+    def get_stats_allocated(self, valid_validator_hotkeys, flag):
+        """
+        Aggregates stats from all validator runs on wandb, returning a dict keyed by UID.
+        Only includes entries where 'own_score' == True (and optionally 'allocated' == True).
+        Then picks one 'dominant' entry per UID and preserves all fields (e.g., allocated).
+        """
+
+        # Query all validator runs
+        self.api.flush()
+        validator_runs = self.api.runs(
+            path=f"{PUBLIC_WANDB_ENTITY}/{PUBLIC_WANDB_NAME}",
+            filters={
+                "$and": [
+                    {"config.role": "validator"},
+                    {"config.config.netuid": self.config.netuid},
+                    {"config.stats": {"$exists": True}},
+                ]
+            }
+        )
+
+        if not validator_runs:
+            bt.logging.info("No validator info found in the project opencompute.")
+            return {}
+
+        # aggregator[uid] = list of dicts
+        aggregator = {}
+
+        for run in validator_runs:
+            try:
+                run_config = run.config
+                hotkey = run_config.get("hotkey")
+                stats_data = run_config.get("stats", {})
+
+                valid_validator_hotkey = (hotkey in valid_validator_hotkeys)
+                # If flag == False, allow *all* runs for data retrieval
+                if not flag:
+                    valid_validator_hotkey = True
+
+                # Only accept data if run verified, we have stats, and hotkey is valid
+                if self.verify_run(run) and stats_data and valid_validator_hotkey:
+                    # Iterate over the stats in that run
+                    for uid, data in stats_data.items():
+                        # If you also want allocated == True, re-enable that check:
+                        # if data.get("own_score") is True and data.get("allocated") is True:
+                        if data.get("own_score") is True:
+                            aggregator.setdefault(uid, []).append(data)
+
+            except Exception as e:
+                bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}")
+
+        # Helper function: pick the single "dominant" dict from valid_entries
+        def pick_dominant_dict(valid_entries):
+            """
+            Groups by (gpu_name, num_gpus, score), finds the combo that appears most often.
+            In case of tie, picks the highest score. Returns the original dict.
+            """
+            combos = []
+            for d in valid_entries:
+                specs = d.get("gpu_specs", {})
+                combo_key = (specs.get("gpu_name"), specs.get("num_gpus"), d.get("score", 0))
+                combos.append(combo_key)
+
+            c = Counter(combos)
+            if not c:
+                # Fallback if everything is zero or something else is wrong
+                return max(valid_entries, key=lambda x: x.get('score', 0))
+
+            # Find the top combo
+            top_combo, top_count = c.most_common(1)[0][0], c.most_common(1)[0][1]
+
+            # Check for ties and resolve
+            all_top_combos = [combo for combo, count in c.items() if count == top_count]
+            if len(all_top_combos) == 1:
+                chosen_combo = all_top_combos[0]
+            else:
+                # Tie: pick the one with the highest score
+                chosen_combo = max(all_top_combos, key=lambda x: x[2])  # Index 2 is the score
+
+            # Find the original dict in valid_entries matching the chosen combo
+            for d in valid_entries:
+                specs = d.get("gpu_specs", {})
+                triple = (specs.get("gpu_name"), specs.get("num_gpus"), d.get("score", 0))
+                if triple == chosen_combo:
+                    # Do not overwrite "allocated", keep it as is
+                    d["own_score"] = True  # Mark as chosen
+                    return d
+
+        final_stats = {}
+
+        for uid, entries in aggregator.items():
+            # Filter out zero-score entries if you want
+            valid_entries = [d for d in entries if d.get('score', 0) != 0]
+            if not valid_entries:
+                continue
+
+            # If there's exactly one valid entry, pick it
+            if len(valid_entries) == 1:
+                valid_entries[0]["own_score"] = True  # Mark as chosen
+                final_stats[uid] = valid_entries[0]
+            else:
+                # Otherwise pick a single "dominant" dict
+                chosen_dict = pick_dominant_dict(valid_entries)
+                final_stats[uid] = chosen_dict
+
+        # Convert string UIDs to int if needed
+        final_stats_int_keys = {}
+        for uid_str, data in final_stats.items():
+            uid_int = int(uid_str)
+            final_stats_int_keys[uid_int] = data
+
+        return final_stats_int_keys
+
     def get_penalized_hotkeys(self, valid_validator_hotkeys, flag):
         """
         This function gets all allocated hotkeys from all validators.
@@ -336,12 +449,12 @@ def get_penalized_hotkeys(self, valid_validator_hotkeys, flag):
         # Query all runs in the project and Filter runs where the role is 'validator'
         self.api.flush()
         validator_runs = self.api.runs(path=f"{PUBLIC_WANDB_ENTITY}/{PUBLIC_WANDB_NAME}",
-                                       filters={"$and": [{"config.role": "validator"},
-                                                         {"config.config.netuid": self.config.netuid},
-                                                         {"config.penalized_hotkeys": {"$exists": True}},]
+                                    filters={"$and": [{"config.role": "validator"},
+                                                        {"config.config.netuid": self.config.netuid},
+                                                        {"config.penalized_hotkeys": {"$exists": True}},]
                                                 })
 
-         # Check if the runs list is empty
+        # Check if the runs list is empty
         if not validator_runs:
             bt.logging.info("No validator info found in the project opencompute.")
             return []
@@ -492,16 +605,33 @@ def verify_run(self, run):
 
         return False
 
-    def get_penalized_hotkeys_checklist(self, valid_validator_hotkeys, flag):
-        """ This function gets penalized hotkeys checklist from your validator run """
-        if self.run:
-            try:
-                run_config = self.run.config
-                penalized_hotkeys_checklist = run_config.get('penalized_hotkeys_checklist')
-                return penalized_hotkeys_checklist
-            except Exception as e:
-                bt.logging.info(f"Run ID: {self.run.id}, Name: {self.run.name}, Error: {e}")
-                return []
+    def sync_allocated(self, hotkey):
+        """
+        This function syncs the allocated status of the miner with the wandb run.
+        """
+        # Fetch allocated hotkeys
+        allocated_hotkeys = self.get_allocated_hotkeys([], False)
+
+        if hotkey in allocated_hotkeys:
+            return True
         else:
-            bt.logging.info(f"No run info found")
-            return []
+            return False
+
+    def get_penalized_hotkeys_checklist(self, valid_validator_hotkeys, flag): 
+        """ This function gets penalized hotkeys checklist from a specific hardcoded validator. """
+        # Hardcoded run ID
+        run_id = "neuralinternet/opencompute/0djlnjjs"
+        # Fetch the specific run by its ID
+        self.api.flush()
+        run = self.api.run(run_id) 
+        if not run: 
+            bt.logging.info(f"No run info found for ID {run_id}.") 
+            return []
+        # Access the run's configuration
+        try: 
+            run_config = run.config  
+            penalized_hotkeys_checklist = run_config.get('penalized_hotkeys_checklist')
+            return penalized_hotkeys_checklist 
+        except Exception as e: 
+            bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}") 
+            return []
diff --git a/config.yaml b/config.yaml
@@ -72,6 +72,7 @@ gpu_performance:
     NVIDIA H100: 2.80
     NVIDIA A100-SXM4-80GB: 1.90
     NVIDIA A100 80GB PCIe: 1.65
+    NVIDIA A100-SXM4-40GB: 1.30
     NVIDIA L40s: 1.10
     NVIDIA RTX 6000 Ada Generation: 0.90
     NVIDIA L40: 1.0

diff --git a/neurons/Validator/calculate_pow_score.py b/neurons/Validator/calculate_pow_score.py
@@ -52,12 +52,12 @@ def calc_score_pog(gpu_specs, hotkey, allocated_hotkeys, config_data, mock=False
             score = score * 1
 
         # Logging score
-        bt.logging.info(f"Score - {hotkey}: {score:.2f}/100")
+        # bt.logging.info(f"Score - {hotkey}: {score:.2f}/100")
 
         # Normalize the score
         normalized_score = normalize(score, 0, 100)
 
         return normalized_score
     except Exception as e:
         bt.logging.error(f"An error occurred while calculating score for the following hotkey - {hotkey}: {e}")
-        return 0
+        return 0
diff --git a/neurons/Validator/database/miner.py b/neurons/Validator/database/miner.py
@@ -83,6 +83,14 @@ def purge_miner_entries(db: ComputeDb, uid: int, hotkey: str):
             "DELETE FROM miner_details WHERE hotkey = ?",
             (hotkey,),
         )
+        cursor.execute(
+            "DELETE FROM pog_stats WHERE hotkey = ?",
+            (hotkey,),
+        )
+        cursor.execute(
+            "DELETE FROM stats WHERE uid = ? AND hotkey = ?",
+            (uid, hotkey),
+        )
         db.conn.commit()
 
         if cursor.rowcount > 0: