Skip to content

Commit

Permalink
Merge pull request #259 from neuralinternet/feature/CSN-399-consensus…
Browse files Browse the repository at this point in the history
…-scoring-mechanism-wandb

feature/csn 399 consensus scoring mechanism wandb
  • Loading branch information
nauttiilus authored Jan 21, 2025
2 parents 9204d98 + e1cd033 commit 767eb72
Show file tree
Hide file tree
Showing 12 changed files with 580 additions and 175 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -251,5 +251,8 @@ test_speed_file.dat
# cert
cert/

# wandb
wandb/

# neural internet register api
neurons/register-api/
neurons/register-api/
2 changes: 1 addition & 1 deletion compute/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import string

# Define the version of the template module.
__version__ = "1.7.3"
__version__ = "1.7.4"
__minimal_miner_version__ = "1.6.0"
__minimal_validator_version__ = "1.7.3"

Expand Down
15 changes: 15 additions & 0 deletions compute/utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ def init(self):
)
"""
)
cursor.execute(
"""
CREATE TABLE IF NOT EXISTS stats (
uid INTEGER PRIMARY KEY,
hotkey TEXT NOT NULL,
gpu_specs TEXT,
score REAL,
allocated BOOLEAN,
own_score BOOLEAN,
reliability_score REAL, -- Optional reliability score
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (hotkey) REFERENCES miner_details (hotkey) ON DELETE CASCADE
)
"""
)

self.conn.commit()
except Exception as e:
Expand Down
206 changes: 168 additions & 38 deletions compute/wandb/wandb.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
import wandb
import pathlib
import os
import json
import hashlib
import json
from collections import Counter

from dotenv import load_dotenv
from compute.utils.db import ComputeDb
from neurons.Validator.database.pog import retrieve_stats, write_stats
from neurons.Validator.script import get_perf_info

PUBLIC_WANDB_NAME = "opencompute"
Expand Down Expand Up @@ -193,39 +195,38 @@ def update_stats(self, stats: dict):

def update_allocated_hotkeys(self, hotkey_list):
"""
This function updates the allocated hotkeys on validator side.
It's useless to alter this information as it needs to be signed by a valid validator hotkey.
This function updates the allocated hotkeys on the validator side and syncs the allocation with the database.
"""
self.api.flush()

# Step 1: Read penalized hotkeys from the file (penalized_hotkeys.json in the root directory)
penalized_hotkeys = []
try:
with open("penalized_hotkeys.json", 'r') as file:
penalized_hotkeys_data = json.load(file)
penalized_hotkeys = [entry["hotkey"] for entry in penalized_hotkeys_data] # Extract hotkeys
except FileNotFoundError:
bt.logging.trace("Penalized hotkeys file not found.")
except json.JSONDecodeError:
bt.logging.trace("Error decoding JSON from penalized hotkeys file.")
# Retrieve current stats from the database
stats = retrieve_stats(self.db)

# Update the configuration with the new keys
# update_dict = {
# "allocated_hotkeys": hotkey_list
# }
# Update the `allocated` field in stats based on `hotkey_list`
for uid, data in stats.items():
hotkey = data.get("hotkey")
if hotkey in hotkey_list:
data["allocated"] = True # Mark as allocated if the hotkey is in the list
else:
data["allocated"] = False # Mark as not allocated if the hotkey is not in the list

# Write the updated stats back to the database
write_stats(self.db, stats)

# Prepare the update dictionary for the configuration
update_dict = {
"allocated_hotkeys": hotkey_list, # Update allocated hotkeys
"penalized_hotkeys_checklist": penalized_hotkeys # Add penalized hotkeys to the config
}
"allocated_hotkeys": hotkey_list, # Update allocated hotkeys
"stats": stats # Updated stats with allocation status
}
self.run.config.update(update_dict, allow_val_change=True)

# Track allocated hotkeys over time
# Log the allocated hotkeys for tracking
self.run.log({"allocated_hotkeys": self.run.config["allocated_hotkeys"]})

# Sign the run
self.sign_run()


def update_penalized_hotkeys_checklist(self, hotkey_list):
"""
This function updates the penalized hotkeys checklist on validator side.
Expand Down Expand Up @@ -327,7 +328,119 @@ def get_allocated_hotkeys(self, valid_validator_hotkeys, flag):
bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}")

return allocated_keys_list


def get_stats_allocated(self, valid_validator_hotkeys, flag):
"""
Aggregates stats from all validator runs on wandb, returning a dict keyed by UID.
Only includes entries where 'own_score' == True (and optionally 'allocated' == True).
Then picks one 'dominant' entry per UID and preserves all fields (e.g., allocated).
"""

# Query all validator runs
self.api.flush()
validator_runs = self.api.runs(
path=f"{PUBLIC_WANDB_ENTITY}/{PUBLIC_WANDB_NAME}",
filters={
"$and": [
{"config.role": "validator"},
{"config.config.netuid": self.config.netuid},
{"config.stats": {"$exists": True}},
]
}
)

if not validator_runs:
bt.logging.info("No validator info found in the project opencompute.")
return {}

# aggregator[uid] = list of dicts
aggregator = {}

for run in validator_runs:
try:
run_config = run.config
hotkey = run_config.get("hotkey")
stats_data = run_config.get("stats", {})

valid_validator_hotkey = (hotkey in valid_validator_hotkeys)
# If flag == False, allow *all* runs for data retrieval
if not flag:
valid_validator_hotkey = True

# Only accept data if run verified, we have stats, and hotkey is valid
if self.verify_run(run) and stats_data and valid_validator_hotkey:
# Iterate over the stats in that run
for uid, data in stats_data.items():
# If you also want allocated == True, re-enable that check:
# if data.get("own_score") is True and data.get("allocated") is True:
if data.get("own_score") is True:
aggregator.setdefault(uid, []).append(data)

except Exception as e:
bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}")

# Helper function: pick the single "dominant" dict from valid_entries
def pick_dominant_dict(valid_entries):
"""
Groups by (gpu_name, num_gpus, score), finds the combo that appears most often.
In case of tie, picks the highest score. Returns the original dict.
"""
combos = []
for d in valid_entries:
specs = d.get("gpu_specs", {})
combo_key = (specs.get("gpu_name"), specs.get("num_gpus"), d.get("score", 0))
combos.append(combo_key)

c = Counter(combos)
if not c:
# Fallback if everything is zero or something else is wrong
return max(valid_entries, key=lambda x: x.get('score', 0))

# Find the top combo
top_combo, top_count = c.most_common(1)[0][0], c.most_common(1)[0][1]

# Check for ties and resolve
all_top_combos = [combo for combo, count in c.items() if count == top_count]
if len(all_top_combos) == 1:
chosen_combo = all_top_combos[0]
else:
# Tie: pick the one with the highest score
chosen_combo = max(all_top_combos, key=lambda x: x[2]) # Index 2 is the score

# Find the original dict in valid_entries matching the chosen combo
for d in valid_entries:
specs = d.get("gpu_specs", {})
triple = (specs.get("gpu_name"), specs.get("num_gpus"), d.get("score", 0))
if triple == chosen_combo:
# Do not overwrite "allocated", keep it as is
d["own_score"] = True # Mark as chosen
return d

final_stats = {}

for uid, entries in aggregator.items():
# Filter out zero-score entries if you want
valid_entries = [d for d in entries if d.get('score', 0) != 0]
if not valid_entries:
continue

# If there's exactly one valid entry, pick it
if len(valid_entries) == 1:
valid_entries[0]["own_score"] = True # Mark as chosen
final_stats[uid] = valid_entries[0]
else:
# Otherwise pick a single "dominant" dict
chosen_dict = pick_dominant_dict(valid_entries)
final_stats[uid] = chosen_dict

# Convert string UIDs to int if needed
final_stats_int_keys = {}
for uid_str, data in final_stats.items():
uid_int = int(uid_str)
final_stats_int_keys[uid_int] = data

return final_stats_int_keys

def get_penalized_hotkeys(self, valid_validator_hotkeys, flag):
"""
This function gets all allocated hotkeys from all validators.
Expand All @@ -336,12 +449,12 @@ def get_penalized_hotkeys(self, valid_validator_hotkeys, flag):
# Query all runs in the project and Filter runs where the role is 'validator'
self.api.flush()
validator_runs = self.api.runs(path=f"{PUBLIC_WANDB_ENTITY}/{PUBLIC_WANDB_NAME}",
filters={"$and": [{"config.role": "validator"},
{"config.config.netuid": self.config.netuid},
{"config.penalized_hotkeys": {"$exists": True}},]
filters={"$and": [{"config.role": "validator"},
{"config.config.netuid": self.config.netuid},
{"config.penalized_hotkeys": {"$exists": True}},]
})

# Check if the runs list is empty
# Check if the runs list is empty
if not validator_runs:
bt.logging.info("No validator info found in the project opencompute.")
return []
Expand Down Expand Up @@ -492,16 +605,33 @@ def verify_run(self, run):

return False

def get_penalized_hotkeys_checklist(self, valid_validator_hotkeys, flag):
""" This function gets penalized hotkeys checklist from your validator run """
if self.run:
try:
run_config = self.run.config
penalized_hotkeys_checklist = run_config.get('penalized_hotkeys_checklist')
return penalized_hotkeys_checklist
except Exception as e:
bt.logging.info(f"Run ID: {self.run.id}, Name: {self.run.name}, Error: {e}")
return []
def sync_allocated(self, hotkey):
"""
This function syncs the allocated status of the miner with the wandb run.
"""
# Fetch allocated hotkeys
allocated_hotkeys = self.get_allocated_hotkeys([], False)

if hotkey in allocated_hotkeys:
return True
else:
bt.logging.info(f"No run info found")
return []
return False

def get_penalized_hotkeys_checklist(self, valid_validator_hotkeys, flag):
""" This function gets penalized hotkeys checklist from a specific hardcoded validator. """
# Hardcoded run ID
run_id = "neuralinternet/opencompute/0djlnjjs"
# Fetch the specific run by its ID
self.api.flush()
run = self.api.run(run_id)
if not run:
bt.logging.info(f"No run info found for ID {run_id}.")
return []
# Access the run's configuration
try:
run_config = run.config
penalized_hotkeys_checklist = run_config.get('penalized_hotkeys_checklist')
return penalized_hotkeys_checklist
except Exception as e:
bt.logging.info(f"Run ID: {run.id}, Name: {run.name}, Error: {e}")
return []
1 change: 1 addition & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ gpu_performance:
NVIDIA H100: 2.80
NVIDIA A100-SXM4-80GB: 1.90
NVIDIA A100 80GB PCIe: 1.65
NVIDIA A100-SXM4-40GB: 1.30
NVIDIA L40s: 1.10
NVIDIA RTX 6000 Ada Generation: 0.90
NVIDIA L40: 1.0
Expand Down
4 changes: 2 additions & 2 deletions neurons/Validator/calculate_pow_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ def calc_score_pog(gpu_specs, hotkey, allocated_hotkeys, config_data, mock=False
score = score * 1

# Logging score
bt.logging.info(f"Score - {hotkey}: {score:.2f}/100")
# bt.logging.info(f"Score - {hotkey}: {score:.2f}/100")

# Normalize the score
normalized_score = normalize(score, 0, 100)

return normalized_score
except Exception as e:
bt.logging.error(f"An error occurred while calculating score for the following hotkey - {hotkey}: {e}")
return 0
return 0
8 changes: 8 additions & 0 deletions neurons/Validator/database/miner.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ def purge_miner_entries(db: ComputeDb, uid: int, hotkey: str):
"DELETE FROM miner_details WHERE hotkey = ?",
(hotkey,),
)
cursor.execute(
"DELETE FROM pog_stats WHERE hotkey = ?",
(hotkey,),
)
cursor.execute(
"DELETE FROM stats WHERE uid = ? AND hotkey = ?",
(uid, hotkey),
)
db.conn.commit()

if cursor.rowcount > 0:
Expand Down
Loading

0 comments on commit 767eb72

Please sign in to comment.