Skip to content

Commit

Permalink
Merge pull request #161 from ServiceNow/AB_res
Browse files Browse the repository at this point in the history
Ab res
  • Loading branch information
recursix authored Nov 29, 2024
2 parents 7dd91a7 + 4756d94 commit 9e9b800
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
7 changes: 7 additions & 0 deletions reproducibility_journal.csv
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,10 @@ ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,weblinx_test,0.0.1.dev13,202
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.079,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,workarena_l2_agent_curriculum_eval,0.4.1,2024-11-29_14-28-47,528da1f2-1949-41dc-b988-85f19f435af2,0.072,0.017,2,235/235,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,b115b2716d8a6328824684a692ed642297f0b1dc,,0.13.3,70dac253628c476aff1af6a975f27f8563453ad2,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,miniwob,0.13.3,2024-11-29_16-14-00,4d748972-6d35-4489-a197-138b656a7db3,0.646,0.019,0,625/625,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.1,becb4856fb1612f44010fe74ef8155d367ca17fc,,0.13.3,70dac253628c476aff1af6a975f27f8563453ad2,
ThibaultLSDC,GenericAgent-gpt-4o,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.005,0.003,2,213/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-gpt-4o-mini,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.002,0.002,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.008,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.005,8,206/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-8b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.001,0.001,15,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.009,0.005,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None,
2 changes: 1 addition & 1 deletion src/agentlab/experiments/study.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
import gzip
import logging
import pickle
Expand Down Expand Up @@ -405,7 +406,6 @@ def load_most_recent(root_dir: Path = None, contains=None) -> "Study":

def _make_study_name(agent_names, benchmark_names, suffix=None):
"""Make a study name from the agent and benchmark names."""

# extract unique agent and benchmark names
agent_names = list(set(agent_names))
benchmark_names = list(set(benchmark_names))
Expand Down

0 comments on commit 9e9b800

Please sign in to comment.