Spaces:

Bias-Leaderboard
/

leaderboard

Sleeping

meg-huggingface committed on Jan 21

Commit

f712ac1

•

1 Parent(s): 29ae773

Removing limit

Files changed (2) hide show

src/backend/run_eval_suite.py CHANGED Viewed

@@ -5,7 +5,7 @@ from datetime import datetime
 from lm_eval import tasks, evaluator, utils
-from src.envs import RESULTS_REPO, API
 from src.backend.manage_requests import EvalRequest
 logging.getLogger("openai").setLevel(logging.WARNING)
@@ -13,7 +13,10 @@ logging.getLogger("openai").setLevel(logging.WARNING)
 def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
     if limit:
         print(
-            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
         )
     task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)

 from lm_eval import tasks, evaluator, utils
+from src.envs import API
 from src.backend.manage_requests import EvalRequest
 logging.getLogger("openai").setLevel(logging.WARNING)
 def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_size, device, local_dir: str, results_repo: str, no_cache=True, limit=None):
     if limit:
         print(
+            "!!!!!! WARNING: NOT A FULL EVALUATION !!!!! Just looking at %d."
+            "The `LIMIT` environment variable and/or `limit` in `run_evaluation` "
+            "SHOULD ONLY BE USED FOR TESTING. "
+            "REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT." % limit
         )
     task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)

src/envs.py CHANGED Viewed

@@ -13,7 +13,8 @@ TOKEN = os.environ.get("TOKEN")
 # Change to your org name
 OWNER = "Bias-Leaderboard"
 DEVICE = "cuda:0" # "cpu" or "cuda:0" if you add compute
-LIMIT = 20 # !!!! Should be None for actual evaluations!!!
 # ----------------------------------
 # Define some input/output variables.

 # Change to your org name
 OWNER = "Bias-Leaderboard"
 DEVICE = "cuda:0" # "cpu" or "cuda:0" if you add compute
+# !!!! None for actual evaluations. !!!!! Use a low val like 20 for debugging
+LIMIT = None # 20
 # ----------------------------------
 # Define some input/output variables.