fix eval_bs in fake format and reset auto-gptq exporting max_shard_size (#332)
wenhuach21 authored Nov 22, 2024
1 parent 58c11fa commit 8fb9552
Showing 3 changed files with 2 additions and 3 deletions.
1 change: 0 additions & 1 deletion README.md
```diff
@@ -27,7 +27,6 @@ more accuracy data and recipes across various models.

 ## What's New
 * [2024/11] We provide some tips and tricks for LLM&VLM quantization, please check out [this file](./docs/tips_and_tricks.md).
-*
 * [2024/10] AutoRound has been integrated to [torch/ao](https://github.com/pytorch/ao), check out
   their [release note](https://github.com/pytorch/ao/releases/tag/v0.6.1)
 * [2024/10] Important update: We now support full-range symmetric quantization and have made it the default
```
2 changes: 1 addition & 1 deletion auto_round/export/export_to_autogptq/export.py
```diff
@@ -213,7 +213,7 @@ def save(model: torch.nn.Module, save_dir: str, max_shard_size: str = "5GB", saf
         safe_serialization (`bool`, defaults to `True`):
             Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
     """
-    max_shard_size = "10000GB" ## API of auto-gptq with marlin does not support shard size
+    ##max_shard_size = "10000GB" ## API of auto-gptq with marlin does not support shard size
     os.makedirs(save_dir, exist_ok=True)
     model.save_pretrained(save_dir, max_shard_size=max_shard_size, safe_serialization=safe_serialization)
     config_file = "quantize_config.json"
```
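The export change stops forcing an effectively unbounded shard size, so the `max_shard_size` argument passed by the caller (default `"5GB"`) takes effect again. As a minimal sketch of what that string means, the hypothetical helper below (not part of auto_round or transformers) parses a transformers-style size string into a byte count:

```python
# Hypothetical helper, for illustration only: interprets a max_shard_size
# string such as "5GB" the way sharded checkpoint saving treats it,
# i.e. as a decimal-unit byte budget per shard file.
UNITS = {"KB": 10**3, "MB": 10**6, "GB": 10**9, "TB": 10**12}

def parse_shard_size(size: str) -> int:
    """Parse a size string like '5GB' into an integer number of bytes."""
    for unit, factor in UNITS.items():
        if size.upper().endswith(unit):
            return int(float(size[: -len(unit)]) * factor)
    return int(size)  # a bare integer string is taken as raw bytes
```

Under this reading, the old hard-coded `"10000GB"` budget was large enough that the whole model always landed in a single file, which is why commenting it out restores normal sharding.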
2 changes: 1 addition & 1 deletion auto_round/script/llm.py
```diff
@@ -458,7 +458,7 @@ def tune(args):
             user_model = model
         else:
             user_model = model.to(device_str)
-        if args.eval_bs == "auto":
+        if args.eval_bs is None or args.eval_bs == "auto":
             args.eval_bs = 16
         from auto_round.eval.evaluation import simple_evaluate_user_model
         res = simple_evaluate_user_model(user_model, tokenizer, tasks=tasks, batch_size=args.eval_bs)
```
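The llm.py fix widens the fallback so that an unset batch size (`None`) is treated the same as the literal string `"auto"`. The hypothetical helper below (not a function in auto_round) mirrors that inline check:

```python
# Sketch of the batch-size fallback introduced by this commit:
# both a missing value (None) and the string "auto" fall back to 16.
def resolve_eval_bs(eval_bs, default=16):
    """Return a concrete integer evaluation batch size."""
    if eval_bs is None or eval_bs == "auto":
        return default
    return int(eval_bs)
```

Before the change, `args.eval_bs` left at `None` would flow through unchanged and reach the evaluator as an invalid batch size; the extra `is None` branch closes that gap.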
