Skip to content

Commit

Permalink
Merge pull request #4066 from injet-zhou/main
Browse files (browse the repository at this point in the history)
add throughput entry to training log
  • Loading branch information
hiyouga authored Jun 5, 2024
2 parents ca459f6 + b2f0459 commit f2580ad
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/llamafactory/extras/callbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,14 @@ def on_log(self, args: "TrainingArguments", state: "TrainerState", control: "Tra
percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,
elapsed_time=self.elapsed_time,
remaining_time=self.remaining_time,
+ throughput="{:.2f}".format(state.num_input_tokens_seen / (time.time() - self.start_time)),
+ total_tokens=state.num_input_tokens_seen,
)
logs = {k: v for k, v in logs.items() if v is not None}
if self.webui_mode and all(key in logs for key in ["loss", "learning_rate", "epoch"]):
logger.info(
- "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}}}".format(
- logs["loss"], logs["learning_rate"], logs["epoch"]
+ "{{'loss': {:.4f}, 'learning_rate': {:2.4e}, 'epoch': {:.2f}, 'throughput': {}}}".format(
+ logs["loss"], logs["learning_rate"], logs["epoch"], logs["throughput"]
)
)

Expand Down
1 change: 1 addition & 0 deletions src/llamafactory/webui/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def _parse_train_args(self, data: Dict["Component", Any]) -> Dict[str, Any]:
pure_bf16=(get("train.compute_type") == "pure_bf16"),
plot_loss=True,
ddp_timeout=180000000,
+ include_num_input_tokens_seen=True,
)

# checkpoints
Expand Down

0 comments on commit f2580ad

Please sign in to comment.