
Commit

fix #2777 #2895
hiyouga committed Mar 20, 2024
1 parent 7b8f502 commit 9bec3c9
Showing 12 changed files with 104 additions and 48 deletions.
8 changes: 5 additions & 3 deletions README.md
@@ -486,7 +486,9 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
#### Use Huggingface Accelerate

```bash
accelerate launch --config_file config.yaml src/train_bash.py # arguments (same as above)
accelerate launch --config_file config.yaml src/train_bash.py \
--ddp_timeout 180000000 \
... # arguments (same as above)
```
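
The `--ddp_timeout 180000000` argument added above is a Hugging Face `TrainingArguments` field: it sets the distributed process-group timeout in seconds, so that slow preprocessing or checkpointing on one rank does not trip the default 30-minute timeout on the others. A minimal sketch of the mapping, assuming `transformers` (and its `accelerate` dependency) is installed and using an illustrative value:

```python
from datetime import timedelta

from transformers import TrainingArguments

# --ddp_timeout on the train_bash.py command line is parsed into
# TrainingArguments.ddp_timeout (seconds); Trainer hands it, as a timedelta,
# to the distributed process-group initialization.
args = TrainingArguments(output_dir="out", ddp_timeout=180000000)
print(timedelta(seconds=args.ddp_timeout))  # ~2083 days, i.e. effectively "never time out"
```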

<details><summary>Example config.yaml for LoRA training</summary>
@@ -519,8 +521,8 @@ use_cpu: false
```bash
deepspeed --num_gpus 8 src/train_bash.py \
--deepspeed ds_config.json \
--ddp_timeout 180000000 \
--deepspeed ds_config.json \
--ddp_timeout 180000000 \
... # arguments (same as above)
```

7 changes: 4 additions & 3 deletions README_zh.md
@@ -485,7 +485,9 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
#### 使用 Huggingface Accelerate

```bash
accelerate launch --config_file config.yaml src/train_bash.py # 参数同上
accelerate launch --config_file config.yaml src/train_bash.py \
--ddp_timeout 180000000 \
... # 参数同上
```

<details><summary>使用 Accelerate 进行 LoRA 训练的 config.yaml 示例</summary>
@@ -519,9 +521,8 @@ use_cpu: false
```bash
deepspeed --num_gpus 8 src/train_bash.py \
--deepspeed ds_config.json \
--ddp_timeout 180000000 \
--ddp_timeout 180000000 \
... # 参数同上

```

<details><summary>使用 DeepSpeed ZeRO-2 进行全参数训练的 ds_config.json 示例</summary>
12 changes: 12 additions & 0 deletions src/llmtuner/train/dpo/trainer.py
@@ -8,11 +8,14 @@
from trl.trainer.utils import disable_dropout_in_model

from ...extras.constants import IGNORE_INDEX
from ..utils import create_custom_optimzer


if TYPE_CHECKING:
from transformers import PreTrainedModel

from ...hparams import FinetuningArguments


class CustomDPOTrainer(DPOTrainer):
def __init__(
@@ -21,6 +24,7 @@ def __init__(
loss_type: Literal["sigmoid", "hinge", "ipo", "kto_pair"],
ftx_gamma: float,
model: Union["PreTrainedModel", torch.nn.Module],
finetuning_args: "FinetuningArguments",
ref_model: Optional[Union["PreTrainedModel", torch.nn.Module]] = None,
disable_dropout: bool = True,
**kwargs,
@@ -30,6 +34,7 @@ def __init__(
if ref_model is not None:
disable_dropout_in_model(ref_model)

self.finetuning_args = finetuning_args
self.reference_free = False
self.use_dpo_data_collator = True # hack to avoid warning
self.generate_during_eval = False # disable at evaluation
@@ -61,6 +66,13 @@ def __init__(
else:
self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)

def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
if self.optimizer is None:
self.create_optimizer()

self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)

def sft_loss(self, chosen_logits: torch.FloatTensor, chosen_labels: torch.LongTensor) -> torch.Tensor:
r"""
Computes supervised cross-entropy loss of given labels under the given logits.
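
The `sft_loss` helper whose signature closes the hunk above adds a supervised cross-entropy term over the chosen responses, presumably weighted by `ftx_gamma` when `dpo_ftx > 0`. Below is a generic, self-contained illustration of such a masked cross-entropy (label shifting omitted, `IGNORE_INDEX` assumed to be -100 as in `extras.constants`); it is a sketch of the idea, not a copy of the llmtuner implementation:

```python
import torch
import torch.nn.functional as F

IGNORE_INDEX = -100  # positions carrying this label are excluded from the loss
vocab_size = 8

chosen_logits = torch.randn(2, 5, vocab_size)         # (batch, seq_len, vocab)
chosen_labels = torch.randint(0, vocab_size, (2, 5))  # token ids of the chosen response
chosen_labels[:, :2] = IGNORE_INDEX                   # mask the prompt tokens

loss = F.cross_entropy(
    chosen_logits.view(-1, vocab_size),
    chosen_labels.view(-1),
    ignore_index=IGNORE_INDEX,
)
print(loss.item())
```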
5 changes: 2 additions & 3 deletions src/llmtuner/train/dpo/workflow.py
@@ -7,7 +7,7 @@
from ...extras.ploting import plot_loss
from ...hparams import ModelArguments
from ...model import load_model, load_tokenizer
from ..utils import create_custom_optimzer, create_modelcard_and_push, create_ref_model
from ..utils import create_modelcard_and_push, create_ref_model
from .collator import DPODataCollatorWithPadding
from .trainer import CustomDPOTrainer

@@ -44,18 +44,17 @@ def run_dpo(
training_args.remove_unused_columns = False # important for pairwise dataset

# Initialize our Trainer
optimizer = create_custom_optimzer(model, dataset, training_args, finetuning_args)
trainer = CustomDPOTrainer(
beta=finetuning_args.dpo_beta,
loss_type=finetuning_args.dpo_loss,
ftx_gamma=finetuning_args.dpo_ftx,
finetuning_args=finetuning_args,
model=model,
ref_model=ref_model,
args=training_args,
tokenizer=tokenizer,
data_collator=data_collator,
callbacks=callbacks,
optimizers=(optimizer, None),
**split_dataset(dataset, data_args, training_args),
)

8 changes: 4 additions & 4 deletions src/llmtuner/train/ppo/workflow.py
@@ -64,16 +64,16 @@ def run_ppo(
)

# Create optimizer and scheduler
optimizer = create_custom_optimzer(model, dataset, training_args, finetuning_args)
if optimizer is None:
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)

if training_args.max_steps > 0:
num_training_steps = training_args.max_steps
else:
total_train_batch_size = backward_batch_size * finetuning_args.ppo_buffer_size * training_args.world_size
num_training_steps = training_args.num_train_epochs * math.ceil(len(dataset) / total_train_batch_size)

optimizer = create_custom_optimzer(model, training_args, finetuning_args, num_training_steps)
if optimizer is None:
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)

lr_scheduler = get_scheduler(
training_args.lr_scheduler_type,
optimizer=optimizer,
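
The key reordering in the hunk above is that `num_training_steps` is now computed before `create_custom_optimzer` is called, since the custom optimizer (and the LR scheduler) needs the total step count. A self-contained arithmetic sketch with made-up values, mirroring the names in the diff:

```python
import math

# Illustrative values only (in the real workflow these come from training_args
# and finetuning_args).
backward_batch_size = 8   # per-device batch size * gradient accumulation steps
ppo_buffer_size = 1       # finetuning_args.ppo_buffer_size
world_size = 2            # number of distributed processes
num_train_epochs = 3
dataset_len = 10_000

total_train_batch_size = backward_batch_size * ppo_buffer_size * world_size
num_training_steps = num_train_epochs * math.ceil(dataset_len / total_train_batch_size)
print(num_training_steps)  # 1875 -- consumed by the optimizer factory and the lr scheduler
```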
30 changes: 30 additions & 0 deletions src/llmtuner/train/pt/trainer.py
@@ -0,0 +1,30 @@
from typing import TYPE_CHECKING

from transformers import Trainer

from ...extras.logging import get_logger
from ..utils import create_custom_optimzer


if TYPE_CHECKING:
from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class CustomTrainer(Trainer):
r"""
Inherits Trainer for custom optimizer.
"""

def __init__(self, finetuning_args: "FinetuningArguments", **kwargs) -> None:
super().__init__(**kwargs)
self.finetuning_args = finetuning_args

def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
if self.optimizer is None:
self.create_optimizer()

self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)
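
The `create_optimizer_and_scheduler` override above is the pattern this commit repeats in every trainer: ask the custom optimizer factory first and fall back to the stock `Trainer` optimizer when it returns `None`. A self-contained toy illustration of that fallback in plain PyTorch; `maybe_custom_optimizer` is a hypothetical stand-in for `create_custom_optimzer`:

```python
from typing import Optional

import torch


def maybe_custom_optimizer(model: torch.nn.Module, use_custom: bool) -> Optional[torch.optim.Optimizer]:
    # Stand-in for create_custom_optimzer: only return an optimizer when the
    # finetuning config asks for one, otherwise signal "use the default" with None.
    return torch.optim.SGD(model.parameters(), lr=0.1) if use_custom else None


model = torch.nn.Linear(4, 2)
optimizer = maybe_custom_optimizer(model, use_custom=False)
if optimizer is None:  # mirrors `if self.optimizer is None: self.create_optimizer()`
    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
print(type(optimizer).__name__)  # AdamW -- the default path
```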
10 changes: 5 additions & 5 deletions src/llmtuner/train/pt/workflow.py
@@ -3,12 +3,13 @@
import math
from typing import TYPE_CHECKING, List, Optional

from transformers import DataCollatorForLanguageModeling, Trainer
from transformers import DataCollatorForLanguageModeling

from ...data import get_dataset, split_dataset
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..utils import create_custom_optimzer, create_modelcard_and_push
from ..utils import create_modelcard_and_push
from .trainer import CustomTrainer


if TYPE_CHECKING:
@@ -30,14 +31,13 @@ def run_pt(
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Initialize our Trainer
optimizer = create_custom_optimzer(model, dataset, training_args, finetuning_args)
trainer = Trainer(
trainer = CustomTrainer(
model=model,
args=training_args,
finetuning_args=finetuning_args,
tokenizer=tokenizer,
data_collator=data_collator,
callbacks=callbacks,
optimizers=(optimizer, None),
**split_dataset(dataset, data_args, training_args),
)

17 changes: 14 additions & 3 deletions src/llmtuner/train/rm/trainer.py
@@ -6,25 +6,36 @@
from transformers import Trainer

from ...extras.logging import get_logger
from ..utils import create_custom_optimzer


if TYPE_CHECKING:
from transformers.modeling_utils import PreTrainedModel
from transformers.trainer import PredictionOutput

from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class PairwiseTrainer(Trainer):
r"""
Inherits PeftTrainer to compute pairwise loss.
Inherits Trainer to compute pairwise loss.
"""

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __init__(self, finetuning_args: "FinetuningArguments", **kwargs) -> None:
super().__init__(**kwargs)
self.finetuning_args = finetuning_args
self.can_return_loss = True # override property to return eval_loss

def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
if self.optimizer is None:
self.create_optimizer()

self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)

def compute_loss(
self, model: "PreTrainedModel", inputs: Dict[str, torch.Tensor], return_outputs: bool = False
) -> Union[torch.Tensor, Tuple[torch.Tensor, List[torch.Tensor]]]:
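
`PairwiseTrainer.compute_loss`, whose signature closes the hunk above, scores the chosen and rejected responses with the model's value head and optimizes a pairwise ranking objective. Its body is not part of this diff; the sketch below shows the standard Bradley-Terry formulation commonly used for reward models, with made-up reward values, and is not a verbatim copy of the llmtuner code:

```python
import torch
import torch.nn.functional as F

chosen_rewards = torch.tensor([1.2, 0.3, 2.0])     # scores of the preferred responses
rejected_rewards = torch.tensor([0.4, 0.5, -1.0])  # scores of the rejected responses

# Maximize the margin between chosen and rejected scores.
loss = -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
print(loss.item())
```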
5 changes: 2 additions & 3 deletions src/llmtuner/train/rm/workflow.py
@@ -7,7 +7,7 @@
from ...extras.misc import fix_valuehead_checkpoint
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ..utils import create_custom_optimzer, create_modelcard_and_push
from ..utils import create_modelcard_and_push
from .collator import PairwiseDataCollatorWithPadding
from .metric import compute_accuracy
from .trainer import PairwiseTrainer
@@ -35,14 +35,13 @@ def run_rm(
training_args.remove_unused_columns = False # important for pairwise dataset

# Initialize our Trainer
optimizer = create_custom_optimzer(model, dataset, training_args, finetuning_args)
trainer = PairwiseTrainer(
model=model,
args=training_args,
finetuning_args=finetuning_args,
tokenizer=tokenizer,
data_collator=data_collator,
callbacks=callbacks + [FixValueHeadModelCallback()],
optimizers=(optimizer, None),
compute_metrics=compute_accuracy,
**split_dataset(dataset, data_args, training_args),
)
19 changes: 16 additions & 3 deletions src/llmtuner/train/sft/trainer.py
@@ -4,28 +4,41 @@

import numpy as np
import torch
import torch.nn as nn
from transformers import Seq2SeqTrainer

from ...extras.constants import IGNORE_INDEX
from ...extras.logging import get_logger
from ..utils import create_custom_optimzer


if TYPE_CHECKING:
from transformers.trainer import PredictionOutput

from ...hparams import FinetuningArguments


logger = get_logger(__name__)


class CustomSeq2SeqTrainer(Seq2SeqTrainer):
r"""
Inherits PeftTrainer to compute generative metrics such as BLEU and ROUGE.
Inherits Seq2SeqTrainer to compute generative metrics such as BLEU and ROUGE.
"""

def __init__(self, finetuning_args: "FinetuningArguments", **kwargs) -> None:
super().__init__(**kwargs)
self.finetuning_args = finetuning_args

def create_optimizer_and_scheduler(self, num_training_steps: int) -> None:
self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args, num_training_steps)
if self.optimizer is None:
self.create_optimizer()

self.create_scheduler(num_training_steps=num_training_steps, optimizer=self.optimizer)

def prediction_step(
self,
model: nn.Module,
model: "torch.nn.Module",
inputs: Dict[str, Union[torch.Tensor, Any]],
prediction_loss_only: bool,
ignore_keys: Optional[List[str]] = None,
10 changes: 4 additions & 6 deletions src/llmtuner/train/sft/workflow.py
@@ -9,10 +9,9 @@
from ...extras.misc import get_logits_processor
from ...extras.ploting import plot_loss
from ...model import load_model, load_tokenizer
from ...train.sft.metric import ComputeMetrics
from ...train.sft.trainer import CustomSeq2SeqTrainer
from ...train.utils import create_modelcard_and_push
from ..utils import create_custom_optimzer
from ..utils import create_modelcard_and_push
from .metric import ComputeMetrics
from .trainer import CustomSeq2SeqTrainer


if TYPE_CHECKING:
@@ -50,14 +49,13 @@ def run_sft(
training_args.generation_num_beams = data_args.eval_num_beams or training_args.generation_num_beams

# Initialize our Trainer
optimizer = create_custom_optimzer(model, dataset, training_args, finetuning_args)
trainer = CustomSeq2SeqTrainer(
model=model,
args=training_args,
finetuning_args=finetuning_args,
tokenizer=tokenizer,
data_collator=data_collator,
callbacks=callbacks,
optimizers=(optimizer, None),
compute_metrics=ComputeMetrics(tokenizer) if training_args.predict_with_generate else None,
**split_dataset(dataset, data_args, training_args),
)