Reminder
Reproduction
accelerate launch --config_file config.yaml src/train_bash.py \
    --stage sft \
    --do_train \
    --model_name_or_path ${MODEL_NAME} \
    --dataset alpaca_zh \
    --dataloader_persistent_workers \
    --dataloader_num_workers 8 \
    --template default \
    --use_unsloth \
    --finetuning_type ${FINETUNING_TYPE} \
    --lora_target q_proj,v_proj \
    --lora_rank 8 \
    --lora_dropout 0.0 \
    --output_dir ${SAVE_DIR} \
    --overwrite_cache \
    --per_device_train_batch_size 20 \
    --gradient_accumulation_steps 4 \
    --lr_scheduler_type cosine \
    --logging_steps 100 \
    --save_steps 3000 \
    --learning_rate 5e-5 \
    --num_train_epochs 3.0 \
    --plot_loss \
    --fp16
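The crash appears to come from dataset records whose response field is null, so a quick sanity check before launching is to scan the data file. This is a rough sketch only: the file path is a placeholder and the alpaca-style instruction/input/output keys are an assumption about how the dataset is stored.

import json

# Placeholder path; point it at the JSON file backing the alpaca_zh dataset.
with open("data/alpaca_zh.json", "r", encoding="utf-8") as f:
    records = json.load(f)

# A null "output" ends up as message["content"] = None and crashes str.replace().
bad = [i for i, rec in enumerate(records) if rec.get("output") is None]
print(f"{len(bad)} records with a null output field, e.g. indices {bad[:20]}")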
Expected behavior
The Formatter.apply method should handle None values gracefully, either by skipping the replacement or substituting a default value.
System Info
Traceback (most recent call last):
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/train_bash.py", line 14, in <module>
    main()
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/train_bash.py", line 5, in main
    run_exp()
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/train/tuner.py", line 31, in run_exp
    run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/train/sft/workflow.py", line 32, in run_sft
    dataset = get_dataset(tokenizer, model_args, data_args, training_args, stage="sft")
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/data/loader.py", line 180, in get_dataset
    dataset = dataset.map(preprocess_func, batched=True, remove_columns=column_names, **kwargs)
  File "/nvme/share/share_data/yangyihe/envs/llama_factory/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 592, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "/nvme/share/share_data/yangyihe/envs/llama_factory/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 557, in wrapper
    out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
  File "/nvme/share/share_data/yangyihe/envs/llama_factory/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3093, in map
    for rank, done, content in Dataset._map_single(**dataset_kwargs):
  File "/nvme/share/share_data/yangyihe/envs/llama_factory/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3470, in _map_single
    batch = apply_function_on_filtered_inputs(
  File "/nvme/share/share_data/yangyihe/envs/llama_factory/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 3349, in apply_function_on_filtered_inputs
    processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/data/preprocess.py", line 60, in preprocess_supervised_dataset
    template.encode_multiturn(
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/data/template.py", line 65, in encode_multiturn
    return self._encode(tokenizer, messages, system, tools, cutoff_len, reserved_label_len)
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/data/template.py", line 94, in _encode
    elements += self.format_assistant.apply(content=message["content"])
  File "/home/zhuwenhui.p/project/LLaMA-Factory/src/llmtuner/data/formatter.py", line 99, in apply
    slot = slot.replace("{{" + name + "}}", value, 1)
TypeError: replace() argument 2 must be str, not None
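For context, the TypeError is easy to reproduce in isolation: str.replace() requires a string replacement, so a None message content fails immediately. The slot string below is illustrative, not the exact template content:

# Standalone illustration (not LLaMA-Factory code).
slot = "{{content}}"          # illustrative template slot
content = None                # what message["content"] is for the offending record
slot.replace("{{content}}", content, 1)  # TypeError: replace() argument 2 must be str, not None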
Others
Possible Fix
Modify the Formatter.apply method to check for None and fall back to a default string. Here is a suggested change:
for slot in self.slots:
    if isinstance(slot, str):
        for name, value in kwargs.items():
            value = value if value is not None else ""
            slot = slot.replace("{{" + name + "}}", value, 1)
        elements.append(slot)
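Substituting an empty string keeps preprocessing from crashing, but it also silently turns malformed records into empty training targets. An alternative, sketched below with the same loop structure (this is not the library's current behavior, just an option), is to fail fast and point at the offending placeholder so the bad dataset record can be fixed:

for slot in self.slots:
    if isinstance(slot, str):
        for name, value in kwargs.items():
            if value is None:
                # Surface the bad field instead of training on an empty target.
                raise ValueError(f"Received None for placeholder '{name}'; check the dataset for null fields.")
            slot = slot.replace("{{" + name + "}}", value, 1)
        elements.append(slot)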