You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I have read the README and searched the existing issues.
System Info
root@4417b4b2e765:/app# llamafactory-cli env
[2024-06-22 04:05:13,124] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[WARNING] async_io requires the dev libaio .so object and headers but these were not found.
[WARNING] async_io: please install the libaio-dev package with apt
[WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.
[WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
[WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.2
[WARNING] using untested triton version (2.1.0), only 1.0.0 is known to be compatible
Reminder
System Info
root@4417b4b2e765:/app# llamafactory-cli env
[2024-06-22 04:05:13,124] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[WARNING] async_io requires the dev libaio .so object and headers but these were not found.
[WARNING] async_io: please install the libaio-dev package with apt
[WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.
[WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
[WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.2
[WARNING] using untested triton version (2.1.0), only 1.0.0 is known to be compatible
llamafactory
version: 0.8.3.dev0
Reproduction
还没有进行到训练那一步,只是在容器内直接执行了 llamafactory-cli train(没有带任何参数),输出如下:
root@4417b4b2e765:/app# llamafactory-cli train
[2024-06-22 04:07:50,556] [INFO] [real_accelerator.py:203:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[WARNING] async_io requires the dev libaio .so object and headers but these were not found.
[WARNING] async_io: please install the libaio-dev package with apt
[WARNING] If libaio is already installed (perhaps from source), try setting the CFLAGS and LDFLAGS environment variables to where it can be found.
[WARNING] Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH
[WARNING] sparse_attn requires a torch version >= 1.5 and < 2.0 but detected 2.2
[WARNING] using untested triton version (2.1.0), only 1.0.0 is known to be compatible
usage: llamafactory-cli [-h] --model_name_or_path MODEL_NAME_OR_PATH [--adapter_name_or_path ADAPTER_NAME_OR_PATH] [--adapter_folder ADAPTER_FOLDER] [--cache_dir CACHE_DIR] [--use_fast_tokenizer [USE_FAST_TOKENIZER]]
[--no_use_fast_tokenizer] [--resize_vocab [RESIZE_VOCAB]] [--split_special_tokens [SPLIT_SPECIAL_TOKENS]] [--new_special_tokens NEW_SPECIAL_TOKENS] [--model_revision MODEL_REVISION]
[--low_cpu_mem_usage [LOW_CPU_MEM_USAGE]] [--no_low_cpu_mem_usage] [--quantization_bit QUANTIZATION_BIT] [--quantization_type {fp4,nf4}] [--double_quantization [DOUBLE_QUANTIZATION]]
[--no_double_quantization] [--quantization_device_map {auto}] [--rope_scaling {linear,dynamic}] [--flash_attn {off,sdpa,fa2,auto}] [--shift_attn [SHIFT_ATTN]] [--mixture_of_depths {convert,load}]
[--use_unsloth [USE_UNSLOTH]] [--visual_inputs [VISUAL_INPUTS]] [--moe_aux_loss_coef MOE_AUX_LOSS_COEF] [--disable_gradient_checkpointing [DISABLE_GRADIENT_CHECKPOINTING]]
[--upcast_layernorm [UPCAST_LAYERNORM]] [--upcast_lmhead_output [UPCAST_LMHEAD_OUTPUT]] [--train_from_scratch [TRAIN_FROM_SCRATCH]] [--infer_backend {huggingface,vllm}] [--vllm_maxlen VLLM_MAXLEN]
[--vllm_gpu_util VLLM_GPU_UTIL] [--vllm_enforce_eager [VLLM_ENFORCE_EAGER]] [--vllm_max_lora_rank VLLM_MAX_LORA_RANK] [--offload_folder OFFLOAD_FOLDER] [--use_cache [USE_CACHE]] [--no_use_cache]
[--infer_dtype {auto,float16,bfloat16,float32}] [--hf_hub_token HF_HUB_TOKEN] [--ms_hub_token MS_HUB_TOKEN] [--export_dir EXPORT_DIR] [--export_size EXPORT_SIZE] [--export_device {cpu,auto}]
[--export_quantization_bit EXPORT_QUANTIZATION_BIT] [--export_quantization_dataset EXPORT_QUANTIZATION_DATASET] [--export_quantization_nsamples EXPORT_QUANTIZATION_NSAMPLES]
[--export_quantization_maxlen EXPORT_QUANTIZATION_MAXLEN] [--export_legacy_format [EXPORT_LEGACY_FORMAT]] [--export_hub_model_id EXPORT_HUB_MODEL_ID] [--print_param_status [PRINT_PARAM_STATUS]]
[--template TEMPLATE] [--dataset DATASET] [--dataset_dir DATASET_DIR] [--split SPLIT] [--cutoff_len CUTOFF_LEN] [--reserved_label_len RESERVED_LABEL_LEN] [--train_on_prompt [TRAIN_ON_PROMPT]]
[--streaming [STREAMING]] [--buffer_size BUFFER_SIZE] [--mix_strategy {concat,interleave_under,interleave_over}] [--interleave_probs INTERLEAVE_PROBS] [--overwrite_cache [OVERWRITE_CACHE]]
[--preprocessing_num_workers PREPROCESSING_NUM_WORKERS] [--max_samples MAX_SAMPLES] [--eval_num_beams EVAL_NUM_BEAMS] [--ignore_pad_token_for_loss [IGNORE_PAD_TOKEN_FOR_LOSS]]
[--no_ignore_pad_token_for_loss] [--val_size VAL_SIZE] [--packing PACKING] [--tokenized_path TOKENIZED_PATH] --output_dir OUTPUT_DIR [--overwrite_output_dir [OVERWRITE_OUTPUT_DIR]]
[--do_train [DO_TRAIN]] [--do_eval [DO_EVAL]] [--do_predict [DO_PREDICT]] [--eval_strategy {no,steps,epoch}] [--prediction_loss_only [PREDICTION_LOSS_ONLY]]
[--per_device_train_batch_size PER_DEVICE_TRAIN_BATCH_SIZE] [--per_device_eval_batch_size PER_DEVICE_EVAL_BATCH_SIZE] [--per_gpu_train_batch_size PER_GPU_TRAIN_BATCH_SIZE]
[--per_gpu_eval_batch_size PER_GPU_EVAL_BATCH_SIZE] [--gradient_accumulation_steps GRADIENT_ACCUMULATION_STEPS] [--eval_accumulation_steps EVAL_ACCUMULATION_STEPS] [--eval_delay EVAL_DELAY]
[--learning_rate LEARNING_RATE] [--weight_decay WEIGHT_DECAY] [--adam_beta1 ADAM_BETA1] [--adam_beta2 ADAM_BETA2] [--adam_epsilon ADAM_EPSILON] [--max_grad_norm MAX_GRAD_NORM]
[--num_train_epochs NUM_TRAIN_EPOCHS] [--max_steps MAX_STEPS]
[--lr_scheduler_type {linear,cosine,cosine_with_restarts,polynomial,constant,constant_with_warmup,inverse_sqrt,reduce_lr_on_plateau,cosine_with_min_lr,warmup_stable_decay}]
[--lr_scheduler_kwargs LR_SCHEDULER_KWARGS] [--warmup_ratio WARMUP_RATIO] [--warmup_steps WARMUP_STEPS] [--log_level {detail,debug,info,warning,error,critical,passive}]
[--log_level_replica {detail,debug,info,warning,error,critical,passive}] [--log_on_each_node [LOG_ON_EACH_NODE]] [--no_log_on_each_node] [--logging_dir LOGGING_DIR]
[--logging_strategy {no,steps,epoch}] [--logging_first_step [LOGGING_FIRST_STEP]] [--logging_steps LOGGING_STEPS] [--logging_nan_inf_filter [LOGGING_NAN_INF_FILTER]] [--no_logging_nan_inf_filter]
[--save_strategy {no,steps,epoch}] [--save_steps SAVE_STEPS] [--save_total_limit SAVE_TOTAL_LIMIT] [--save_safetensors [SAVE_SAFETENSORS]] [--no_save_safetensors]
[--save_on_each_node [SAVE_ON_EACH_NODE]] [--save_only_model [SAVE_ONLY_MODEL]] [--restore_callback_states_from_checkpoint [RESTORE_CALLBACK_STATES_FROM_CHECKPOINT]] [--no_cuda [NO_CUDA]]
[--use_cpu [USE_CPU]] [--use_mps_device [USE_MPS_DEVICE]] [--seed SEED] [--data_seed DATA_SEED] [--jit_mode_eval [JIT_MODE_EVAL]] [--use_ipex [USE_IPEX]] [--bf16 [BF16]] [--fp16 [FP16]]
[--fp16_opt_level FP16_OPT_LEVEL] [--half_precision_backend {auto,apex,cpu_amp}] [--bf16_full_eval [BF16_FULL_EVAL]] [--fp16_full_eval [FP16_FULL_EVAL]] [--tf32 TF32] [--local_rank LOCAL_RANK]
[--ddp_backend {nccl,gloo,mpi,ccl,hccl,cncl}] [--tpu_num_cores TPU_NUM_CORES] [--tpu_metrics_debug [TPU_METRICS_DEBUG]] [--debug DEBUG [DEBUG ...]] [--dataloader_drop_last [DATALOADER_DROP_LAST]]
[--eval_steps EVAL_STEPS] [--dataloader_num_workers DATALOADER_NUM_WORKERS] [--dataloader_prefetch_factor DATALOADER_PREFETCH_FACTOR] [--past_index PAST_INDEX] [--run_name RUN_NAME]
[--disable_tqdm DISABLE_TQDM] [--remove_unused_columns [REMOVE_UNUSED_COLUMNS]] [--no_remove_unused_columns] [--label_names LABEL_NAMES [LABEL_NAMES ...]]
[--load_best_model_at_end [LOAD_BEST_MODEL_AT_END]] [--metric_for_best_model METRIC_FOR_BEST_MODEL] [--greater_is_better GREATER_IS_BETTER] [--ignore_data_skip [IGNORE_DATA_SKIP]] [--fsdp FSDP]
[--fsdp_min_num_params FSDP_MIN_NUM_PARAMS] [--fsdp_config FSDP_CONFIG] [--fsdp_transformer_layer_cls_to_wrap FSDP_TRANSFORMER_LAYER_CLS_TO_WRAP] [--accelerator_config ACCELERATOR_CONFIG]
[--deepspeed DEEPSPEED] [--label_smoothing_factor LABEL_SMOOTHING_FACTOR]
[--optim {adamw_hf,adamw_torch,adamw_torch_fused,adamw_torch_xla,adamw_torch_npu_fused,adamw_apex_fused,adafactor,adamw_anyprecision,sgd,adagrad,adamw_bnb_8bit,adamw_8bit,lion_8bit,lion_32bit,paged_adamw_32bit,paged_adamw_8bit,paged_lion_32bit,paged_lion_8bit,rmsprop,rmsprop_bnb,rmsprop_bnb_8bit,rmsprop_bnb_32bit,galore_adamw,galore_adamw_8bit,galore_adafactor,galore_adamw_layerwise,galore_adamw_8bit_layerwise,galore_adafactor_layerwise}]
[--optim_args OPTIM_ARGS] [--adafactor [ADAFACTOR]] [--group_by_length [GROUP_BY_LENGTH]] [--length_column_name LENGTH_COLUMN_NAME] [--report_to REPORT_TO]
[--ddp_find_unused_parameters DDP_FIND_UNUSED_PARAMETERS] [--ddp_bucket_cap_mb DDP_BUCKET_CAP_MB] [--ddp_broadcast_buffers DDP_BROADCAST_BUFFERS] [--dataloader_pin_memory [DATALOADER_PIN_MEMORY]]
[--no_dataloader_pin_memory] [--dataloader_persistent_workers [DATALOADER_PERSISTENT_WORKERS]] [--skip_memory_metrics [SKIP_MEMORY_METRICS]] [--no_skip_memory_metrics]
[--use_legacy_prediction_loop [USE_LEGACY_PREDICTION_LOOP]] [--push_to_hub [PUSH_TO_HUB]] [--resume_from_checkpoint RESUME_FROM_CHECKPOINT] [--hub_model_id HUB_MODEL_ID]
[--hub_strategy {end,every_save,checkpoint,all_checkpoints}] [--hub_token HUB_TOKEN] [--hub_private_repo [HUB_PRIVATE_REPO]] [--hub_always_push [HUB_ALWAYS_PUSH]]
[--gradient_checkpointing [GRADIENT_CHECKPOINTING]] [--gradient_checkpointing_kwargs GRADIENT_CHECKPOINTING_KWARGS] [--include_inputs_for_metrics [INCLUDE_INPUTS_FOR_METRICS]]
[--eval_do_concat_batches [EVAL_DO_CONCAT_BATCHES]] [--no_eval_do_concat_batches] [--fp16_backend {auto,apex,cpu_amp}] [--evaluation_strategy {no,steps,epoch}]
[--push_to_hub_model_id PUSH_TO_HUB_MODEL_ID] [--push_to_hub_organization PUSH_TO_HUB_ORGANIZATION] [--push_to_hub_token PUSH_TO_HUB_TOKEN] [--mp_parameters MP_PARAMETERS]
[--auto_find_batch_size [AUTO_FIND_BATCH_SIZE]] [--full_determinism [FULL_DETERMINISM]] [--torchdynamo TORCHDYNAMO] [--ray_scope RAY_SCOPE] [--ddp_timeout DDP_TIMEOUT] [--torch_compile [TORCH_COMPILE]]
[--torch_compile_backend TORCH_COMPILE_BACKEND] [--torch_compile_mode TORCH_COMPILE_MODE] [--dispatch_batches DISPATCH_BATCHES] [--split_batches SPLIT_BATCHES]
[--include_tokens_per_second [INCLUDE_TOKENS_PER_SECOND]] [--include_num_input_tokens_seen [INCLUDE_NUM_INPUT_TOKENS_SEEN]] [--neftune_noise_alpha NEFTUNE_NOISE_ALPHA]
[--optim_target_modules OPTIM_TARGET_MODULES] [--batch_eval_metrics [BATCH_EVAL_METRICS]] [--sortish_sampler [SORTISH_SAMPLER]] [--predict_with_generate [PREDICT_WITH_GENERATE]]
[--generation_max_length GENERATION_MAX_LENGTH] [--generation_num_beams GENERATION_NUM_BEAMS] [--generation_config GENERATION_CONFIG] [--use_badam [USE_BADAM]] [--badam_mode {layer,ratio}]
[--badam_start_block BADAM_START_BLOCK] [--badam_switch_mode {ascending,descending,random,fixed}] [--badam_switch_interval BADAM_SWITCH_INTERVAL] [--badam_update_ratio BADAM_UPDATE_RATIO]
[--badam_mask_mode {adjacent,scatter}] [--badam_verbose BADAM_VERBOSE] [--use_galore [USE_GALORE]] [--galore_target GALORE_TARGET] [--galore_rank GALORE_RANK]
[--galore_update_interval GALORE_UPDATE_INTERVAL] [--galore_scale GALORE_SCALE] [--galore_proj_type {std,reverse_std,right,left,full}] [--galore_layerwise [GALORE_LAYERWISE]] [--pref_beta PREF_BETA]
[--pref_ftx PREF_FTX] [--pref_loss {sigmoid,hinge,ipo,kto_pair,orpo,simpo}] [--dpo_label_smoothing DPO_LABEL_SMOOTHING] [--kto_chosen_weight KTO_CHOSEN_WEIGHT]
[--kto_rejected_weight KTO_REJECTED_WEIGHT] [--simpo_gamma SIMPO_GAMMA] [--ppo_buffer_size PPO_BUFFER_SIZE] [--ppo_epochs PPO_EPOCHS] [--ppo_score_norm [PPO_SCORE_NORM]] [--ppo_target PPO_TARGET]
[--ppo_whiten_rewards [PPO_WHITEN_REWARDS]] [--ref_model REF_MODEL] [--ref_model_adapters REF_MODEL_ADAPTERS] [--ref_model_quantization_bit REF_MODEL_QUANTIZATION_BIT] [--reward_model REWARD_MODEL]
[--reward_model_adapters REWARD_MODEL_ADAPTERS] [--reward_model_quantization_bit REWARD_MODEL_QUANTIZATION_BIT] [--reward_model_type {lora,full,api}] [--additional_target ADDITIONAL_TARGET]
[--lora_alpha LORA_ALPHA] [--lora_dropout LORA_DROPOUT] [--lora_rank LORA_RANK] [--lora_target LORA_TARGET] [--loraplus_lr_ratio LORAPLUS_LR_RATIO] [--loraplus_lr_embedding LORAPLUS_LR_EMBEDDING]
[--use_rslora [USE_RSLORA]] [--use_dora [USE_DORA]] [--pissa_init [PISSA_INIT]] [--pissa_iter PISSA_ITER] [--pissa_convert [PISSA_CONVERT]] [--create_new_adapter [CREATE_NEW_ADAPTER]]
[--freeze_trainable_layers FREEZE_TRAINABLE_LAYERS] [--freeze_trainable_modules FREEZE_TRAINABLE_MODULES] [--freeze_extra_modules FREEZE_EXTRA_MODULES] [--pure_bf16 [PURE_BF16]]
[--stage {pt,sft,rm,ppo,dpo,kto}] [--finetuning_type {lora,freeze,full}] [--use_llama_pro [USE_LLAMA_PRO]] [--freeze_vision_tower [FREEZE_VISION_TOWER]] [--no_freeze_vision_tower]
[--train_mm_proj_only [TRAIN_MM_PROJ_ONLY]] [--plot_loss [PLOT_LOSS]] [--do_sample [DO_SAMPLE]] [--no_do_sample] [--temperature TEMPERATURE] [--top_p TOP_P] [--top_k TOP_K] [--num_beams NUM_BEAMS]
[--max_length MAX_LENGTH] [--max_new_tokens MAX_NEW_TOKENS] [--repetition_penalty REPETITION_PENALTY] [--length_penalty LENGTH_PENALTY] [--default_system DEFAULT_SYSTEM]
llamafactory-cli: error: the following arguments are required: --model_name_or_path, --output_dir
Expected behavior
我把 docker-compose.yml 和 Dockerfile 中的端口都改成了 19324。从 docker ps 看容器已经成功启动,防火墙也没有问题,但为什么无法访问 WebUI 呢?另外,docker inspect 的输出里为什么映射的端口仍然是 7861?是我哪里配置得不对吗?
我的 Dockerfile 是:
FROM nvcr.io/nvidia/pytorch:24.01-py3
WORKDIR /app
COPY requirements.txt /app/
RUN pip install -r requirements.txt -i https://mirrors.cloud.tencent.com/pypi/simple
COPY . /app/
RUN pip install -e .[deepspeed,metrics,bitsandbytes,qwen,modelscope] -i https://mirrors.cloud.tencent.com/pypi/simple
VOLUME [ "/root/.cache/modelscope/", "/app/data", "/app/output" ]
EXPOSE 19324
CMD [ "llamafactory-cli", "webui" ]
docker compose文件是:
version: '3.8'
services:
  llama-factory:
    build:
      dockerfile: Dockerfile
      context: .
    container_name: llama_factory
    volumes:
      - ./ms_cache:/root/.cache/modelscope/
Others
sunxiaoheng@nvidia-gpu-a800:~/digital_human_project/LLaMA-Factory-main$ docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
4417b4b2e765 llama-factory-main-llama-factory "/opt/nvidia/nvidia_…" 2 hours ago Up 2 hours 6006/tcp, 8888/tcp, 19324/tcp, 0.0.0.0:7861->7861/tcp, :::7861->7861/tcp llama_factory
995d6b17ccaf mongo "docker-entrypoint.s…" 5 months ago Up 9 hours 127.0.0.1:27017->27017/tcp mongo
sunxiaoheng@nvidia-gpu-a800:~/digital_human_project/LLaMA-Factory-main$ docker inspect 4417b4b2e765
[
{
"Id": "4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda",
"Created": "2024-06-22T02:01:39.745128747Z",
"Path": "/opt/nvidia/nvidia_entrypoint.sh",
"Args": [
"llamafactory-cli",
"webui"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 3360750,
"ExitCode": 0,
"Error": "",
"StartedAt": "2024-06-22T02:01:41.506798034Z",
"FinishedAt": "0001-01-01T00:00:00Z"
},
"Image": "sha256:77be222539c912397d19b48a411a1db5654d80f7a20ce85b2c45263aa25705e5",
"ResolvConfPath": "/var/lib/docker/containers/4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda/hostname",
"HostsPath": "/var/lib/docker/containers/4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda/hosts",
"LogPath": "/var/lib/docker/containers/4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda/4417b4b2e765e8bdedea46dd4383defdd9d0f31e2d84e1263e15bd5d1a2a7cda-json.log",
"Name": "/llama_factory",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "docker-default",
"ExecIDs": null,
"HostConfig": {
"Binds": [
"/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/data:/app/data:rw",
"/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/output:/app/output:rw",
"/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/ms_cache:/root/.cache/modelscope:rw"
],
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {}
},
"NetworkMode": "llama-factory-main_default",
"PortBindings": {
"7861/tcp": [
{
"HostIp": "",
"HostPort": "7861"
}
]
},
"RestartPolicy": {
"Name": "unless-stopped",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"ConsoleSize": [
0,
0
],
"CapAdd": null,
"CapDrop": null,
"CgroupnsMode": "private",
"Dns": null,
"DnsOptions": null,
"DnsSearch": null,
"ExtraHosts": [],
"GroupAdd": null,
"IpcMode": "host",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": [
"label=disable"
],
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "runc",
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": null,
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": null,
"DeviceCgroupRules": null,
"DeviceRequests": [
{
"Driver": "nvidia",
"Count": -1,
"DeviceIDs": null,
"Capabilities": [
[
"gpu"
]
],
"Options": null
}
],
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": null,
"PidsLimit": null,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/docker/overlay2/b0dfea4780dcca5abe587130c76c0263fa984a2d11d2a4a46b122519c71d5803-init/diff:/var/lib/docker/overlay2/zehlu4hcbncwa113pj3yrovak/diff:/var/lib/docker/overlay2/as7qs9eex5psf8rz2erujrayg/diff:/var/lib/docker/overlay2/k5c3vdirk8u7lma6okaipp4oi/diff:/var/lib/docker/overlay2/nn507wp5z3efa9z9mwgve2cgt/diff:/var/lib/docker/overlay2/vbntazpp7rhsxnc13349pvhbl/diff:/var/lib/docker/overlay2/721ad283531a956460c61efdfb8100a9340dafe30d86eba1e504a8ee33c4e272/diff:/var/lib/docker/overlay2/c4e96e814d28a732a0e320ce3d1438752eded9f4d5a29a3037766c9acea0a16d/diff:/var/lib/docker/overlay2/0e297213499ce908aca5293d96006994c7e695bc6eaf7945fa30fbfed6369abd/diff:/var/lib/docker/overlay2/965413bdf9d29d9f37b4e1af5b40d20260ee68134858b47f0c84a0adcdf2165f/diff:/var/lib/docker/overlay2/7bc5203be047816d99130cd8ec34dca992d75653549289bd3bee11696192ccf2/diff:/var/lib/docker/overlay2/e0881232ed9780ed1e692da703d0759f21b5ee3c1e60e957b0e97b1972bc7027/diff:/var/lib/docker/overlay2/69782997c72802d40b3e0baae5bca90fa9d2a9d03045ce2e0513a4c2bcb4629f/diff:/var/lib/docker/overlay2/2aca6e5e7ef667ba04ec072354263f8daa6f5bb32f2ee2def97702d0206cee14/diff:/var/lib/docker/overlay2/5bb5d7775306acf65d8fd4a43891b8d3ebe9690e7c674b380f2bc25c14d110fd/diff:/var/lib/docker/overlay2/45bdae7c11083b848a4cf0c30cbcb0fcf6738a7e2d90fd8430b84895d70d72f5/diff:/var/lib/docker/overlay2/c6cac8a3b7d7b20f25df25989062a4890b337f34f0b2061a656979cd54a220d6/diff:/var/lib/docker/overlay2/5716cb14ff47f83da8c404b27f179563b72c7b67addb8e700b6904a75461abc8/diff:/var/lib/docker/overlay2/77b4f38ca54a39d455506b89a3c0650f3782ad97ae44424dceb4975c9be2744c/diff:/var/lib/docker/overlay2/ff14288e620381ee2e12e3d8b048b987169bc62ca9d979f415e5b75b9106f475/diff:/var/lib/docker/overlay2/6835c4437c1ed13b62f432a7d783f0d594710109b730d35c3f94850e1fe00511/diff:/var/lib/docker/overlay2/cb3a3f05bbd186d9231539ede78fc11ac235aeedf96eb29eada6308fd09e847c/diff:/var/lib/docker/overlay2/d57f2f6785ad117dd6c7a53800dc930ac4122d4ba25e61819f677db79f4990
81/diff:/var/lib/docker/overlay2/22366d90a3e20c44b2ee30765ea4c4455b4769a4bc6f3a83a1d6c523e222d814/diff:/var/lib/docker/overlay2/f8b8a02af72571bed6ea23e134beccb3cbe01a29b376df511ed9e0ca28bfd5e1/diff:/var/lib/docker/overlay2/3513c8c40c67a4234677413d6155db4243445055447df7889492a9bc40f5ff3e/diff:/var/lib/docker/overlay2/2b4c50720affee5d9d386a0b3d32e2b49f8bfa3c235e47e8a5ceb00638c9f3f4/diff:/var/lib/docker/overlay2/3852f770bda9df24d2c137f84469b8480cc6ed5f43483a11ccdb014a2c77a67f/diff:/var/lib/docker/overlay2/99081bf40b36435f1bbf7d1f92459c48d0525cf37865491d2e351d6817f8f359/diff:/var/lib/docker/overlay2/5874feb22279ffb57e2c1893fb39acf204b39fe7abf99beeee3c6b9f98b416a9/diff:/var/lib/docker/overlay2/5594be341a666c36d2e66bd5bf2d651881907d49aef7e2a4a1359d7ce09a69fc/diff:/var/lib/docker/overlay2/abb5a198c29dec9a202af001de24d91fe90d0aa892f104d54e1fdd159d28e7b1/diff:/var/lib/docker/overlay2/ae44d0b2a506554dd28071c15868a83c2b5b7625b566fac1069edf2834503913/diff:/var/lib/docker/overlay2/b88c2cd318462dae7de7b65fad27084abf29614b636a015a89b496b74ac96cf5/diff:/var/lib/docker/overlay2/f2f60db2123697b65207378307d02848c5606b961c83d5097b457ea063430c81/diff:/var/lib/docker/overlay2/ccddc95a1b55c96c9249a324ed613b59e54a407ff5cac000be54e1ef6c3c19fa/diff:/var/lib/docker/overlay2/308e231dbf71841943234e939be550894048d19c6a2e83752c5333bd6ec54be2/diff:/var/lib/docker/overlay2/7426a14f566c9d84a8c52407a56c2c8a82f5346fa40705c58650d48c0929ec6c/diff:/var/lib/docker/overlay2/2b4f85f87bc617d6545e2a2a0a258d9d3f5800d69883943aaa713a76da975138/diff:/var/lib/docker/overlay2/5eed630345b4743e2015bcbb86d78a51d1aad19ba678b61f27803f2129589f77/diff:/var/lib/docker/overlay2/b5151859af6fd01563ab60e68820c6345ad247207bb2743588e68e41c701d1ab/diff:/var/lib/docker/overlay2/513e8a54806607d844052ba6ee0fcd96f7d6864252caf066998c8a5354fd480c/diff:/var/lib/docker/overlay2/0a959c79963b1ba7b54c080da34b462e414b22e0d574997c4c3aa47f10e2d455/diff:/var/lib/docker/overlay2/251b74e17768ac2441532d308c3c6ee3c1a14d593947d64419ea30068c2cc7e5/di
ff:/var/lib/docker/overlay2/0b8d6b82df1b37f64b247c9b426a5c78a677abbb35449193119a0620671335fe/diff:/var/lib/docker/overlay2/8bbd6e6f0c177dd67239592d3907b1c00f0106acf6d09c1c68b12650f6ac818e/diff:/var/lib/docker/overlay2/ac538a5c7af3b3fabd176ac9af32833753009b3c7dc4309bd5e134b456660406/diff:/var/lib/docker/overlay2/78a10e723e388ad4e6a7392f3c5b2e7e66ede702ca3003f3a1ed70792d3c6fa8/diff:/var/lib/docker/overlay2/2ab9064178d9da9132708ddd967b2bc01cd94df63d3d51f23c4b0d9efb767097/diff:/var/lib/docker/overlay2/9b29b8c30e24c28baaeacee364b307bbb65be0923d45abe68a1ed1e185c61ad9/diff:/var/lib/docker/overlay2/0859fda4f3016fac0cb84778014caa97c0ba7f542269659cc9ac76bb48d78dfc/diff:/var/lib/docker/overlay2/6377a0e96a9be4db08abbbdd47fcf273220c78c96075204bd35adeaf236644b0/diff:/var/lib/docker/overlay2/a00e7a8e7c83745cf0904596cef1615027eb9fbf2d89ee4d0e3f32f85e672fec/diff:/var/lib/docker/overlay2/46dc6a7b9be5a033638f9d6568886ed4f3d47a7bfaf74855456de8d721da2837/diff:/var/lib/docker/overlay2/cd113417a94b676977b8f567498a106dee9cbf02494a789497a1205ad51f3644/diff:/var/lib/docker/overlay2/d684beaa20966ddbe54ea9eece0531c0f9990fd35a1ca34adc5cb2cfb7552e8b/diff",
"MergedDir": "/var/lib/docker/overlay2/b0dfea4780dcca5abe587130c76c0263fa984a2d11d2a4a46b122519c71d5803/merged",
"UpperDir": "/var/lib/docker/overlay2/b0dfea4780dcca5abe587130c76c0263fa984a2d11d2a4a46b122519c71d5803/diff",
"WorkDir": "/var/lib/docker/overlay2/b0dfea4780dcca5abe587130c76c0263fa984a2d11d2a4a46b122519c71d5803/work"
},
"Name": "overlay2"
},
"Mounts": [
{
"Type": "bind",
"Source": "/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/ms_cache",
"Destination": "/root/.cache/modelscope",
"Mode": "rw",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "bind",
"Source": "/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/data",
"Destination": "/app/data",
"Mode": "rw",
"RW": true,
"Propagation": "rprivate"
},
{
"Type": "bind",
"Source": "/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/output",
"Destination": "/app/output",
"Mode": "rw",
"RW": true,
"Propagation": "rprivate"
}
],
"Config": {
"Hostname": "4417b4b2e765",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": true,
"AttachStderr": true,
"ExposedPorts": {
"19324/tcp": {},
"6006/tcp": {},
"7861/tcp": {},
"8888/tcp": {}
},
"Tty": false,
"OpenStdin": false,
"StdinOnce": false,
"Env": [
"USE_MODELSCOPE_HUB=1",
"CUDA_VISIBLE_DEVICES=1",
"PATH=/usr/local/lib/python3.10/dist-packages/torch_tensorrt/bin:/usr/local/mpi/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/ucx/bin:/opt/tensorrt/bin",
"CUDA_VERSION=12.3.2.001",
"CUDA_DRIVER_VERSION=545.23.08",
"CUDA_CACHE_DISABLE=1",
"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=",
"_CUDA_COMPAT_PATH=/usr/local/cuda/compat",
"ENV=/etc/shinit_v2",
"BASH_ENV=/etc/bash.bashrc",
"SHELL=/bin/bash",
"NVIDIA_REQUIRE_CUDA=cuda>=9.0",
"NCCL_VERSION=2.19.4",
"CUBLAS_VERSION=12.3.4.1",
"CUFFT_VERSION=11.0.12.1",
"CURAND_VERSION=10.3.4.107",
"CUSPARSE_VERSION=12.2.0.103",
"CUSOLVER_VERSION=11.5.4.101",
"CUTENSOR_VERSION=2.0.0.7",
"NPP_VERSION=12.2.3.2",
"NVJPEG_VERSION=12.3.0.81",
"CUDNN_VERSION=8.9.7.29+cuda12.2",
"TRT_VERSION=8.6.1.6+cuda12.0.1.011",
"TRTOSS_VERSION=23.11",
"NSIGHT_SYSTEMS_VERSION=2023.4.1.97",
"NSIGHT_COMPUTE_VERSION=2023.3.1.1",
"DALI_VERSION=1.33.0",
"DALI_BUILD=11414174",
"POLYGRAPHY_VERSION=0.49.1",
"TRANSFORMER_ENGINE_VERSION=1.2",
"LD_LIBRARY_PATH=/usr/local/lib/python3.10/dist-packages/torch/lib:/usr/local/lib/python3.10/dist-packages/torch_tensorrt/lib:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility,video",
"NVIDIA_PRODUCT_NAME=PyTorch",
"GDRCOPY_VERSION=2.3",
"HPCX_VERSION=2.16rc4",
"MOFED_VERSION=5.4-rdmacore39.0",
"OPENUCX_VERSION=1.15.0",
"OPENMPI_VERSION=4.1.5rc2",
"RDMACORE_VERSION=39.0",
"OPAL_PREFIX=/opt/hpcx/ompi",
"OMPI_MCA_coll_hcoll_enable=0",
"LIBRARY_PATH=/usr/local/cuda/lib64/stubs:",
"PYTORCH_BUILD_VERSION=2.2.0a0+81ea7a4",
"PYTORCH_VERSION=2.2.0a0+81ea7a4",
"PYTORCH_BUILD_NUMBER=0",
"NVIDIA_PYTORCH_VERSION=24.01",
"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python",
"PYTHONIOENCODING=utf-8",
"LC_ALL=C.UTF-8",
"PIP_DEFAULT_TIMEOUT=100",
"NVM_DIR=/usr/local/nvm",
"JUPYTER_PORT=8888",
"TENSORBOARD_PORT=6006",
"UCC_CL_BASIC_TLS=^sharp",
"TORCH_CUDA_ARCH_LIST=5.2 6.0 6.1 7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX",
"PYTORCH_HOME=/opt/pytorch/pytorch",
"CUDA_HOME=/usr/local/cuda",
"TORCH_ALLOW_TF32_CUBLAS_OVERRIDE=1",
"USE_EXPERIMENTAL_CUDNN_V8_API=1",
"COCOAPI_VERSION=2.0+nv0.8.0",
"TORCH_CUDNN_V8_API_ENABLED=1",
"CUDA_MODULE_LOADING=LAZY",
"NVIDIA_BUILD_ID=80741402"
],
"Cmd": [
"llamafactory-cli",
"webui"
],
"Image": "llama-factory-main-llama-factory",
"Volumes": {
"/app/data": {},
"/app/output": {},
"/root/.cache/modelscope/": {}
},
"WorkingDir": "/app",
"Entrypoint": [
"/opt/nvidia/nvidia_entrypoint.sh"
],
"OnBuild": null,
"Labels": {
"com.docker.compose.config-hash": "ca03cc9c55d98f2ba3a1e3357888cb959f6ab91bc1d4396e50a6d5f9af83f555",
"com.docker.compose.container-number": "1",
"com.docker.compose.depends_on": "",
"com.docker.compose.image": "sha256:77be222539c912397d19b48a411a1db5654d80f7a20ce85b2c45263aa25705e5",
"com.docker.compose.oneoff": "False",
"com.docker.compose.project": "llama-factory-main",
"com.docker.compose.project.config_files": "/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main/docker-compose.yml",
"com.docker.compose.project.working_dir": "/home/sunxiaoheng/digital_human_project/LLaMA-Factory-main",
"com.docker.compose.service": "llama-factory",
"com.docker.compose.version": "2.18.1",
"com.nvidia.build.id": "80741402",
"com.nvidia.build.ref": "3a8f39e58d71996b362a9358b971d42d695351fd",
"com.nvidia.cublas.version": "12.3.4.1",
"com.nvidia.cuda.version": "9.0",
"com.nvidia.cudnn.version": "8.9.7.29+cuda12.2",
"com.nvidia.cufft.version": "11.0.12.1",
"com.nvidia.curand.version": "10.3.4.107",
"com.nvidia.cusolver.version": "11.5.4.101",
"com.nvidia.cusparse.version": "12.2.0.103",
"com.nvidia.cutensor.version": "2.0.0.7",
"com.nvidia.nccl.version": "2.19.4",
"com.nvidia.npp.version": "12.2.3.2",
"com.nvidia.nsightcompute.version": "2023.3.1.1",
"com.nvidia.nsightsystems.version": "2023.4.1.97",
"com.nvidia.nvjpeg.version": "12.3.0.81",
"com.nvidia.pytorch.version": "2.2.0a0+81ea7a4",
"com.nvidia.tensorrt.version": "8.6.1.6+cuda12.0.1.011",
"com.nvidia.tensorrtoss.version": "23.11",
"com.nvidia.volumes.needed": "nvidia_driver",
"org.opencontainers.image.ref.name": "ubuntu",
"org.opencontainers.image.version": "22.04"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "662db6fa5b03bdaa8120513e69748d69c4a0b2e0e3aea8b88bcdc4f6660bf055",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {
"19324/tcp": null,
"6006/tcp": null,
"7861/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "7861"
},
{
"HostIp": "::",
"HostPort": "7861"
}
],
"8888/tcp": null
},
"SandboxKey": "/var/run/docker/netns/662db6fa5b03",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "",
"Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "",
"IPPrefixLen": 0,
"IPv6Gateway": "",
"MacAddress": "",
"Networks": {
"llama-factory-main_default": {
"IPAMConfig": null,
"Links": null,
"Aliases": [
"llama_factory",
"llama-factory",
"4417b4b2e765"
],
"NetworkID": "0c12cb6142f23159a1acdf4c42e8ab2d75e3854b979350a3735b85371b0ec48d",
"EndpointID": "bc991f175bdb41cca1f3d1ce29fbeaa30b76f72994291ebb1e90f486a7f1ebb6",
"Gateway": "172.19.0.1",
"IPAddress": "172.19.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:13:00:02",
"DriverOpts": null
}
}
}
}
]
The text was updated successfully, but these errors were encountered: