diff --git a/README.md b/README.md index d458d5b..2e8d614 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ print(torch.cuda.is_available()) **3.1 预训练(学图像描述)** ```bash -python train_pretrain_vlm.py --epochs 4 +bash scripts/pretrain_vlm.sh ``` > 执行预训练,得到 `pretrain_vlm_*.pth` 作为预训练的输出权重(其中*为模型的dimension,默认为512) @@ -187,7 +187,7 @@ python train_pretrain_vlm.py --epochs 4 **3.2 监督微调(学看图对话方式)** ```bash -python train_sft_vlm.py --epochs 4 +bash scripts/sft_vlm.sh ``` > 执行监督微调,得到 `sft_vlm_*.pth` 作为指令微调的输出权重 diff --git a/README_en.md b/README_en.md index 8c16e68..e3dbd53 100644 --- a/README_en.md +++ b/README_en.md @@ -189,7 +189,7 @@ skipping the pretrain training step and proceed directly to SFT training. **3.1 Pretraining (Learning image description)** ```bash -python train_pretrain_vlm.py --epochs 4 +bash scripts/pretrain_vlm.sh ``` > Run pretraining to get `pretrain_vlm_*.pth` as the pretrained model's output weights (* represents the model @@ -198,7 +198,7 @@ python train_pretrain_vlm.py --epochs 4 **3.2 Supervised Fine-Tuning (Learning image-caption dialogue style)** ```bash -python train_sft_vlm.py --epochs 4 +bash scripts/sft_vlm.sh ``` > Perform supervised fine-tuning to get `sft_vlm_*.pth` as the output weights for the fine-tuned model. 
diff --git a/scripts/pretrain_vlm.sh b/scripts/pretrain_vlm.sh
new file mode 100644
index 0000000..67e5a4f
--- /dev/null
+++ b/scripts/pretrain_vlm.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Launch VLM pretraining (learns image captioning).
+# Optional flags -- append them to the command below if needed (a comment line
+# must never sit inside the backslash continuation, it truncates the command):
+#   --use_wandb   enable Weights & Biases logging
+#   --ddp         enable DDP distributed training (launch via torchrun)
+python train_pretrain_vlm.py \
+    --out_dir="out" \
+    --epochs=4 \
+    --batch_size=16 \
+    --learning_rate=0.0004 \
+    --device="cuda:0" \
+    --dtype="bfloat16" \
+    --wandb_project="MiniMind-V" \
+    --num_workers=8 \
+    --data_path="./dataset/pretrain_data.jsonl" \
+    --images_path="./dataset/pretrain_images" \
+    --accumulation_steps=1 \
+    --grad_clip=1.0 \
+    --warmup_iters=0 \
+    --log_interval=100 \
+    --save_interval=100 \
+    --local_rank=-1 \
+    --dim=512 \
+    --n_layers=8 \
+    --max_seq_len=640
diff --git a/scripts/sft_vlm.sh b/scripts/sft_vlm.sh
new file mode 100644
index 0000000..f7d3b31
--- /dev/null
+++ b/scripts/sft_vlm.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Launch VLM supervised fine-tuning (learns image-grounded dialogue).
+# Optional flags -- append them to the command below if needed (a comment line
+# must never sit inside the backslash continuation, it truncates the command):
+#   --use_wandb   enable Weights & Biases logging
+#   --ddp         enable DDP distributed training (launch via torchrun)
+python train_sft_vlm.py \
+    --out_dir="out" \
+    --epochs=6 \
+    --batch_size=8 \
+    --learning_rate=0.000001 \
+    --device="cuda:0" \
+    --dtype="bfloat16" \
+    --wandb_project="MiniMind-V" \
+    --num_workers=8 \
+    --data_path="./dataset/sft_data.jsonl" \
+    --images_path="./dataset/sft_images" \
+    --accumulation_steps=1 \
+    --grad_clip=1.0 \
+    --warmup_iters=0 \
+    --log_interval=10 \
+    --save_interval=10 \
+    --local_rank=-1 \
+    --dim=512 \
+    --n_layers=8 \
+    --max_seq_len=1536
diff --git a/train_pretrain_vlm.py b/train_pretrain_vlm.py
index 0bc7766..220940c 100644
--- a/train_pretrain_vlm.py
+++ b/train_pretrain_vlm.py
@@ -147,7 +147,7 @@ def init_distributed_mode():
     parser.add_argument("--use_wandb", default=False, action="store_true")
     parser.add_argument("--wandb_project", type=str, default="MiniMind-V")
     parser.add_argument("--num_workers", type=int, default=8)
-    parser.add_argument("--data_path", type=str, default="./dataset/pretrain_vlm_data.jsonl")
+    parser.add_argument("--data_path", type=str, default="./dataset/pretrain_data.jsonl")
     parser.add_argument("--images_path", type=str, default="./dataset/pretrain_images")
     parser.add_argument("--ddp", action="store_true")
     parser.add_argument("--accumulation_steps", type=int, default=1)
diff --git a/train_sft_vlm.py b/train_sft_vlm.py
index ea33c1a..f847300 100644
--- a/train_sft_vlm.py
+++ b/train_sft_vlm.py
@@ -137,7 +137,7 @@ def init_distributed_mode():
     parser.add_argument("--use_wandb", default=False, action="store_true")
     parser.add_argument("--wandb_project", type=str, default="MiniMind-V")
     parser.add_argument("--num_workers", type=int, default=8)
-    parser.add_argument("--data_path", type=str, default="./dataset/sft_vlm_data.jsonl")
+    parser.add_argument("--data_path", type=str, default="./dataset/sft_data.jsonl")
     parser.add_argument("--images_path", type=str, default="./dataset/sft_images")
     parser.add_argument("--ddp", action="store_true")
     parser.add_argument("--accumulation_steps", type=int, default=1)