alvarobartt
/

zephyr-gemma-dpo

Text Generation

Generated from Trainer

text-generation-inference

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

zephyr-gemma-dpo / train-config.yaml

alvarobartt's picture

alvarobartt HF staff

Upload train-config.yaml with huggingface_hub

52c9302 verified 7 months ago

history blame contribute delete

No virus

1.18 kB

	dataset_args:
	path: argilla/dpo-mix-7k

	format_args:
	prompt_format: zephyr-gemma

	model_args:
	pretrained_model_name_or_path: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
	torch_dtype: bfloat16

	wandb_args:
	entity: argilla-io
	project: zephyr-gemma-dpo
	name: 16bit

	training_args:
	# DPOTrainer
	beta: 0.05
	loss_type: sigmoid
	max_length: 1024
	max_prompt_length: 512
	# Trainer (train)
	bf16: true
	do_train: true
	gradient_accumulation_steps: 8
	gradient_checkpointing: true
	gradient_checkpointing_kwargs:
	use_reentrant: false
	learning_rate: 5.0e-7
	logging_steps: 10
	lr_scheduler_type: cosine
	num_train_epochs: 2
	optim: adamw_torch
	output_dir: data/gemma-7b-it-dpo
	per_device_train_batch_size: 2
	seed: 42
	warmup_ratio: 0.1
	warmup_steps: 100
	report_to:
	- wandb
	- tensorboard
	# Trainer (eval)
	do_eval: true
	evaluation_strategy: steps
	eval_steps: 100
	per_device_eval_batch_size: 4
	# Trainer (save)
	hub_model_id: alvarobartt/zephyr-gemma-dpo
	hub_private_repo: true
	push_to_hub: true
	save_strategy: "no" # Quoted, otherwise is casted to `False`
	save_total_limit: null

	use_accelerate: true
	use_unsloth: false