lucidmorto committed
Commit 80915e3
Parent: 3f7af4c

feat: Upgrade to t5-large model and adjust training params


Upgraded the model from t5-base to t5-large for improved performance. Adjusted the training parameters to match the larger model: increased the number of epochs from 3 to 5, reduced the per-device batch size from 32 to 16, raised warmup steps to 1000 and gradient accumulation to 4, and lowered the learning rate from 5e-5 to 3e-5 for more stable training. Evaluation and checkpoint saving now run every 500 steps instead of every 1000 to track the updated run more closely.
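For context, the batch-size and gradient-accumulation changes cancel out: the effective batch size stays the same, and only the per-step memory footprint shrinks. A quick check with the values from the diff below (single-device training assumed):

# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
before = 32 * 2  # old settings in humanizer.py -> 64
after = 16 * 4   # new settings in humanizer.py -> 64
assert before == after == 64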

Files changed (2):
  1. app.py +1 -1
  2. humanizer.py +11 -11
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
-model_name = "t5-base"
+model_name = "t5-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
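The diff only touches the model name, and the rest of app.py is not shown here. For readers unfamiliar with the app, the sketch below shows how a Gradio front end typically calls a seq2seq checkpoint like this; the function name, prompt handling, and generation settings are assumptions, not taken from app.py.

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def generate(text):
    # Hypothetical inference wrapper; the actual function in app.py is not visible in this diff.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    output_ids = model.generate(**inputs, max_new_tokens=256, num_beams=4)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()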
humanizer.py CHANGED
@@ -40,7 +40,7 @@ processed_dataset = {split: data.map(prepare_data) for split, data in dataset.it
 logger.info("Dataset prepared.")
 
 # Tokenize the dataset
-model_name = "t5-base"
+model_name = "t5-large"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 def tokenize_function(examples):
@@ -63,25 +63,25 @@ model = T5ForConditionalGeneration.from_pretrained(model_name)
 
 training_args = Seq2SeqTrainingArguments(
     output_dir="./results",
-    num_train_epochs=3,  # Increase number of epochs
-    per_device_train_batch_size=32,  # Increase batch size if memory allows
-    per_device_eval_batch_size=32,
-    warmup_steps=500,
+    num_train_epochs=5,  # Increased epochs
+    per_device_train_batch_size=16,  # Reduced batch size due to larger model
+    per_device_eval_batch_size=16,
+    warmup_steps=1000,  # Increased warmup steps
     weight_decay=0.01,
     logging_dir="./logs",
     logging_steps=100,
     evaluation_strategy="steps",
-    eval_steps=1000,
-    save_steps=1000,
-    use_cpu=False,  # Use GPU if available
+    eval_steps=500,
+    save_steps=500,
+    use_cpu=False,
     load_best_model_at_end=True,
     metric_for_best_model="eval_loss",
     greater_is_better=False,
-    fp16=True,  # Enable mixed precision training if GPU supports it
-    gradient_accumulation_steps=2,  # Accumulate gradients to simulate larger batch sizes
+    fp16=True,
+    gradient_accumulation_steps=4,  # Increased to simulate larger batch sizes
 )
 
-optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
+optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)  # Slightly lower learning rate
 scheduler = get_linear_schedule_with_warmup(
     optimizer,
     num_warmup_steps=500,
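The hunk ends at the scheduler setup, so the wiring into the trainer is not visible here. As a hedged sketch, a custom AdamW/linear-warmup pair is usually handed to Seq2SeqTrainer through its optimizers argument; the total-step count and dataset variable names below are placeholders, not values from humanizer.py.

import torch
from transformers import Seq2SeqTrainer, get_linear_schedule_with_warmup

# Placeholder total-step count; in practice it is derived from the dataset size,
# the effective batch size, and num_train_epochs.
num_training_steps = 10_000

optimizer = torch.optim.AdamW(model.parameters(), lr=3e-5)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=500,
    num_training_steps=num_training_steps,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],       # assumed split names
    eval_dataset=tokenized_dataset["validation"],
    optimizers=(optimizer, scheduler),  # use the custom pair instead of the Trainer defaults
)
trainer.train()

Note that when a scheduler is supplied this way, the Trainer does not build its own schedule, so the 500-step warmup passed to get_linear_schedule_with_warmup is what actually applies rather than the warmup_steps=1000 set in the training arguments.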