burtenshaw HF staff committed on
Commit
4d36342
1 Parent(s): c3269c2

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. main.py +2 -2
main.py CHANGED
@@ -64,9 +64,9 @@ class ORPO(object):
64
  test = self.data[test_split].filter(self.filter_dataset)
65
  self.test = test.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[test_split].column_names)
66
 
67
- train = self.data[train_split].filter(self.filter_dataset)[self.args.max_samples]
68
  print(f"\n\n>>> {len(train)} / {len(self.data[train_split])} rows left after filtering by prompt length.")
69
- self.train = train.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[:train_split].column_names)
70
 
71
  # Set WANDB & Logging Configurations
72
  self.run_name = f"{self.args.model_name.split('/')[-1]}-{self.args.data_name.split('/')[-1]}-lambda{self.args.alpha}-ORPO-{self.start.tm_mday}-{self.start.tm_hour}-{self.start.tm_min}"
 
64
  test = self.data[test_split].filter(self.filter_dataset)
65
  self.test = test.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[test_split].column_names)
66
 
67
+ train = self.data[train_split].filter(self.filter_dataset)[:self.args.max_samples]
68
  print(f"\n\n>>> {len(train)} / {len(self.data[train_split])} rows left after filtering by prompt length.")
69
+ self.train = train.map(self.preprocess_dataset, batched=True, num_proc=self.args.num_proc, remove_columns=self.data[train_split].column_names)
70
 
71
  # Set WANDB & Logging Configurations
72
  self.run_name = f"{self.args.model_name.split('/')[-1]}-{self.args.data_name.split('/')[-1]}-lambda{self.args.alpha}-ORPO-{self.start.tm_mday}-{self.start.tm_hour}-{self.start.tm_min}"