File size: 4,033 Bytes
9af6a14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse

# Virtual display
# Start an off-screen display (visible=0) of 1400x900 pixels as soon as this
# module is executed, before the RL libraries below are imported.
# NOTE(review): presumably required so the environment can render frames on a
# machine without a physical screen (e.g. a headless server) — confirm.
from pyvirtualdisplay import Display
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

# Import package
import gym
from huggingface_sb3 import load_from_hub, package_to_hub, push_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv

def str2bool(v):
    """Interpret a command-line token as a boolean (argparse ``type=`` helper).

    A value that is already a ``bool`` is returned unchanged. Otherwise the
    value is lower-cased and matched against the accepted spellings:
    'yes', 'true', 't', 'y', '1' give True; 'no', 'false', 'f', 'n', '0'
    give False.

    Raises:
        argparse.ArgumentTypeError: if the token matches neither list.
    """
    if isinstance(v, bool):
        return v

    token = v.lower()
    spellings_by_result = (
        (True, ('yes', 'true', 't', 'y', '1')),
        (False, ('no', 'false', 'f', 'n', '0')),
    )
    for result, spellings in spellings_by_result:
        if token in spellings:
            return result

    raise argparse.ArgumentTypeError('Boolean value expected.')

# Command-line interface.
# Option names, types and defaults are unchanged; only the help output differs.
# The option name already determines the attribute name on ``args``
# (``--model_name`` -> ``args.model_name``), so no explicit ``dest`` is needed.
# ArgumentDefaultsHelpFormatter appends each option's default to its help line.
parser = argparse.ArgumentParser(
    description='Train (or continue training) a PPO agent on LunarLander-v2 '
                'and upload it to the Hugging Face Hub.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument('--model_name', default="ppo-LunarLander-v2", type=str,
                    help='name under which the trained model is saved and packaged')
parser.add_argument('--total_timesteps', default=1000000, type=int,
                    help='total number of timesteps to train for')
parser.add_argument('--n_envs', default=16, type=int,
                    help='number of environment copies in the vectorized training env')
parser.add_argument('--repo_id', default="thien1892/LunarLander-v2-ppo", type=str,
                    help='Hub repository to upload to, as {organization}/{repo_name}')
parser.add_argument('--commit_message',
                    default="Upload PPO LunarLander-v2 trained agent", type=str,
                    help='commit message used for the Hub upload')
# Fixed: this help text used to be a copy-paste of 'commit_message'.
parser.add_argument('--re_train', default=True, type=str2bool,
                    help='if true, continue training the checkpoint given by '
                         '--id_retrain/--filename_retrain; if false, train a '
                         'new model from scratch')
parser.add_argument('--id_retrain', default="thien1892/LunarLander-v2-ppo-5m", type=str,
                    help='Hub repository holding the checkpoint to continue from '
                         '(only used when --re_train is true)')
parser.add_argument('--filename_retrain', default="ppo-LunarLander-v2-5m.zip", type=str,
                    help='checkpoint file name inside --id_retrain '
                         '(only used when --re_train is true)')
parser.add_argument('--learning_rate', default=1e-4, type=float,
                    help='learning rate passed to PPO')
args = parser.parse_args()

if __name__ == '__main__':
    # Script flow: build the training env, create or reload a PPO model,
    # train, save, evaluate, then package and upload to the Hugging Face Hub.
    # Environments and the virtual display are released in the `finally`
    # block so they are cleaned up even when a step fails.
    env_id = "LunarLander-v2"  # single source of truth for the environment name

    # Create the (vectorized) training environment
    env = make_vec_env(env_id, n_envs=args.n_envs)
    hub_eval_env = None
    try:
        # Model
        if not args.re_train:
            # Fresh model with fixed PPO hyper-parameters; only the learning
            # rate is configurable from the command line.
            model = PPO(
                policy='MlpPolicy',
                env=env,
                n_steps=1024,
                batch_size=64,
                n_epochs=4,
                gamma=0.999,
                gae_lambda=0.98,
                ent_coef=0.01,
                learning_rate=args.learning_rate,
                verbose=1)
        else:
            # Continue from a checkpoint downloaded from the Hub, overriding
            # its stored learning rate with the one given on the command line.
            checkpoint = load_from_hub(args.id_retrain, args.filename_retrain)
            model = PPO.load(
                checkpoint,
                env=env,
                print_system_info=True,
                learning_rate=args.learning_rate)

        # Train
        # `reset_num_timesteps` is an argument of learn(), not of load(); it
        # used to be passed to PPO.load, where it had no effect on training.
        model.learn(total_timesteps=args.total_timesteps, reset_num_timesteps=True)

        # Save the model
        model.save(args.model_name)

        # Evaluate model on a fresh, single (non-vectorized) environment
        single_eval_env = gym.make(env_id)
        try:
            mean_reward, std_reward = evaluate_policy(
                model, single_eval_env, n_eval_episodes=10, deterministic=True)
        finally:
            # Previously this env was simply rebound and never closed.
            single_eval_env.close()
        print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

        # Push to HF hub
        hub_eval_env = DummyVecEnv([lambda: gym.make(env_id)])
        model_architecture = "PPO"

        package_to_hub(model=model,  # Our trained model
                       model_name=args.model_name,  # The name of our trained model
                       model_architecture=model_architecture,  # The model architecture we used: in our case PPO
                       env_id=env_id,  # Name of the environment
                       eval_env=hub_eval_env,  # Evaluation Environment
                       repo_id=args.repo_id,  # id of the model repository on the Hub: {organization}/{repo_name}
                       commit_message=args.commit_message)
    finally:
        # Release everything this script created.
        if hub_eval_env is not None:
            hub_eval_env.close()
        env.close()
        # `virtual_display` is started at module level near the top of the file.
        virtual_display.stop()