Training
In this step, we train a PPO agent with the help of the Stable Baselines3 (SB3) library. The script is essentially a reproduction of the SB3 example.
In your project directory, create a file train.py. Import the required packages as follows:
import os
from functools import partial

# Import stable-baselines3 stuff
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.env_util import make_vec_env

# Import gym environment
from apple_seeker_env import AppleSeekerEnv
Define the Godot app address and port:
ADDRESS = "127.0.0.1"
PORT = 9090
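These values are passed to AppleSeekerEnv as engine_address in the training function below; they should match the address and port the Godot app is listening on.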
Define the directory for storing logs and the log name:
LOG_DIR = "./logs/apple_seeker"
LOG_NAME = "PPO"
SUFFIX = "1"
Define the training parameters:
TOTAL_TIMESTEPS = 150000
N_STEPS = 1000
CHECKPOINT_FREQUENCY = 1000
LR = 1e-3
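In SB3 terms, TOTAL_TIMESTEPS is the total number of environment steps to train for, N_STEPS is how many steps PPO collects from each environment before every policy update, CHECKPOINT_FREQUENCY is how often (in steps per environment) the checkpoint callback saves the model, and LR is the optimizer learning rate.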
Define the training function:
def train():
    # Instantiate gym environment
    env_fn = partial(
        AppleSeekerEnv,
        engine_address=(ADDRESS, PORT)
    )
    env = make_vec_env(env_fn, n_envs=1, seed=0)

    # Instantiate agent
    model = PPO(
        "MultiInputPolicy",
        n_steps=N_STEPS,
        env=env,
        use_sde=False,
        learning_rate=LR,
        verbose=1,
        device="cpu",
        seed=0,
        tensorboard_log=LOG_DIR,
    )

    # Train agent, saving checkpoints along the way
    model.learn(
        callback=CheckpointCallback(
            save_freq=CHECKPOINT_FREQUENCY,
            save_path=os.path.join(LOG_DIR, LOG_NAME + "_" + SUFFIX, "checkpoints"),
        ),
        total_timesteps=TOTAL_TIMESTEPS,
        tb_log_name=LOG_NAME,
        progress_bar=True,
    )

    # Save final model
    model.save(os.path.join(LOG_DIR, LOG_NAME + "_" + SUFFIX, "checkpoints", "last.zip"))
Define the module behaviour on launch:
if __name__ == "__main__":
    train()
The complete code should look as follows:
import os
from functools import partial

# Import stable-baselines3 stuff
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.env_util import make_vec_env

# Import gym environment
from apple_seeker_env import AppleSeekerEnv

# Godot app address
ADDRESS = "127.0.0.1"
PORT = 9090

# Logging settings
LOG_DIR = "./logs/apple_seeker"
LOG_NAME = "PPO"
SUFFIX = "1"

# Training parameters
TOTAL_TIMESTEPS = 150000
N_STEPS = 1000
CHECKPOINT_FREQUENCY = 1000
LR = 1e-3


def train():
    # Instantiate gym environment
    env_fn = partial(
        AppleSeekerEnv,
        engine_address=(ADDRESS, PORT)
    )
    env = make_vec_env(env_fn, n_envs=1, seed=0)

    # Instantiate agent
    model = PPO(
        "MultiInputPolicy",
        n_steps=N_STEPS,
        env=env,
        use_sde=False,
        learning_rate=LR,
        verbose=1,
        device="cpu",
        seed=0,
        tensorboard_log=LOG_DIR,
    )

    # Train agent, saving checkpoints along the way
    model.learn(
        callback=CheckpointCallback(
            save_freq=CHECKPOINT_FREQUENCY,
            save_path=os.path.join(LOG_DIR, LOG_NAME + "_" + SUFFIX, "checkpoints"),
        ),
        total_timesteps=TOTAL_TIMESTEPS,
        tb_log_name=LOG_NAME,
        progress_bar=True,
    )

    # Save final model
    model.save(os.path.join(LOG_DIR, LOG_NAME + "_" + SUFFIX, "checkpoints", "last.zip"))


if __name__ == "__main__":
    train()
Run the Godot app from the Godot Editor.
Note
You can also compile the Godot app and run the executable instead.
Run the training process as follows:
python3 train.py
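Since tensorboard_log points at LOG_DIR, training curves can be monitored with TensorBoard while the run is in progress (assuming the tensorboard package is installed):

tensorboard --logdir ./logs/apple_seeker

After training finishes, the final model is saved as last.zip in the checkpoints directory used above. Below is a minimal evaluation sketch, assuming the Godot app is still running at the same address and port; it uses SB3's PPO.load and evaluate_policy helpers, and the checkpoint path matches the save call in train.py:

from functools import partial

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

from apple_seeker_env import AppleSeekerEnv

# Recreate the environment the same way as during training
env_fn = partial(AppleSeekerEnv, engine_address=("127.0.0.1", 9090))
env = make_vec_env(env_fn, n_envs=1, seed=0)

# Load the final model and run a few evaluation episodes
model = PPO.load("./logs/apple_seeker/PPO_1/checkpoints/last.zip", env=env)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

Recreating the environment with the same make_vec_env call ensures the loaded policy receives observations in the format it was trained on.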