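"""Train or run inference with a Stable-Baselines3 PPO agent in a Godot RL Agents environment.

Example invocation (a sketch: the script name and binary path are placeholders;
the flags are defined by the argument parser below):

    python sb3_example.py --env_path=path/to/game_binary --timesteps=100000 --viz
"""
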
import argparse
import os
import pathlib
from typing import Callable

import torch.nn as nn
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor

from godot_rl.core.utils import can_import
from godot_rl.wrappers.onnx.stable_baselines_export import export_ppo_model_as_onnx
from godot_rl.wrappers.stable_baselines_wrapper import StableBaselinesGodotEnv

if can_import("ray"):
    print("WARNING: SB3 and ray[rllib] are not compatible.")

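# NOTE: the argument parser definition was missing from this file. The flags
# below are reconstructed from how `args` is used throughout the script;
# defaults and help strings are assumptions, not the original values.
parser = argparse.ArgumentParser(allow_abbrev=False)
parser.add_argument("--env_path", default=None, type=str, help="Path to the exported Godot binary; None uses the editor")
parser.add_argument("--exper_dir", default="logs/sb3", type=str, help="Directory for TensorBoard logs and checkpoints")
parser.add_argument("--exper_name", default="experiment", type=str, help="Run name used for logs and checkpoints")
parser.add_argument("--seed", default=0, type=int, help="Environment seed")
parser.add_argument("--resume_model_path", default=None, type=str, help=".zip model to resume training or run inference from")
parser.add_argument("--save_model_path", default=None, type=str, help="Where to save the trained model (.zip)")
parser.add_argument("--onnx_export_path", default=None, type=str, help="Where to export the trained policy (.onnx)")
parser.add_argument("--save_checkpoint_frequency", default=None, type=int, help="Save a checkpoint every N steps, if set")
parser.add_argument("--timesteps", default=1_000_000, type=int, help="Number of environment steps to train or run for")
parser.add_argument("--inference", action="store_true", help="Run inference instead of training")
parser.add_argument("--linear_lr_schedule", action="store_true", help="Linearly decay the learning rate to 0")
parser.add_argument("--viz", action="store_true", help="Show the game window while training")
parser.add_argument("--speedup", default=1, type=int, help="Physics speedup factor for the environment")
parser.add_argument("--n_parallel", default=1, type=int, help="Number of parallel Godot instances")
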
args, extras = parser.parse_known_args()

def handle_onnx_export():
    """
    Enforces the .onnx extension when exporting the model.

    This avoids potential conflicts in case of identical names with
    different extensions.
    """
    if args.onnx_export_path is not None:
        path_onnx = pathlib.Path(args.onnx_export_path).with_suffix(".onnx")
        print(f"Exporting onnx to: {os.path.abspath(path_onnx)}")
        export_ppo_model_as_onnx(model, str(path_onnx))

def handle_model_save():
    """Enforces the .zip extension when saving the model."""
    if args.save_model_path is not None:
        zip_save_path = pathlib.Path(args.save_model_path).with_suffix(".zip")
        print(f"Saving model to: {os.path.abspath(zip_save_path)}")
        model.save(zip_save_path)

def close_env():
    try:
        print("Closing env...")
        env.close()
    except Exception as e:
        print(f"Exception while closing env: {e}")

path_checkpoint = os.path.join(args.exper_dir, f"{args.exper_name}_checkpoints")
abs_path_checkpoint = os.path.abspath(path_checkpoint)

if args.save_checkpoint_frequency is not None and os.path.isdir(path_checkpoint):
    raise RuntimeError(
        f"{abs_path_checkpoint} already exists. "
        "Use a different directory or a different experiment name. "
        "If you want to overwrite previous checkpoints, delete them manually first."
    )

if args.inference and args.resume_model_path is None:
    # parser.error() prints the message and exits; no raise is needed.
    parser.error("Using --inference requires --resume_model_path to be set.")

if args.env_path is None and args.viz:
    print("Info: using --viz without --env_path set has no effect.")
    print("\nIn-editor training will always render.")

env = StableBaselinesGodotEnv(
    env_path=args.env_path,
    show_window=args.viz,
    seed=args.seed,
    n_parallel=args.n_parallel,
    speedup=args.speedup,
)
env = VecMonitor(env)

# LR schedule code snippet from:
# https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#learning-rate-schedule
def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """
    Linear learning rate schedule.

    :param initial_value: Initial learning rate.
    :return: schedule that computes
      current learning rate depending on remaining progress
    """

    def func(progress_remaining: float) -> float:
        """
        Progress will decrease from 1 (beginning) to 0.

        :param progress_remaining:
        :return: current learning rate
        """
        return progress_remaining * initial_value

    return func

# In SB3, optimizer settings are routed to the policy's optimizer
# through policy_kwargs.
optimizer_kwargs = dict(
    betas=(0.9, 0.9),
    eps=1e-5,
)

policy_kwargs = dict(
    # SB3 expects the activation class itself, not an instance.
    activation_fn=nn.Tanh,
    net_arch=dict(
        pi=[1024, 1024, 1024, 1024],
        vf=[4096, 4096, 4096, 4096, 4096, 4096],
    ),
    optimizer_kwargs=optimizer_kwargs,
)

if args.resume_model_path is None:
    # Use a linearly decaying learning rate if requested, a constant one otherwise.
    if not args.linear_lr_schedule:
        learning_rate = 0.0003
    else:
        learning_rate = linear_schedule(0.0003)

    model: PPO = PPO(
        "MultiInputPolicy",
        env,
        batch_size=64,
        ent_coef=0.01,
        verbose=2,
        n_steps=256,
        tensorboard_log=args.exper_dir,
        learning_rate=learning_rate,
        policy_kwargs=policy_kwargs,
    )
else:
    path_zip = pathlib.Path(args.resume_model_path)
    print(f"Loading model: {os.path.abspath(path_zip)}")
    model: PPO = PPO.load(
        path_zip,
        env=env,
        tensorboard_log=args.exper_dir,
    )

if args.inference:
    obs = env.reset()
    for i in range(args.timesteps):
        action, _state = model.predict(obs, deterministic=True)
        obs, reward, done, info = env.step(action)
else:
    learn_arguments = dict(total_timesteps=args.timesteps, tb_log_name=args.exper_name)
    if args.save_checkpoint_frequency:
        print("Checkpoint saving enabled.")
        print(f"\nCheckpoints will be saved to {abs_path_checkpoint}")
        checkpoint_callback = CheckpointCallback(
            # CheckpointCallback counts vec env steps, so divide the desired
            # frequency by the number of parallel envs.
            save_freq=(args.save_checkpoint_frequency // env.num_envs),
            save_path=path_checkpoint,
            name_prefix=args.exper_name,
        )
        learn_arguments["callback"] = checkpoint_callback
    try:
        model.learn(**learn_arguments)
    except KeyboardInterrupt:
        print(
            "Training interrupted by user. Will save if --save_model_path was set "
            "and/or export if --onnx_export_path was set."
        )

close_env()
handle_onnx_export()
handle_model_save()