memorax.algorithms.PPOConfig

memorax.algorithms.PPOConfig#

class memorax.algorithms.PPOConfig[source]#

Bases: object

PPOConfig(name: str, num_envs: int, num_eval_envs: int, num_steps: int, gamma: float, gae_lambda: float, num_minibatches: int, update_epochs: int, normalize_advantage: bool, clip_coef: float, clip_vloss: bool, ent_coef: float, vf_coef: float, target_kl: float | None = None, burn_in_length: int = 0)

name: str#

num_envs: int#

num_eval_envs: int#

num_steps: int#

gamma: float#

gae_lambda: float#

num_minibatches: int#

update_epochs: int#

normalize_advantage: bool#

clip_coef: float#

clip_vloss: bool#

ent_coef: float#

vf_coef: float#

target_kl: float | None = None#

burn_in_length: int = 0#

property batch_size#

__init__(name, num_envs, num_eval_envs, num_steps, gamma, gae_lambda, num_minibatches, update_epochs, normalize_advantage, clip_coef, clip_vloss, ent_coef, vf_coef, target_kl=None, burn_in_length=0)#

Parameters:

name (str)
num_envs (int)
num_eval_envs (int)
num_steps (int)
gamma (float)
gae_lambda (float)
num_minibatches (int)
update_epochs (int)
normalize_advantage (bool)
clip_coef (float)
clip_vloss (bool)
ent_coef (float)
vf_coef (float)
target_kl (float | None)
burn_in_length (int)

Return type:

None

replace(**updates)#: Returns a new object with the specified fields replaced by the given values.

memorax.algorithms.PPOConfig

Contents

memorax.algorithms.PPOConfig#