memorax.algorithms.PQN

memorax.algorithms.PQN#

class memorax.algorithms.PQN[source]#

Bases: object

PQN(cfg: memorax.algorithms.pqn.PQNConfig, env: gymnax.environments.environment.Environment, env_params: gymnax.environments.environment.EnvParams, q_network: flax.linen.module.Module, optimizer: optax._src.base.GradientTransformation, epsilon_schedule: collections.abc.Callable[[typing.Union[jax.jaxlib._jax.Array, numpy.ndarray, numpy.bool, numpy.number, float, int]], typing.Union[jax.jaxlib._jax.Array, numpy.ndarray, numpy.bool, numpy.number, float, int]])

cfg: PQNConfig#

env: Environment#

env_params: EnvParams#

q_network: Module#

optimizer: GradientTransformation#

epsilon_schedule: Callable[[Array | ndarray | bool | number | float | int], Array | ndarray | bool | number | float | int]#

init(key)[source]#

Return type:

tuple[Array, PQNState, Array, EnvState]

warmup(key, state, num_steps)[source]#

Return type:

tuple[Array, PQNState]

Parameters:

key (Array)
state (PQNState)
num_steps (int)

train(key, state, num_steps)[source]#

Return type:

tuple[Array, PQNState, dict]

Parameters:

key (Array)
state (PQNState)
num_steps (int)

evaluate(key, state, num_steps)[source]#

Return type:

tuple[Array, dict]

Parameters:

key (Array)
state (PQNState)
num_steps (int)

__init__(cfg, env, env_params, q_network, optimizer, epsilon_schedule)#

Parameters:

cfg (PQNConfig)
env (Environment)
env_params (EnvParams)
q_network (Module)
optimizer (GradientTransformation)
epsilon_schedule (Callable[[Array | ndarray | bool | number | float | int], Array | ndarray | bool | number | float | int])

Return type:

None

replace(**updates)#

Returns a new object, replacing the specified fields with new values.

memorax.algorithms.PQN

Contents

memorax.algorithms.PQN#