Module bbrl.agents.gymb

Expand source code
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#

import numpy as np
import torch

import gym
from bbrl.agents.agent import Agent


def _convert_action(action):
    if len(action.size()) == 0:
        action = action.item()
        assert isinstance(action, int)
    else:
        action = np.array(action.tolist())
    return action


def _format_frame(frame):
    if isinstance(frame, dict):
        r = {}
        for k in frame:
            r[k] = _format_frame(frame[k])
        return r
    elif isinstance(frame, list):
        t = torch.tensor(frame).unsqueeze(0)
        if t.dtype == torch.float64 or t.dtype == torch.float32:
            t = t.float()
        else:
            t = t.long()
        return t
    elif isinstance(frame, np.ndarray):
        t = torch.from_numpy(frame).unsqueeze(0)
        if t.dtype == torch.float64 or t.dtype == torch.float32:
            t = t.float()
        else:
            t = t.long()
        return t
    elif isinstance(frame, torch.Tensor):
        return frame.unsqueeze(0)  # .float()
    elif isinstance(frame, bool):
        return torch.tensor([frame]).bool()
    elif isinstance(frame, int):
        return torch.tensor([frame]).long()
    elif isinstance(frame, float):
        return torch.tensor([frame]).float()

    else:
        try:
            # Check if it is a LazyFrame from OpenAI Baselines
            o = torch.from_numpy(frame.__array__()).unsqueeze(0).float()
            return o
        except TypeError:
            assert False


def _torch_type(d):
    nd = {}
    for k in d:
        if d[k].dtype == torch.float64:
            nd[k] = d[k].float()
        else:
            nd[k] = d[k]
    return nd


def _torch_cat_dict(d):
    r = {}
    for k in d[0]:
        a = [dd[k] for dd in d]
        r[k] = torch.cat(a, dim=0)
    return r


class GymAgent(Agent):
    """Create an Agent from a gym environment"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        """Create an agent from a Gym environment

        Args:
            make_env_fn (callable): The function that creates a single gym.Env
            make_env_args (dict): The arguments passed to make_env_fn
            n_envs (int): The number of environments to create
            action_string (str, optional): The name of the action variable in the workspace. Defaults to "action".
            output (str, optional): The prefix of the environment variables in the workspace. Defaults to "env/".
            seed (int): The base seed used to initialize the environments;
                environment k is seeded with seed + k
        """
        super().__init__()
        assert n_envs > 0
        self.envs = None
        self.env_args = make_env_args
        self._seed = seed
        assert self._seed is not None, "[GymAgent] seeds must be specified"

        self.n_envs = n_envs
        self.output = output
        self.input = action_string
        self.make_env_fn = make_env_fn
        self.ghost_params = torch.nn.Parameter(torch.randn(()))
        self.timestep = torch.tensor([0 for _ in range(n_envs)])
        self.finished = torch.tensor([True for _ in range(n_envs)])
        self.truncated = torch.tensor([False for _ in range(n_envs)])

        self.envs = [self.make_env_fn(**self.env_args) for _ in range(self.n_envs)]
        for k in range(self.n_envs):
            self.envs[k].seed(self._seed + k)

        self.observation_space = self.envs[0].observation_space
        self.action_space = self.envs[0].action_space
        self.finished = torch.tensor([True for _ in self.envs])
        self.truncated = torch.tensor([True for _ in self.envs])
        self.timestep = torch.tensor([0 for _ in self.envs])
        self.cumulated_reward = {}
        self.last_frame = {}

    def _common_reset(self, k, save_render):
        env = self.envs[k]
        self.cumulated_reward[k] = 0.0
        o = env.reset()
        observation = _format_frame(o)

        if isinstance(observation, torch.Tensor):
            observation = {"env_obs": observation}

        else:
            assert isinstance(observation, dict)
        if save_render:
            image = env.render(mode="image").unsqueeze(0)
            observation["rendering"] = image

        self.finished[k] = False
        self.truncated[k] = False
        self.timestep[k] = 0

        ret = {
            **observation,
            "done": torch.tensor([False]),
            "truncated": torch.tensor([False]),
            "timestep": torch.tensor([self.timestep[k]]),
            "cumulated_reward": torch.tensor([0.0]).float(),
        }
        return _torch_type(ret), observation

    def _reset(self, k, save_render):
        full_obs, observation = self._common_reset(k, save_render)
        self.last_frame[k] = observation
        return full_obs

    def _make_step(self, env, action, k, save_render):
        action = _convert_action(action)

        obs, reward, done, info = env.step(action)
        if "TimeLimit.truncated" in info.keys():
            truncated = info["TimeLimit.truncated"]
        else:
            truncated = False
        self.cumulated_reward[k] += reward
        observation = _format_frame(obs)
        if isinstance(observation, torch.Tensor):
            observation = {"env_obs": observation}
        else:
            assert isinstance(observation, dict)
        if save_render:
            image = env.render(mode="image").unsqueeze(0)
            observation["rendering"] = image
        ret = {
            **observation,
            "done": torch.tensor([done]),
            "truncated": torch.tensor([truncated]),
            "cumulated_reward": torch.tensor([self.cumulated_reward[k]]),
            "timestep": torch.tensor([self.timestep[k]]),
        }
        rew = _torch_type({"reward": torch.tensor([reward]).float()})
        return _torch_type(ret), rew, done, truncated, observation

    def _step(self, k, action, save_render):
        if self.finished[k]:
            assert k in self.last_frame
            rew = _torch_type({"reward": torch.tensor([0.0]).float()})
            return (
                {
                    **self.last_frame[k],
                    "done": torch.tensor([True]),
                    "truncated": torch.tensor([self.truncated[k]]),
                    "cumulated_reward": torch.tensor(
                        [self.cumulated_reward[k]]
                    ).float(),
                    "timestep": torch.tensor([self.timestep[k]]),
                },
                rew,
            )
        self.timestep[k] += 1
        full_obs, reward, done, truncated, observation = self._make_step(
            self.envs[k], action, k, save_render
        )

        self.last_frame[k] = observation
        if done:
            self.finished[k] = True
            self.truncated[k] = truncated
        return full_obs, reward

    def set_obs(self, observations, t):
        observations = _torch_cat_dict(observations)
        for k in observations:
            self.set((self.output + k, t), observations[k].to(self.ghost_params.device))

    def set_next_obs(self, observations, t):
        observations = _torch_cat_dict(observations)
        for k in observations:
            self.set(
                ("env/env_next_obs" + k, t),
                observations[k].to(self.ghost_params.device),
            )

    def set_reward(self, rewards, t):
        rewards = _torch_cat_dict(rewards)
        for k in rewards:
            self.set((self.output + k, t), rewards[k].to(self.ghost_params.device))

    def forward(self, t=0, save_render=False, **kwargs):
        """Do one step by reading the `action` at t-1
        If t==0, environments are reset
        If save_render is True, then the output of env.render(mode="image") is written as env/rendering
        """

        if t == 0:
            self.timestep = torch.tensor([0 for _ in self.envs])
            observations = []
            for k, e in enumerate(self.envs):
                obs = self._reset(k, save_render)
                observations.append(obs)
            self.set_obs(observations, t)
        else:
            assert t > 0
            action = self.get((self.input, t - 1))
            assert action.size()[0] == self.n_envs, "Incompatible number of envs"
            observations = []
            rewards = []
            for k, e in enumerate(self.envs):
                obs, reward = self._step(k, action[k], save_render)
                observations.append(obs)
                rewards.append(reward)
            self.set_reward(rewards, t - 1)
            self.set_reward(rewards, t)
            self.set_obs(observations, t)

    def is_continuous_action(self):
        return isinstance(self.action_space, gym.spaces.Box)

    def is_discrete_action(self):
        return isinstance(self.action_space, gym.spaces.Discrete)

    def is_continuous_state(self):
        return isinstance(self.observation_space, gym.spaces.Box)

    def is_discrete_state(self):
        return isinstance(self.observation_space, gym.spaces.Discrete)

    def get_obs_and_actions_sizes(self):
        action_dim = 0
        state_dim = 0
        if self.is_continuous_action():
            action_dim = self.action_space.shape[0]
        elif self.is_discrete_action():
            action_dim = self.action_space.n
        if self.is_continuous_state():
            state_dim = self.observation_space.shape[0]
        elif self.is_discrete_state():
            state_dim = self.observation_space.n
        return state_dim, action_dim


class AutoResetGymAgent(GymAgent):
    """The same as GymAgent, but with an automatic reset when done is True"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        """Create an agent from a Gym environment  with Autoreset

        Args:
            make_env_fn (callable): The function that creates a single gym.Env
            make_env_args (dict): The arguments passed to make_env_fn
            n_envs (int): The number of environments to create
            action_string (str, optional): The name of the action variable in the workspace. Defaults to "action".
            output (str, optional): The prefix of the environment variables in the workspace. Defaults to "env/".
            seed (int): The base seed used to initialize the environments;
                environment k is seeded with seed + k
        """
        super().__init__(
            make_env_fn=make_env_fn,
            make_env_args=make_env_args,
            n_envs=n_envs,
            seed=seed,
            action_string=action_string,
            output=output,
        )
        self.is_running = [False for _ in range(self.n_envs)]
        self.previous_reward = [0 for _ in range(self.n_envs)]

    def _reset(self, k, save_render):
        self.is_running[k] = True
        full_obs, _ = self._common_reset(k, save_render)
        return full_obs

    def _step(self, k, action, save_render):
        self.timestep[k] += 1
        full_obs, reward, done, truncated, _ = self._make_step(
            self.envs[k], action, k, save_render
        )
        if done:
            self.is_running[k] = False
            self.truncated[k] = truncated
        return full_obs, reward

    def forward(self, t=0, save_render=False, **kwargs):
        """
        Perform one step by reading the `action`
        """

        observations = []
        rewards = []
        for k, env in enumerate(self.envs):
            if not self.is_running[k] or t == 0:
                observations.append(self._reset(k, save_render))

                if t > 0:
                    rew = self.previous_reward[k]
                    rewards.append(rew)
            else:
                assert t > 0
                action = self.get((self.input, t - 1))
                assert action.size()[0] == self.n_envs, "Incompatible number of envs"
                full_obs, reward = self._step(k, action[k], save_render)
                self.previous_reward[k] = reward
                observations.append(full_obs)
                rewards.append(reward)

        if t > 0:
            self.set_reward(rewards, t - 1)
            self.set_reward(rewards, t)
        self.set_obs(observations, t)


class NoAutoResetGymAgent(GymAgent):
    """The same as GymAgent, named to make sure it is not AutoReset"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        super().__init__(
            make_env_fn=make_env_fn,
            make_env_args=make_env_args,
            n_envs=n_envs,
            seed=seed,
            action_string=action_string,
            output=output,
        )

Classes

class AutoResetGymAgent (make_env_fn=None, make_env_args={}, n_envs=None, seed=None, action_string='action', output='env/')

The same as GymAgent, but with an automatic reset when done is True

Create an agent from a Gym environment with automatic reset

Args

make_env_fn : callable
The function that creates a single gym.Env
make_env_args : dict
The arguments passed to make_env_fn
n_envs : int
The number of environments to create
action_string : str, optional
The name of the action variable in the workspace. Defaults to "action".
output : str, optional
The prefix of the environment variables in the workspace. Defaults to "env/".
seed : int
The base seed used to initialize the environments; environment k is seeded with seed + k.

Expand source code
class AutoResetGymAgent(GymAgent):
    """The same as GymAgent, but with an automatic reset when done is True"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        """Create an agent from a Gym environment  with Autoreset

        Args:
            make_env_fn (callable): The function that creates a single gym.Env
            make_env_args (dict): The arguments passed to make_env_fn
            n_envs (int): The number of environments to create
            action_string (str, optional): The name of the action variable in the workspace. Defaults to "action".
            output (str, optional): The prefix of the environment variables in the workspace. Defaults to "env/".
            seed (int): The base seed used to initialize the environments;
                environment k is seeded with seed + k
        """
        super().__init__(
            make_env_fn=make_env_fn,
            make_env_args=make_env_args,
            n_envs=n_envs,
            seed=seed,
            action_string=action_string,
            output=output,
        )
        self.is_running = [False for _ in range(self.n_envs)]
        self.previous_reward = [0 for _ in range(self.n_envs)]

    def _reset(self, k, save_render):
        self.is_running[k] = True
        full_obs, _ = self._common_reset(k, save_render)
        return full_obs

    def _step(self, k, action, save_render):
        self.timestep[k] += 1
        full_obs, reward, done, truncated, _ = self._make_step(
            self.envs[k], action, k, save_render
        )
        if done:
            self.is_running[k] = False
            self.truncated[k] = truncated
        return full_obs, reward

    def forward(self, t=0, save_render=False, **kwargs):
        """
        Perform one step by reading the `action`
        """

        observations = []
        rewards = []
        for k, env in enumerate(self.envs):
            if not self.is_running[k] or t == 0:
                observations.append(self._reset(k, save_render))

                if t > 0:
                    rew = self.previous_reward[k]
                    rewards.append(rew)
            else:
                assert t > 0
                action = self.get((self.input, t - 1))
                assert action.size()[0] == self.n_envs, "Incompatible number of envs"
                full_obs, reward = self._step(k, action[k], save_render)
                self.previous_reward[k] = reward
                observations.append(full_obs)
                rewards.append(reward)

        if t > 0:
            self.set_reward(rewards, t - 1)
            self.set_reward(rewards, t)
        self.set_obs(observations, t)

Ancestors

  • GymAgent
  • Agent
  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, t=0, save_render=False, **kwargs) ‑> Callable[..., Any]

Perform one step by reading the action

Expand source code
def forward(self, t=0, save_render=False, **kwargs):
    """
    Perform one step by reading the `action`
    """

    observations = []
    rewards = []
    for k, env in enumerate(self.envs):
        if not self.is_running[k] or t == 0:
            observations.append(self._reset(k, save_render))

            if t > 0:
                rew = self.previous_reward[k]
                rewards.append(rew)
        else:
            assert t > 0
            action = self.get((self.input, t - 1))
            assert action.size()[0] == self.n_envs, "Incompatible number of envs"
            full_obs, reward = self._step(k, action[k], save_render)
            self.previous_reward[k] = reward
            observations.append(full_obs)
            rewards.append(reward)

    if t > 0:
        self.set_reward(rewards, t - 1)
        self.set_reward(rewards, t)
    self.set_obs(observations, t)

Inherited members

class GymAgent (make_env_fn=None, make_env_args={}, n_envs=None, seed=None, action_string='action', output='env/')

Create an Agent from a gym environment

Create an agent from a Gym environment

Args

make_env_fn : callable
The function that creates a single gym.Env
make_env_args : dict
The arguments passed to make_env_fn
n_envs : int
The number of environments to create
action_string : str, optional
The name of the action variable in the workspace. Defaults to "action".
output : str, optional
The prefix of the environment variables in the workspace. Defaults to "env/".
seed : int
The base seed used to initialize the environments; environment k is seeded with seed + k.

Expand source code
class GymAgent(Agent):
    """Create an Agent from a gym environment"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        """Create an agent from a Gym environment

        Args:
            make_env_fn (callable): The function that creates a single gym.Env
            make_env_args (dict): The arguments passed to make_env_fn
            n_envs (int): The number of environments to create
            action_string (str, optional): The name of the action variable in the workspace. Defaults to "action".
            output (str, optional): The prefix of the environment variables in the workspace. Defaults to "env/".
            seed (int): The base seed used to initialize the environments;
                environment k is seeded with seed + k
        """
        super().__init__()
        assert n_envs > 0
        self.envs = None
        self.env_args = make_env_args
        self._seed = seed
        assert self._seed is not None, "[GymAgent] seeds must be specified"

        self.n_envs = n_envs
        self.output = output
        self.input = action_string
        self.make_env_fn = make_env_fn
        self.ghost_params = torch.nn.Parameter(torch.randn(()))
        self.timestep = torch.tensor([0 for _ in range(n_envs)])
        self.finished = torch.tensor([True for _ in range(n_envs)])
        self.truncated = torch.tensor([False for _ in range(n_envs)])

        self.envs = [self.make_env_fn(**self.env_args) for _ in range(self.n_envs)]
        for k in range(self.n_envs):
            self.envs[k].seed(self._seed + k)

        self.observation_space = self.envs[0].observation_space
        self.action_space = self.envs[0].action_space
        self.finished = torch.tensor([True for _ in self.envs])
        self.truncated = torch.tensor([True for _ in self.envs])
        self.timestep = torch.tensor([0 for _ in self.envs])
        self.cumulated_reward = {}
        self.last_frame = {}

    def _common_reset(self, k, save_render):
        env = self.envs[k]
        self.cumulated_reward[k] = 0.0
        o = env.reset()
        observation = _format_frame(o)

        if isinstance(observation, torch.Tensor):
            observation = {"env_obs": observation}

        else:
            assert isinstance(observation, dict)
        if save_render:
            image = env.render(mode="image").unsqueeze(0)
            observation["rendering"] = image

        self.finished[k] = False
        self.truncated[k] = False
        self.timestep[k] = 0

        ret = {
            **observation,
            "done": torch.tensor([False]),
            "truncated": torch.tensor([False]),
            "timestep": torch.tensor([self.timestep[k]]),
            "cumulated_reward": torch.tensor([0.0]).float(),
        }
        return _torch_type(ret), observation

    def _reset(self, k, save_render):
        full_obs, observation = self._common_reset(k, save_render)
        self.last_frame[k] = observation
        return full_obs

    def _make_step(self, env, action, k, save_render):
        action = _convert_action(action)

        obs, reward, done, info = env.step(action)
        if "TimeLimit.truncated" in info.keys():
            truncated = info["TimeLimit.truncated"]
        else:
            truncated = False
        self.cumulated_reward[k] += reward
        observation = _format_frame(obs)
        if isinstance(observation, torch.Tensor):
            observation = {"env_obs": observation}
        else:
            assert isinstance(observation, dict)
        if save_render:
            image = env.render(mode="image").unsqueeze(0)
            observation["rendering"] = image
        ret = {
            **observation,
            "done": torch.tensor([done]),
            "truncated": torch.tensor([truncated]),
            "cumulated_reward": torch.tensor([self.cumulated_reward[k]]),
            "timestep": torch.tensor([self.timestep[k]]),
        }
        rew = _torch_type({"reward": torch.tensor([reward]).float()})
        return _torch_type(ret), rew, done, truncated, observation

    def _step(self, k, action, save_render):
        if self.finished[k]:
            assert k in self.last_frame
            rew = _torch_type({"reward": torch.tensor([0.0]).float()})
            return (
                {
                    **self.last_frame[k],
                    "done": torch.tensor([True]),
                    "truncated": torch.tensor([self.truncated[k]]),
                    "cumulated_reward": torch.tensor(
                        [self.cumulated_reward[k]]
                    ).float(),
                    "timestep": torch.tensor([self.timestep[k]]),
                },
                rew,
            )
        self.timestep[k] += 1
        full_obs, reward, done, truncated, observation = self._make_step(
            self.envs[k], action, k, save_render
        )

        self.last_frame[k] = observation
        if done:
            self.finished[k] = True
            self.truncated[k] = truncated
        return full_obs, reward

    def set_obs(self, observations, t):
        observations = _torch_cat_dict(observations)
        for k in observations:
            self.set((self.output + k, t), observations[k].to(self.ghost_params.device))

    def set_next_obs(self, observations, t):
        observations = _torch_cat_dict(observations)
        for k in observations:
            self.set(
                ("env/env_next_obs" + k, t),
                observations[k].to(self.ghost_params.device),
            )

    def set_reward(self, rewards, t):
        rewards = _torch_cat_dict(rewards)
        for k in rewards:
            self.set((self.output + k, t), rewards[k].to(self.ghost_params.device))

    def forward(self, t=0, save_render=False, **kwargs):
        """Do one step by reading the `action` at t-1
        If t==0, environments are reset
        If save_render is True, then the output of env.render(mode="image") is written as env/rendering
        """

        if t == 0:
            self.timestep = torch.tensor([0 for _ in self.envs])
            observations = []
            for k, e in enumerate(self.envs):
                obs = self._reset(k, save_render)
                observations.append(obs)
            self.set_obs(observations, t)
        else:
            assert t > 0
            action = self.get((self.input, t - 1))
            assert action.size()[0] == self.n_envs, "Incompatible number of envs"
            observations = []
            rewards = []
            for k, e in enumerate(self.envs):
                obs, reward = self._step(k, action[k], save_render)
                observations.append(obs)
                rewards.append(reward)
            self.set_reward(rewards, t - 1)
            self.set_reward(rewards, t)
            self.set_obs(observations, t)

    def is_continuous_action(self):
        return isinstance(self.action_space, gym.spaces.Box)

    def is_discrete_action(self):
        return isinstance(self.action_space, gym.spaces.Discrete)

    def is_continuous_state(self):
        return isinstance(self.observation_space, gym.spaces.Box)

    def is_discrete_state(self):
        return isinstance(self.observation_space, gym.spaces.Discrete)

    def get_obs_and_actions_sizes(self):
        action_dim = 0
        state_dim = 0
        if self.is_continuous_action():
            action_dim = self.action_space.shape[0]
        elif self.is_discrete_action():
            action_dim = self.action_space.n
        if self.is_continuous_state():
            state_dim = self.observation_space.shape[0]
        elif self.is_discrete_state():
            state_dim = self.observation_space.n
        return state_dim, action_dim

Ancestors

  • Agent
  • torch.nn.modules.module.Module

Subclasses

  • AutoResetGymAgent
  • NoAutoResetGymAgent

Class variables

var dump_patches : bool
var training : bool

Methods

def forward(self, t=0, save_render=False, **kwargs) ‑> Callable[..., Any]

Do one step by reading the action at t-1. If t==0, the environments are reset. If save_render is True, the output of env.render(mode="image") is written as env/rendering.

Expand source code
def forward(self, t=0, save_render=False, **kwargs):
    """Do one step by reading the `action` at t-1
    If t==0, environments are reset
    If save_render is True, then the output of env.render(mode="image") is written as env/rendering
    """

    if t == 0:
        self.timestep = torch.tensor([0 for _ in self.envs])
        observations = []
        for k, e in enumerate(self.envs):
            obs = self._reset(k, save_render)
            observations.append(obs)
        self.set_obs(observations, t)
    else:
        assert t > 0
        action = self.get((self.input, t - 1))
        assert action.size()[0] == self.n_envs, "Incompatible number of envs"
        observations = []
        rewards = []
        for k, e in enumerate(self.envs):
            obs, reward = self._step(k, action[k], save_render)
            observations.append(obs)
            rewards.append(reward)
        self.set_reward(rewards, t - 1)
        self.set_reward(rewards, t)
        self.set_obs(observations, t)
def get_obs_and_actions_sizes(self)
Expand source code
def get_obs_and_actions_sizes(self):
    action_dim = 0
    state_dim = 0
    if self.is_continuous_action():
        action_dim = self.action_space.shape[0]
    elif self.is_discrete_action():
        action_dim = self.action_space.n
    if self.is_continuous_state():
        state_dim = self.observation_space.shape[0]
    elif self.is_discrete_state():
        state_dim = self.observation_space.n
    return state_dim, action_dim
def is_continuous_action(self)
Expand source code
def is_continuous_action(self):
    return isinstance(self.action_space, gym.spaces.Box)
def is_continuous_state(self)
Expand source code
def is_continuous_state(self):
    return isinstance(self.observation_space, gym.spaces.Box)
def is_discrete_action(self)
Expand source code
def is_discrete_action(self):
    return isinstance(self.action_space, gym.spaces.Discrete)
def is_discrete_state(self)
Expand source code
def is_discrete_state(self):
    return isinstance(self.observation_space, gym.spaces.Discrete)
def set_next_obs(self, observations, t)
Expand source code
def set_next_obs(self, observations, t):
    observations = _torch_cat_dict(observations)
    for k in observations:
        self.set(
            ("env/env_next_obs" + k, t),
            observations[k].to(self.ghost_params.device),
        )
def set_obs(self, observations, t)
Expand source code
def set_obs(self, observations, t):
    observations = _torch_cat_dict(observations)
    for k in observations:
        self.set((self.output + k, t), observations[k].to(self.ghost_params.device))
def set_reward(self, rewards, t)
Expand source code
def set_reward(self, rewards, t):
    rewards = _torch_cat_dict(rewards)
    for k in rewards:
        self.set((self.output + k, t), rewards[k].to(self.ghost_params.device))

Inherited members

class NoAutoResetGymAgent (make_env_fn=None, make_env_args={}, n_envs=None, seed=None, action_string='action', output='env/')

The same as GymAgent; the name makes explicit that finished episodes are not reset automatically

Create an agent from a Gym environment

Args

make_env_fn : callable
The function that creates a single gym.Env
make_env_args : dict
The arguments passed to make_env_fn
n_envs : int
The number of environments to create
action_string : str, optional
The name of the action variable in the workspace. Defaults to "action".
output : str, optional
The prefix of the environment variables in the workspace. Defaults to "env/".
seed : int
The base seed used to initialize the environments; environment k is seeded with seed + k.

Expand source code
class NoAutoResetGymAgent(GymAgent):
    """The same as GymAgent, named to make sure it is not AutoReset"""

    def __init__(
        self,
        make_env_fn=None,
        make_env_args={},
        n_envs=None,
        seed=None,
        action_string="action",
        output="env/",
    ):
        super().__init__(
            make_env_fn=make_env_fn,
            make_env_args=make_env_args,
            n_envs=n_envs,
            seed=seed,
            action_string=action_string,
            output=output,
        )

Ancestors

  • GymAgent
  • Agent
  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Inherited members