Module bbrl.utils.utils

Expand source code
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#

import copy
from typing import Callable, List, Union

import gym
import torch
from gym.spaces import Box, Discrete
from gym.wrappers import TimeLimit
from omegaconf import DictConfig

from bbrl.agents.agent import Agent
from bbrl.workspace import Workspace
from bbrl import instantiate_class
from bbrl.agents.asynchronous import AsynchronousAgent


def get_env_dimensions(env) -> tuple:
    """
    Build the environment described by `env` and report its dimensions.

    `env` is passed to `instantiate_class`; the resulting object must expose
    `observation_space` and `action_space`.

    Returns:
        `(obs_dim, action_dim)` for a `Discrete` action space, where
        `action_dim` is the number of actions, or
        `(obs_dim, action_dim, max_action)` for a `Box` action space, where
        `max_action` is the upper bound of the first action component.
        `obs_dim` is always the first entry of the observation shape.

    Raises:
        Exception: if the action space is neither `Discrete` nor `Box`.
    """
    env = instantiate_class(env)
    obs_dim = env.observation_space.shape[0]
    space = env.action_space

    if isinstance(space, Discrete):
        dims = (obs_dim, space.n)
    elif isinstance(space, Box):
        dims = (obs_dim, space.shape[0], space.high[0])
    else:
        raise Exception(f"{type(env.action_space)} unknown")

    # The environment was only needed to read its spaces.
    del env
    return dims


def make_gym_env(max_episode_steps, env_name):
    """
    Create the gym environment `env_name` wrapped in a `TimeLimit`.

    Note the argument order: the step limit comes first, the name second.
    """
    base_env = gym.make(env_name)
    return TimeLimit(base_env, max_episode_steps=max_episode_steps)


def soft_param_update(network_to_update, network, rho):
    """
    Blend the parameters of `network` into `network_to_update`, in place.

    Each parameter of `network_to_update` becomes
    `rho * source + (1 - rho) * target`; parameters are paired in the order
    returned by `.parameters()`. `network` is left untouched.
    """
    target_params = network_to_update.parameters()
    source_params = network.parameters()
    for target, source in zip(target_params, source_params):
        blended = rho * source.data + (1 - rho) * target.data
        target.data.copy_(blended)


# dict configs utils :
def key_path_in_dict(nested_dict: dict, key_path: str):
    """
    Check whether a dot-separated sequence of keys exists in a nested dict.

    Args:
        nested_dict: the (possibly nested) mapping to inspect.
        key_path: keys joined with ".", e.g. "a.b.c".

    Returns:
        True if every key of the path can be looked up in turn, False
        otherwise. A path that continues past a non-subscriptable value
        (e.g. "a.b" in {"a": 1}) is reported as missing rather than raising.
    """
    node = nested_dict
    for key in key_path.split("."):
        try:
            node = node[key]
        except (KeyError, TypeError):
            # KeyError: the key is absent. TypeError: the path walks through
            # a value that is not a mapping (int, None, str, list, ...).
            return False
    return True


def set_value_with_key_path(nested_dict: DictConfig, key_path: str, value):
    """
    Assign `value` at the location named by a dot-separated key path.

    Every key but the last is looked up in turn (so intermediate containers
    must already exist); the last key is then set on the innermost container.
    """
    *parent_keys, leaf_key = key_path.split(".")
    node = nested_dict
    for name in parent_keys:
        node = node[name]
    node[leaf_key] = value


# Salina additions ####

# need to check if this function works well using cuda
def vector_to_parameters(vec: torch.Tensor, parameters) -> None:
    r"""Copy consecutive slices of a flat vector into model parameters.

    Args:
        vec (Tensor): a single vector holding the values of all parameters,
            laid out one parameter after the other.
        parameters (Iterable[Tensor]): the tensors to overwrite in place,
            in the same order as their values appear in ``vec``.

    Raises:
        TypeError: if ``vec`` is not a ``torch.Tensor``.
    """
    if not isinstance(vec, torch.Tensor):
        raise TypeError(
            "expected torch.Tensor, but got: {}".format(torch.typename(vec))
        )

    # `offset` marks where the values of the current parameter start in `vec`.
    offset = 0
    for tensor in parameters:
        end = offset + tensor.numel()
        chunk = vec[offset:end]
        # Give the slice the parameter's shape, then overwrite the old data.
        tensor.data.copy_(chunk.view_as(tensor).data)
        offset = end

# !!!! nRemoteParamAgent,  Not ready (WIP) !!!
class nRemoteParamAgent(Agent):
    """
    Evaluate N (different) individuals with at most `n_process`
    asynchronous agents.

    The user has to provide:
       1/ the acquisition agent used as a template (constructor)
       2/ the list of parameters, one entry per individual (call)
       3/ the function that applies one entry of that list to an agent (call)
    This implementation is based on the Asynchronous agents
    (another implementation could use the NRemote agent, maybe by slicing
    the shared workspace to separate the experiences collected by each
    individual).
    """

    def __init__(self, acq_agent: Agent, n_process: int, name: str = "") -> None:
        """
        Create `n_process` asynchronous agents, each wrapping its own deep
        copy of `acq_agent`.

        acq_agent : the agent that is copied for every asynchronous agent
        n_process : number of asynchronous agents to create
        name      : forwarded to the `Agent` constructor
        """
        super().__init__(name)
        self.n_process = n_process
        self.async_agents = [
            AsynchronousAgent(copy.deepcopy(acq_agent)) for _ in range(n_process)
        ]

    def __call__(self, params: list, apply_params: Callable, **kwargs):
        """
        Run one evaluation per entry of `params` and collect the workspaces.

        params       : one parameter set per individual to evaluate
        apply_params : called as `apply_params(async_agent, params[i])` right
                       before the i-th launch; its first argument is the
                       `AsynchronousAgent` wrapper, not the wrapped agent
        kwargs       : forwarded to every asynchronous agent call

        The results are stored in `self.workspaces` in the order in which
        the runs are seen to finish, which is not necessarily the order of
        `params`. The method polls the agents in a loop without sleeping
        until every run has been collected; with an empty `params` it
        returns immediately.
        """
        self.workspaces = []
        nb_agent_to_launch = len(params)
        next_param_id = 0

        def launch_agent(agent):
            # `nonlocal` is required so the counter is shared with the
            # scheduling loop below instead of being a throw-away local.
            nonlocal next_param_id
            apply_params(agent, params[next_param_id])
            agent(**kwargs)
            next_param_id += 1

        # Start as many agents as there is work for, up to n_process.
        pool = list(self.async_agents[: min(nb_agent_to_launch, self.n_process)])
        for agent in pool:
            launch_agent(agent)

        while pool:
            # Iterate over a snapshot: finished agents are removed from
            # `pool` inside the loop.
            for agent in list(pool):
                if agent.is_running():
                    continue
                workspace = agent.get_workspace()
                if workspace is not None:
                    self.workspaces.append(workspace)
                if next_param_id < nb_agent_to_launch:
                    # Reuse the idle agent for the next pending individual.
                    launch_agent(agent)
                else:
                    pool.remove(agent)

    def get_workspaces(self) -> List[Workspace]:
        """
        Return the workspaces collected by the last call.

        Raises an Exception if the agent has never been called.
        """
        try:
            return self.workspaces
        except AttributeError:
            raise Exception(
                "The nRemoteParamAgent has not been called yet, workspaces have not been created"
            )

    def close(self) -> None:
        """Close every asynchronous agent created by the constructor."""
        for a in self.async_agents:
            a.close()


# !!!! nRemoteDistinctAgents: not functional !!!
# To my knowledge, you cannot
# change the content of an async agent
class nRemoteDistinctAgents(Agent):
    """
    Evaluate N (different) agents with at most `n_process` asynchronous
    agents.

    Basic usage :
    remote = nRemoteDistinctAgents(n_process)
    remote(acq_agent_list,)
    The user has to provide:
        1/ a list of acquisition agents that will be assigned to the remotes
    This implementation is based on the Asynchronous agents
    (another implementation could use the NRemote agent, maybe by slicing
    the shared workspace to separate the experiences collected by each
    individual).
    """

    def __init__(self, n_process: int, name: str = "") -> None:
        """
        Create `n_process` asynchronous agents that initially wrap no agent
        (`None`); the agents to run are assigned at call time.

        n_process : number of asynchronous agents to create
        name      : forwarded to the `Agent` constructor
        """
        super().__init__(name)
        self.n_process = n_process
        self.async_agents = [AsynchronousAgent(None) for _ in range(n_process)]

    def __call__(
        self,
        acq_agents: List[Agent],
        agents_args: Union[list, dict, None] = None,
        **kwargs,
    ):
        """
        Run every agent of `acq_agents` once and collect the workspaces.

        acq_agents  : the agents to run
        agents_args : extra keyword arguments for the runs; `None` (default)
                      for none, a dict shared by all agents, or a list with
                      one dict per agent (indexed like `acq_agents`)
        kwargs      : forwarded to every asynchronous agent call

        The results are stored in `self.workspaces` in the order in which
        the runs are seen to finish. The method polls the asynchronous
        agents in a loop without sleeping until every run is collected.

        NOTE(review): the comment above this class says the content of an
        async agent cannot be changed; confirm that assigning `.agent`
        below actually changes what is executed.
        """

        def get_agent_args(agent_id):
            if agents_args is None:
                return {}
            if isinstance(agents_args, dict):
                return agents_args
            if isinstance(agents_args, list):
                return agents_args[agent_id]
            raise Exception("Unsupported")

        self.workspaces = []
        nb_agent_to_launch = len(acq_agents)
        to_launch_id = 0

        def launch(async_agent):
            # Assign the next pending agent to this worker and start it.
            nonlocal to_launch_id
            args = get_agent_args(to_launch_id)
            async_agent.agent = acq_agents[to_launch_id]
            async_agent(**args, **kwargs)
            to_launch_id += 1

        # Start as many workers as there is work for, up to n_process.
        pool = list(self.async_agents[: min(self.n_process, nb_agent_to_launch)])
        for async_agent in pool:
            launch(async_agent)

        while pool:
            # Iterate over a snapshot: finished workers are removed from
            # `pool` inside the loop.
            for async_agent in list(pool):
                if async_agent.is_running():
                    continue
                self.workspaces.append(async_agent.get_workspace())
                if to_launch_id < nb_agent_to_launch:
                    launch(async_agent)
                else:
                    pool.remove(async_agent)

    def get_workspaces(self) -> List[Workspace]:
        """
        Return the workspaces collected by the last call.

        Raises an Exception if the agent has never been called.
        """
        try:
            return self.workspaces
        except AttributeError:
            raise Exception(
                "The nRemoteDistinctAgents has not been called yet, workspaces have not been created"
            )

    def close(self) -> None:
        """Close every asynchronous agent created by the constructor."""
        for a in self.async_agents:
            a.close()


def is_vec_of_ones(vec) -> bool:
    """
    Tell whether every element of a two-level nested iterable equals 1.

    `vec` is iterated row by row and each row element by element; the scan
    stops at the first element that is not equal to 1. An empty `vec` (or
    one made of empty rows) yields True.
    """
    return all(item == 1 for row in vec for item in row)

Functions

def get_env_dimensions(env) ‑> tuple
Expand source code
def get_env_dimensions(env) -> tuple:
    """
    Build the environment described by `env` and report its dimensions.

    `env` is passed to `instantiate_class`; the resulting object must expose
    `observation_space` and `action_space`.

    Returns:
        `(obs_dim, action_dim)` for a `Discrete` action space, where
        `action_dim` is the number of actions, or
        `(obs_dim, action_dim, max_action)` for a `Box` action space, where
        `max_action` is the upper bound of the first action component.
        `obs_dim` is always the first entry of the observation shape.

    Raises:
        Exception: if the action space is neither `Discrete` nor `Box`.
    """
    env = instantiate_class(env)
    obs_dim = env.observation_space.shape[0]
    space = env.action_space

    if isinstance(space, Discrete):
        dims = (obs_dim, space.n)
    elif isinstance(space, Box):
        dims = (obs_dim, space.shape[0], space.high[0])
    else:
        raise Exception(f"{type(env.action_space)} unknown")

    # The environment was only needed to read its spaces.
    del env
    return dims
def is_vec_of_ones(vec) ‑> bool
Expand source code
def is_vec_of_ones(vec) -> bool:
    """
    Tell whether every element of a two-level nested iterable equals 1.

    `vec` is iterated row by row and each row element by element; the scan
    stops at the first element that is not equal to 1. An empty `vec` (or
    one made of empty rows) yields True.
    """
    return all(item == 1 for row in vec for item in row)
def key_path_in_dict(nested_dict: dict, key_path: str)

Check whether a sequence of keys exists in a nested dict.

Expand source code
def key_path_in_dict(nested_dict: dict, key_path: str):
    """
    Check whether a dot-separated sequence of keys exists in a nested dict.

    Args:
        nested_dict: the (possibly nested) mapping to inspect.
        key_path: keys joined with ".", e.g. "a.b.c".

    Returns:
        True if every key of the path can be looked up in turn, False
        otherwise. A path that continues past a non-subscriptable value
        (e.g. "a.b" in {"a": 1}) is reported as missing rather than raising.
    """
    node = nested_dict
    for key in key_path.split("."):
        try:
            node = node[key]
        except (KeyError, TypeError):
            # KeyError: the key is absent. TypeError: the path walks through
            # a value that is not a mapping (int, None, str, list, ...).
            return False
    return True
def make_gym_env(max_episode_steps, env_name)
Expand source code
def make_gym_env(max_episode_steps, env_name):
    """
    Create the gym environment `env_name` wrapped in a `TimeLimit`.

    Note the argument order: the step limit comes first, the name second.
    """
    base_env = gym.make(env_name)
    return TimeLimit(base_env, max_episode_steps=max_episode_steps)
def set_value_with_key_path(nested_dict: omegaconf.dictconfig.DictConfig, key_path: str, value)
Expand source code
def set_value_with_key_path(nested_dict: DictConfig, key_path: str, value):
    """
    Assign `value` at the location named by a dot-separated key path.

    Every key but the last is looked up in turn (so intermediate containers
    must already exist); the last key is then set on the innermost container.
    """
    *parent_keys, leaf_key = key_path.split(".")
    node = nested_dict
    for name in parent_keys:
        node = node[name]
    node[leaf_key] = value
def soft_param_update(network_to_update, network, rho)
Expand source code
def soft_param_update(network_to_update, network, rho):
    """
    Blend the parameters of `network` into `network_to_update`, in place.

    Each parameter of `network_to_update` becomes
    `rho * source + (1 - rho) * target`; parameters are paired in the order
    returned by `.parameters()`. `network` is left untouched.
    """
    target_params = network_to_update.parameters()
    source_params = network.parameters()
    for target, source in zip(target_params, source_params):
        blended = rho * source.data + (1 - rho) * target.data
        target.data.copy_(blended)
def vector_to_parameters(vec: torch.Tensor, parameters) ‑> None

Convert one vector to the parameters

Args

vec : Tensor
a single vector represents the parameters of a model.
parameters : Iterable[Tensor]
an iterator of Tensors that are the parameters of a model.
Expand source code
def vector_to_parameters(vec: torch.Tensor, parameters) -> None:
    r"""Copy consecutive slices of a flat vector into model parameters.

    Args:
        vec (Tensor): a single vector holding the values of all parameters,
            laid out one parameter after the other.
        parameters (Iterable[Tensor]): the tensors to overwrite in place,
            in the same order as their values appear in ``vec``.

    Raises:
        TypeError: if ``vec`` is not a ``torch.Tensor``.
    """
    if not isinstance(vec, torch.Tensor):
        raise TypeError(
            "expected torch.Tensor, but got: {}".format(torch.typename(vec))
        )

    # `offset` marks where the values of the current parameter start in `vec`.
    offset = 0
    for tensor in parameters:
        end = offset + tensor.numel()
        chunk = vec[offset:end]
        # Give the slice the parameter's shape, then overwrite the old data.
        tensor.data.copy_(chunk.view_as(tensor).data)
        offset = end

Classes

class nRemoteDistinctAgents (n_process: int, name: str = '')

Class that evaluates N (different) individuals with m processes. Basic usage: remote = nRemoteDistinctAgents(n_process) remote(acq_agent_list,) The user has to provide: 1/ a list of acquisition agents that will be copied to the remotes. This implementation is based on the Asynchronous agents (I think another implementation could use the NRemote agent, maybe by slicing the shared workspace to separate the experiences collected by each individual).

Implements a list of agents which are executed asynchronously in other processes. Each agent can be parametrized by specific parameters and will return its own workspace. acq_agent : an instance of the agent that will be run on each process. n_process : the number of asynchronous agents to create. apply_params : a function f(acq_agent, param) => acq_agent that updates an agent with a specific set of parameters.

Expand source code
class nRemoteDistinctAgents(Agent):
    """
    Evaluate N (different) agents with at most `n_process` asynchronous
    agents.

    Basic usage :
    remote = nRemoteDistinctAgents(n_process)
    remote(acq_agent_list,)
    The user has to provide:
        1/ a list of acquisition agents that will be assigned to the remotes
    This implementation is based on the Asynchronous agents
    (another implementation could use the NRemote agent, maybe by slicing
    the shared workspace to separate the experiences collected by each
    individual).
    """

    def __init__(self, n_process: int, name: str = "") -> None:
        """
        Create `n_process` asynchronous agents that initially wrap no agent
        (`None`); the agents to run are assigned at call time.

        n_process : number of asynchronous agents to create
        name      : forwarded to the `Agent` constructor
        """
        super().__init__(name)
        self.n_process = n_process
        self.async_agents = [AsynchronousAgent(None) for _ in range(n_process)]

    def __call__(
        self,
        acq_agents: List[Agent],
        agents_args: Union[list, dict, None] = None,
        **kwargs,
    ):
        """
        Run every agent of `acq_agents` once and collect the workspaces.

        acq_agents  : the agents to run
        agents_args : extra keyword arguments for the runs; `None` (default)
                      for none, a dict shared by all agents, or a list with
                      one dict per agent (indexed like `acq_agents`)
        kwargs      : forwarded to every asynchronous agent call

        The results are stored in `self.workspaces` in the order in which
        the runs are seen to finish. The method polls the asynchronous
        agents in a loop without sleeping until every run is collected.

        NOTE(review): this class is flagged as not functional because the
        content of an async agent reportedly cannot be changed; confirm that
        assigning `.agent` below actually changes what is executed.
        """

        def get_agent_args(agent_id):
            if agents_args is None:
                return {}
            if isinstance(agents_args, dict):
                return agents_args
            if isinstance(agents_args, list):
                return agents_args[agent_id]
            raise Exception("Unsupported")

        self.workspaces = []
        nb_agent_to_launch = len(acq_agents)
        to_launch_id = 0

        def launch(async_agent):
            # Assign the next pending agent to this worker and start it.
            nonlocal to_launch_id
            args = get_agent_args(to_launch_id)
            async_agent.agent = acq_agents[to_launch_id]
            async_agent(**args, **kwargs)
            to_launch_id += 1

        # Start as many workers as there is work for, up to n_process.
        pool = list(self.async_agents[: min(self.n_process, nb_agent_to_launch)])
        for async_agent in pool:
            launch(async_agent)

        while pool:
            # Iterate over a snapshot: finished workers are removed from
            # `pool` inside the loop.
            for async_agent in list(pool):
                if async_agent.is_running():
                    continue
                self.workspaces.append(async_agent.get_workspace())
                if to_launch_id < nb_agent_to_launch:
                    launch(async_agent)
                else:
                    pool.remove(async_agent)

    def get_workspaces(self) -> List[Workspace]:
        """
        Return the workspaces collected by the last call.

        Raises an Exception if the agent has never been called.
        """
        try:
            return self.workspaces
        except AttributeError:
            raise Exception(
                "The nRemoteDistinctAgents has not been called yet, workspaces have not been created"
            )

    def close(self) -> None:
        """Close every asynchronous agent created by the constructor."""
        for a in self.async_agents:
            a.close()

Ancestors

  • Agent
  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def close(self) ‑> None
Expand source code
def close(self) -> None:
    """Close each asynchronous agent held in `self.async_agents`, in order."""
    for async_agent in self.async_agents:
        async_agent.close()
def get_workspaces(self) ‑> List[Workspace]
Expand source code
def get_workspaces(self) -> List[Workspace]:
    """
    Return the workspaces collected by the last call of the agent.

    Raises an Exception if the agent has never been called, i.e. if
    `self.workspaces` does not exist yet. The message names
    nRemoteDistinctAgents, the class this method is listed under.
    """
    try:
        return self.workspaces
    except AttributeError:
        raise Exception(
            "The nRemoteDistinctAgents has not been called yet, workspaces have not been created"
        )

Inherited members

class nRemoteParamAgent (acq_agent: Agent, n_process: int, name: str = '')

Class that evaluates N (different) individuals with m processes. The user has to provide: 1/ the acquisition agent list or template, 2/ the list of parameters for each individual of the population, 3/ the function that applies the parameters to the acquisition agent. This implementation is based on the Asynchronous agents (I think another implementation could use the NRemote agent, maybe by slicing the shared workspace to separate the experiences collected by each individual).

Implements a list of agents which are executed asynchronously in other processes. Each agent can be parametrized by specific parameters and will return its own workspace. acq_agent : an instance of the agent that will be run on each process. n_process : the number of asynchronous agents to create. apply_params : a function f(acq_agent, param) => acq_agent that updates an agent with a specific set of parameters.

Expand source code
class nRemoteParamAgent(Agent):
    """
    Evaluate N (different) individuals with at most `n_process`
    asynchronous agents.

    The user has to provide:
       1/ the acquisition agent used as a template (constructor)
       2/ the list of parameters, one entry per individual (call)
       3/ the function that applies one entry of that list to an agent (call)
    This implementation is based on the Asynchronous agents
    (another implementation could use the NRemote agent, maybe by slicing
    the shared workspace to separate the experiences collected by each
    individual).
    """

    def __init__(self, acq_agent: Agent, n_process: int, name: str = "") -> None:
        """
        Create `n_process` asynchronous agents, each wrapping its own deep
        copy of `acq_agent`.

        acq_agent : the agent that is copied for every asynchronous agent
        n_process : number of asynchronous agents to create
        name      : forwarded to the `Agent` constructor
        """
        super().__init__(name)
        self.n_process = n_process
        self.async_agents = [
            AsynchronousAgent(copy.deepcopy(acq_agent)) for _ in range(n_process)
        ]

    def __call__(self, params: list, apply_params: Callable, **kwargs):
        """
        Run one evaluation per entry of `params` and collect the workspaces.

        params       : one parameter set per individual to evaluate
        apply_params : called as `apply_params(async_agent, params[i])` right
                       before the i-th launch; its first argument is the
                       `AsynchronousAgent` wrapper, not the wrapped agent
        kwargs       : forwarded to every asynchronous agent call

        The results are stored in `self.workspaces` in the order in which
        the runs are seen to finish, which is not necessarily the order of
        `params`. The method polls the agents in a loop without sleeping
        until every run has been collected; with an empty `params` it
        returns immediately.
        """
        self.workspaces = []
        nb_agent_to_launch = len(params)
        next_param_id = 0

        def launch_agent(agent):
            # `nonlocal` is required so the counter is shared with the
            # scheduling loop below instead of being a throw-away local.
            nonlocal next_param_id
            apply_params(agent, params[next_param_id])
            agent(**kwargs)
            next_param_id += 1

        # Start as many agents as there is work for, up to n_process.
        pool = list(self.async_agents[: min(nb_agent_to_launch, self.n_process)])
        for agent in pool:
            launch_agent(agent)

        while pool:
            # Iterate over a snapshot: finished agents are removed from
            # `pool` inside the loop.
            for agent in list(pool):
                if agent.is_running():
                    continue
                workspace = agent.get_workspace()
                if workspace is not None:
                    self.workspaces.append(workspace)
                if next_param_id < nb_agent_to_launch:
                    # Reuse the idle agent for the next pending individual.
                    launch_agent(agent)
                else:
                    pool.remove(agent)

    def get_workspaces(self) -> List[Workspace]:
        """
        Return the workspaces collected by the last call.

        Raises an Exception if the agent has never been called.
        """
        try:
            return self.workspaces
        except AttributeError:
            raise Exception(
                "The nRemoteParamAgent has not been called yet, workspaces have not been created"
            )

    def close(self) -> None:
        """Close every asynchronous agent created by the constructor."""
        for a in self.async_agents:
            a.close()

Ancestors

  • Agent
  • torch.nn.modules.module.Module

Class variables

var dump_patches : bool
var training : bool

Methods

def close(self) ‑> None
Expand source code
def close(self) -> None:
    """Close each asynchronous agent held in `self.async_agents`, in order."""
    for async_agent in self.async_agents:
        async_agent.close()
def get_workspaces(self) ‑> List[Workspace]
Expand source code
def get_workspaces(self) -> List[Workspace]:
    """
    Return the workspaces collected by the last call of the agent.

    Raises an Exception if the agent has never been called, i.e. if
    `self.workspaces` does not exist yet.
    """
    try:
        collected = self.workspaces
    except AttributeError:
        raise Exception(
            "The nRemoteParamAgent has not been called yet, workspaces have not been created"
        )
    return collected

Inherited members