Source code for lagom.envs.vec_env

import numpy as np

from lagom.vis import GridImage

try:  # workaround on server without fake screen but still running other things well
    from lagom.vis import ImageViewer
except ImportError:
    pass


[docs]class VecEnv(object): r"""A vectorized environment runs serially for each sub-environment. Each observation returned from vectorized environment is a batch of observations for each sub-environment. And :meth:`step` is expected to receive a batch of actions for each sub-environment. .. note:: All sub-environments should share the identical observation and action spaces. In other words, a vector of multiple different environments is not supported. Args: list_make_env (list): a list of functions each returns an instantiated enviroment. observation_space (Space): observation space of the environment action_space (Space): action space of the environment """ metadata = {'render.modes': ['human', 'rgb_array']} closed = False viewer = None def __init__(self, list_make_env): self.list_make_env = list_make_env self.list_env = [make_env() for make_env in list_make_env] self.observation_space = self.list_env[0].observation_space self.action_space = self.list_env[0].action_space self.reward_range = self.list_env[0].reward_range self.spec = self.list_env[0].spec
[docs] def step(self, actions): r"""Ask all the environments to take a step with a list of actions, each for one environment. Args: actions (list): a list of actions, each for one environment. Returns ------- observations : list a list of observations, each returned from one environment after executing the given action. rewards : list a list of scalar rewards, each returned from one environment. dones : list a list of booleans indicating whether the episode terminates, each returned from one environment. infos : list a list of dictionaries of additional informations, each returned from one environment. """ assert len(actions) == len(self) observations = [] rewards = [] dones = [] infos = [] for i, (env, action) in enumerate(zip(self.list_env, actions)): observation, reward, done, info = env.step(action) # If done=True, reset environment, store last observation in info and report new initial observation if done: info['last_observation'] = observation observation = env.reset() observations.append(observation) rewards.append(reward) dones.append(done) infos.append(info) return observations, rewards, dones, infos
[docs] def reset(self): r"""Reset all the environments and return a list of initial observations from each environment. .. warning:: If :meth:`step_async` is still working, then it will be aborted. Returns ------- observations : list a list of initial observations from all environments. """ observations = [env.reset() for env in self.list_env] return observations
[docs] def render(self, mode='human'): r"""Render all the environments. It firstly retrieve RGB images from all environments and use :class:`GridImage` to make a grid of them as a single image. Then it either returns the image array or display the image to the screen by using :class:`ImageViewer`. See docstring in :class:`Env` for more detais about rendering. """ # Get images from all environments with shape [N, H, W, C] imgs = self.get_images() imgs = np.stack(imgs) # Make a grid of images grid = GridImage(ncol=5, padding=5, pad_value=0) imgs = imgs.transpose(0, 3, 1, 2) # to shape [N, C, H, W] grid.add(imgs) gridimg = np.asarray(grid()) gridimg = gridimg.transpose(0, 2, 3, 1) # back to shape [N, H, W, C] # render the grid of image if mode == 'human': self.get_viewer()(gridimg) elif mode == 'rgb_array': return gridimg else: raise ValueError(f'expected human or rgb_array, got {mode}')
[docs] def get_images(self): r"""Returns a batched RGB array with shape [N, H, W, C] from all environments. Returns ------- imgs : ndarray a batched RGB array with shape [N, H, W, C] """ return [env.render(mode='rgb_array') for env in self.list_env]
[docs] def get_viewer(self): r"""Returns an instantiated :class:`ImageViewer`. Returns ------- viewer : ImageViewer an image viewer """ if self.viewer is None: # create viewer is not existed self.viewer = ImageViewer(max_width=500) # set a max width here return self.viewer
[docs] def close_extras(self): r"""Clean up the extra resources e.g. beyond what's in this base class. """ return [env.close() for env in self.list_env]
[docs] def close(self): r"""Close all environments. It closes all the existing image viewers, then calls :meth:`close_extras` and set :attr:`closed` as ``True``. .. warning:: This function itself does not close the environments, it should be handled in :meth:`close_extras`. This is useful for parallelized environments. .. note:: This will be automatically called when garbage collected or program exited. """ if self.closed: return if self.viewer is not None: self.viewer.close() self.close_extras() self.closed = True
@property def unwrapped(self): r"""Unwrap this vectorized environment. Useful for sequential wrappers applied, it can access information from the original vectorized environment. """ return self def __len__(self): return len(self.list_make_env) def __getitem__(self, index): return self.list_env[index] def __setitem__(self, index, x): self.list_env[index] = x def __repr__(self): return f'<{self.__class__.__name__}: {len(self)}, {self.spec.id}>' def __enter__(self): return self def __exit__(self, *args): self.close() # propagate exception return False
[docs]class VecEnvWrapper(VecEnv): r"""Wraps the vectorized environment to allow a modular transformation. This class is the base class for all wrappers for vectorized environments. The subclass could override some methods to change the behavior of the original vectorized environment without touching the original code. .. note:: Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. """ def __init__(self, env): assert isinstance(env, VecEnv) self.env = env self.metadata = env.metadata self.list_make_env = env.list_make_env self.list_env = env.list_env self.observation_space = env.observation_space self.action_space = env.action_space self.reward_range = env.reward_range self.spec = env.spec
[docs] def step(self, actions): return self.env.step(actions)
[docs] def reset(self): return self.env.reset()
[docs] def get_images(self): return self.env.get_images()
[docs] def close_extras(self): return self.env.close_extras()
@property def unwrapped(self): return self.env.unwrapped def __len__(self): return len(self.env) def __getitem__(self, index): return self.env[index] def __setitem__(self, index, x): self.env[index] = x def __repr__(self): return f'<{self.__class__.__name__}, {self.env}>'