Source code for lib.envs.envGym

import gym, sys
import numpy as np
from collections import deque 
import itertools as it

class Env:
    '''A convenience class for generating episodes and memories

    This convenience class generates a context manager that can be used
    for generating a Gym environment. It is supposed to be a drop-in
    replacement for the Unity environment. It differs from the Unity
    environment, however, in that it needs the name of the environment
    as input. The other difference is that there is no such thing as
    ``trainMode``.
    '''

    def __init__(self, envName, showEnv=False):
        '''Initialize the environment

        This sets up the requirements that will later be used for
        generating the gym environment. The gym environment can be used
        in a mode that hides the plotting of the actual environment.
        This may result in a significant boost in speed.

        Arguments:
            envName {str} -- The name of the environment to be
                generated. This should be a valid name. In case the
                name provided is not valid, this is going to exit
                with an error.

        Keyword Arguments:
            showEnv {bool} -- Set this to ``True`` if you want to view
                the environment (default: {False})
        '''
        try:
            self.no_graphics = not showEnv
            self.envName = envName
            self.states = None
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env.__init__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return
    def __enter__(self):
        '''generate a context manager

        This will actually generate the context manager and allow you
        to use this within a ``with`` statement. This is the function
        that actually initializes the environment and maintains it
        until it is no longer needed. The idea of multiple agents
        within the gym environments does not exist as it does in the
        Unity agents. However, we incorporate this idea within the gym
        environment so that a single action can take place.

        Returns:
            ``this`` -- Returns an instance of the same class
        '''
        try:
            self.env = gym.make(self.envName)
            self.state = self.env.reset()
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env.__enter__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self
    def reset(self):
        '''reset the environment before starting an episode

        Returns:
            status -- The current state after the reset
        '''
        try:
            self.state = self.env.reset()
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env.reset - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self.state
    def step(self, policy):
        '''advance one step by taking an action

        This function takes a policy function and generates an action
        according to that particular policy. This advances the episode
        by one step and returns the reward, the next state, and the
        done flag.

        Arguments:
            policy {function} -- This function takes a state vector and
                returns an action vector. It is assumed that the policy
                is of the correct type, and is capable of returning the
                right type of action vector for the current environment.
                The validity of the policy function is not checked.

        Returns:
            list -- This returns a list of tuples of the form
                ``(s_t, a_t, r_{t+1}, s_{t+1}, d)``, one tuple for each
                agent. Even in the case of a single agent, this is
                going to return a list of tuples.
        '''
        try:
            results = []
            states = np.array([self.state])
            action = policy(states)[0].cpu().detach().numpy()

            # Discrete action spaces expect a plain integer action
            # rather than an array.
            if type(self.env.env.action_space.sample()) == int:
                action = int(action[0])

            nextState, reward, done, info = self.env.step(action)
            results.append((self.state, action, reward, nextState, done))
            self.state = nextState
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env.step - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return results
    def episode(self, policy, maxSteps=None):
        '''generate data for an entire episode

        This function generates an entire episode. It plays the
        environment by first resetting it to the beginning, and then
        playing the game for a given number of steps (or until the game
        is terminated). It generates a list of lists of tuples, one
        list for each agent. Remember that even when the number of
        agents is 1, it will still return a list of lists.

        Arguments:
            policy {function} -- The function that takes the current
                state and returns the action vector.

        Keyword Arguments:
            maxSteps {int or None} -- The maximum number of steps that
                the agent is going to play before the episode is
                terminated. (default: {None}, in which case the episode
                will continue until it actually finishes)

        Returns:
            list -- This returns the list of tuples for the entire
                episode. Again, this is a list of lists, one for each
                agent.
        '''
        try:
            self.reset()
            stepCount = 0
            allResults = [[] for _ in range(1)]  # One for each agent.

            while True:
                stepCount += 1
                result = self.step(policy)[0]

                if not self.no_graphics:
                    self.env.render()

                state, action, reward, next_state, done = result
                allResults[0].append(result)

                if done:
                    break

                if (maxSteps is not None) and (stepCount >= maxSteps):
                    break
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env.episode - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return allResults
    def __exit__(self, exc, value, traceback):
        '''Exit the context manager

        The exit function that will result in exiting the context
        manager. Typically one is supposed to check the error, if any,
        at this point. This will be handled at a higher level.

        Arguments:
            exc {type or None} -- The exception type raised within the
                ``with`` block, if any.
            value {Exception or None} -- The exception instance.
            traceback {traceback or None} -- The associated traceback.
        '''
        if not exc:
            self.env.close()

        return True
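
# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hypothetical example of driving ``Env`` with a random policy.
# ``Env.step`` expects ``policy(states)[0]`` to behave like a torch tensor
# (it calls ``.cpu().detach().numpy()``), so the sketch wraps the sampled
# action in ``torch.tensor``. The environment name 'CartPole-v0' and the
# helper names below are illustrative assumptions, not part of this library.
def _randomPolicyExample():
    import torch

    def randomPolicy(states):
        # One random discrete action (0 or 1) per state in the batch.
        return [torch.tensor([np.random.randint(2)]) for _ in states]

    with Env('CartPole-v0') as env:
        allResults = env.episode(randomPolicy, maxSteps=200)
        rewards = [r for (s, a, r, ns, d) in allResults[0]]
        print(f'steps = {len(rewards)}, return = {sum(rewards)}')
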
class Env1D:
    '''A convenience class for generating episodes and memories

    This convenience class generates a context manager that can be used
    for generating a Gym environment. It is supposed to be a drop-in
    replacement for the Unity environment. It differs from the Unity
    environment, however, in that it needs the name of the environment
    as input. The other difference is that there is no such thing as
    ``trainMode``.

    This 1D environment is designed to take a 1D state vector and use
    this vector in its calculations. If you are using a 1D environment
    you are advised to use this class. It has the added advantage that
    it will automatically stack together the ``N`` previous states into
    a single state. Note that the first state will be copied ``N``
    times, rather than zero padded, as this seems a more natural state
    for the beginning.
    '''

    def __init__(self, envName, N=1, showEnv=False):
        '''Initialize the environment

        This sets up the requirements that will later be used for
        generating the gym environment. The gym environment can be used
        in a mode that hides the plotting of the actual environment.
        This may result in a significant boost in speed.

        Arguments:
            envName {str} -- The name of the environment to be
                generated. This should be a valid name. In case the
                name provided is not valid, this is going to exit
                with an error.

        Keyword Arguments:
            N {int} -- The number of consecutive states that will be
                concatenated together. (default: 1). You will not be
                able to set a value less than 1.
            showEnv {bool} -- Set this to ``True`` if you want to view
                the environment (default: {False})
        '''
        try:
            self.N = N
            self.no_graphics = not showEnv
            self.envName = envName
            self.states = None

            assert type(self.N) == int, \
                f'integer expected. Received {type(self.N)}'
            assert self.N > 0, \
                f'self.N = {self.N} (should be greater than 0)'
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env1D.__init__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return
    def __enter__(self):
        '''generate a context manager

        This will actually generate the context manager and allow you
        to use this within a ``with`` statement. This is the function
        that actually initializes the environment and maintains it
        until it is no longer needed. The idea of multiple agents
        within the gym environments does not exist as it does in the
        Unity agents. However, we incorporate this idea within the gym
        environment so that a single action can take place.

        Returns:
            ``this`` -- Returns an instance of the same class
        '''
        try:
            self.env = gym.make(self.envName)
            state = self.env.reset()
            # Keep N+1 copies of the initial state so that both the
            # current and the next stacked state can be sliced from the
            # same deque.
            self.state = deque(
                [state for i in range(self.N + 1)], maxlen=self.N + 1)
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env1D.__enter__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self
    def reset(self):
        '''reset the environment before starting an episode

        Returns:
            status -- The current state after the reset
        '''
        try:
            state = self.env.reset()
            self.state = deque(
                [state for i in range(self.N + 1)], maxlen=self.N + 1)
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env1D.reset - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self.state
    def step(self, policy):
        '''advance one step by taking an action

        This function takes a policy function and generates an action
        according to that particular policy. This advances the episode
        by one step and returns the reward, the next state, and the
        done flag.

        Arguments:
            policy {function} -- This function takes a state vector and
                returns an action vector. It is assumed that the policy
                is of the correct type, and is capable of returning the
                right type of action vector for the current environment.
                The validity of the policy function is not checked.

        Returns:
            list -- This returns a list of tuples of the form
                ``(s_t, a_t, r_{t+1}, s_{t+1}, d)``, one tuple for each
                agent. Even in the case of a single agent, this is
                going to return a list of tuples.
        '''
        try:
            results = []

            # Stack the N most recent observations into a single flat
            # state vector.
            state = np.array(list(it.islice(self.state, 1, 1 + self.N)))
            state = state.flatten()
            states = np.array([state])

            action = policy(states)[0].cpu().detach().numpy()

            # Discrete action spaces expect a plain integer action
            # rather than an array.
            if type(self.env.env.action_space.sample()) == int:
                action = int(action[0])

            nextState, reward, done, info = self.env.step(action)
            self.state.append(nextState)

            # Rebuild the stacked state now that the newest observation
            # has been appended.
            nextState = np.array(list(it.islice(self.state, 1, 1 + self.N)))
            nextState = nextState.flatten()

            results.append((state, action, reward, nextState, done))
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env1D.step - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return results
    def episode(self, policy, maxSteps=None):
        '''generate data for an entire episode

        This function generates an entire episode. It plays the
        environment by first resetting it to the beginning, and then
        playing the game for a given number of steps (or until the game
        is terminated). It generates a list of lists of tuples, one
        list for each agent. Remember that even when the number of
        agents is 1, it will still return a list of lists.

        Arguments:
            policy {function} -- The function that takes the current
                state and returns the action vector.

        Keyword Arguments:
            maxSteps {int or None} -- The maximum number of steps that
                the agent is going to play before the episode is
                terminated. (default: {None}, in which case the episode
                will continue until it actually finishes)

        Returns:
            list -- This returns the list of tuples for the entire
                episode. Again, this is a list of lists, one for each
                agent.
        '''
        try:
            self.reset()
            stepCount = 0
            allResults = [[] for _ in range(1)]  # One for each agent.

            while True:
                stepCount += 1
                result = self.step(policy)[0]

                if not self.no_graphics:
                    self.env.render()

                state, action, reward, next_state, done = result
                allResults[0].append(result)

                if done:
                    break

                if (maxSteps is not None) and (stepCount >= maxSteps):
                    break
        except Exception as e:
            raise type(e)(
                'lib.envs.envGym.Env1D.episode - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return allResults
    def __exit__(self, exc, value, traceback):
        '''Exit the context manager

        The exit function that will result in exiting the context
        manager. Typically one is supposed to check the error, if any,
        at this point. This will be handled at a higher level.

        Arguments:
            exc {type or None} -- The exception type raised within the
                ``with`` block, if any.
            value {Exception or None} -- The exception instance.
            traceback {traceback or None} -- The associated traceback.
        '''
        if not exc:
            self.env.close()

        return True
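
# --- Usage sketch for the stacked variant (not part of the original module) --
# A hypothetical illustration only: with N=4 and CartPole-v0 (observation
# length 4), each tuple returned by ``Env1D.step`` carries a flattened stack
# of the 4 most recent observations, i.e. a vector of length 16. The policy
# interface is the same torch-tensor convention assumed in the sketch above.
def _stackedStateExample():
    import torch

    def randomPolicy(states):
        # One random discrete action (0 or 1) per state in the batch.
        return [torch.tensor([np.random.randint(2)]) for _ in states]

    with Env1D('CartPole-v0', N=4) as env:
        results = env.episode(randomPolicy, maxSteps=50)
        firstState = results[0][0][0]
        print(firstState.shape)  # expected: (16,) for CartPole-v0
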