Source code for lib.envs.envUnity

from unityagents import UnityEnvironment
import sys

class Env:
    '''A convenience class for generating episodes and memories

    This convenience class generates a context manager that can be
    used for generating a Unity environment. The Unity environment
    and the OpenAI Gym environment operate slightly differently, and
    hence it is difficult to create a uniform algorithm that is able
    to solve everything at the same time. This environment tries to
    solve that problem.
    '''

    def __init__(self, fileName, showEnv=False, trainMode=True):
        '''Initialize the environment

        This sets up the requirements that will later be used for
        generating the Unity environment. It assumes that you will
        provide a binary file for generating the environment.

        The environment can be generated in different ways. It can be
        generated in *headless* mode by setting ``showEnv`` to False,
        in which case the environment will not show a window at
        startup. This is good for training, as well as for situations
        when you are running the environment without the presence of
        an X server, especially when you are running the environment
        remotely.

        You can also specify that the environment is being run in
        ``trainMode``. In this case, the environment will be primed
        for training: each frame will finish as soon as possible.
        This is not good for observing what is happening, but it
        significantly increases the speed of training.

        Arguments:
            fileName {str} -- Path to the binary file. This file must
                be the same as the one for which the ``unityagents``
                package has been generated.

        Keyword Arguments:
            showEnv {bool} -- Set this to ``True`` if you want to view
                the environment (default: {False})
            trainMode {bool} -- Set this to ``True`` if you want the
                environment to be in training mode (i.e. fast
                execution) (default: {True})
        '''
        try:
            self.no_graphics = not showEnv
            self.trainMode = trainMode
            self.fileName = fileName
            self.states = None
        except Exception as e:
            raise type(e)(
                'lib.envs.envUnity.Env.__init__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return
    def __enter__(self):
        '''generate a context manager

        This actually generates the context manager and allows you to
        use this class within a ``with`` statement. This is the
        function that actually initializes the environment and
        maintains it until it is no longer needed.

        Returns:
            ``this`` -- Returns an instance of the same class
        '''
        try:
            self.env = UnityEnvironment(
                file_name=self.fileName,
                no_graphics=self.no_graphics)

            # get the default brain
            self.brain_name = self.env.brain_names[0]
            self.brain = self.env.brains[self.brain_name]

            self.env_info = self.env.reset(
                train_mode=self.trainMode)[self.brain_name]
            self.num_agents = len(self.env_info.agents)
            self.action_size = self.brain.vector_action_space_size
        except Exception as e:
            raise type(e)(
                'lib.envs.envUnity.Env.__enter__ - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self
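    # A minimal sketch of the intended usage (illustrative, not part of
    # the original module; 'path/to/binary' is a placeholder for a Unity
    # binary compatible with the installed ``unityagents`` version):
    #
    #     with Env('path/to/binary', showEnv=False, trainMode=True) as env:
    #         states = env.reset()
    #         print(env.num_agents, env.action_size)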
    def reset(self):
        '''reset the environment before starting an episode

        Returns:
            states -- The current states of all agents after the reset
        '''
        try:
            # Capture the BrainInfo returned by the reset so that the
            # states reflect the new episode rather than stale data.
            self.env_info = self.env.reset(
                train_mode=self.trainMode)[self.brain_name]
            self.states = self.env_info.vector_observations
        except Exception as e:
            raise type(e)(
                'lib.envs.envUnity.Env.reset - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return self.states
    def step(self, policy):
        '''advance one step by taking an action

        This function takes a policy function and generates an action
        according to that policy. It advances the episode by one step,
        returning the reward and the next state, along with any done
        information.

        Arguments:
            policy {function} -- This function takes a state vector and
                returns an action vector. It is assumed that the policy
                is of the correct type and is capable of returning the
                right type of action vector for the current
                environment. The validity of the policy function is not
                checked.

        Returns:
            list -- This returns a list of tuples, each containing
                ``(s_t, a_t, r_{t+1}, s_{t+1}, d)``, one tuple for each
                agent. Even in the case of a single agent, this is
                going to return a list.
        '''
        try:
            states = self.states.copy()
            actions = policy(states)
            env_info = self.env.step(actions)[self.brain_name]

            next_states = env_info.vector_observations
            rewards = env_info.rewards
            dones = env_info.local_done

            self.states = next_states

            results = []
            for i in range(self.num_agents):
                state = states[i]
                action = actions[i]
                reward = rewards[i]
                next_state = next_states[i]
                done = dones[i]
                results.append((state, action, reward, next_state, done))
        except Exception as e:
            raise type(e)(
                'lib.envs.envUnity.Env.step - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return results
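    # Sketch of a policy compatible with ``step`` (an assumption, not
    # part of the module), given an ``env`` created as shown above: it
    # maps the per-agent state array to a (num_agents, action_size)
    # action array, here drawn uniformly at random for a continuous
    # action space bounded in [-1, 1].
    #
    #     import numpy as np
    #
    #     def randomPolicy(states):
    #         return np.random.uniform(
    #             -1, 1, (env.num_agents, env.action_size))
    #
    #     results = env.step(randomPolicy)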
    def episode(self, policy, maxSteps=None):
        '''generate data for an entire episode

        This function generates an entire episode. It plays the
        environment by first resetting it to the beginning, and then
        playing the game for a given number of steps (or until the
        game terminates). It generates a list of lists of tuples,
        again one for each agent. Remember that even when the number
        of agents is 1, this will still return a list of lists. The
        structure of the result is sketched after this method.

        Arguments:
            policy {function} -- The function that takes the current
                state and returns the action vector.

        Keyword Arguments:
            maxSteps {int or None} -- The maximum number of steps that
                the agent is going to play the episode before it is
                terminated. (default: {None}, in which case the
                episode will continue until it actually finishes)

        Returns:
            list -- This returns the list of tuples for the entire
                episode. Again, this is a list of lists, one for each
                agent.
        '''
        try:
            self.reset()
            stepCount = 0
            allResults = [[] for _ in range(self.num_agents)]

            while True:
                stepCount += 1
                finished = False
                results = self.step(policy)
                for agent in range(self.num_agents):
                    state, action, reward, next_state, done = results[agent]
                    allResults[agent].append(results[agent])
                    finished = finished or done

                if finished:
                    break

                if (maxSteps is not None) and (stepCount >= maxSteps):
                    break
        except Exception as e:
            raise type(e)(
                'lib.envs.envUnity.Env.episode - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

        return allResults
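    # Shape of the returned data (illustrative): for ``N`` agents and
    # ``T`` completed steps, ``episode`` returns a list of ``N`` lists
    # of ``T`` tuples each:
    #
    #     allResults = env.episode(somePolicy, maxSteps=10)
    #     # allResults[0]    -> trajectory of agent 0
    #     # allResults[0][0] -> (state, action, reward, next_state, done)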
    def __exit__(self, exc, value, traceback):
        '''Exit the context manager

        The exit function that results in exiting the context manager.
        It closes the Unity environment on a clean exit. Errors, if
        any, are expected to be checked at this point, but that is
        handled at a higher level, so exceptions are allowed to
        propagate.

        Arguments:
            exc {type} -- Type of the exception raised, if any
            value {Exception} -- The exception instance, if any
            traceback {traceback} -- The associated traceback, if any
        '''
        if not exc:
            self.env.close()
            return True

        return False
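
# A minimal driver sketch, not part of the original module: it assumes
# a Unity binary at the placeholder path below and a continuous action
# space bounded in [-1, 1], and rolls out one episode with a random
# policy to report the undiscounted return per agent.
if __name__ == '__main__':

    import numpy as np

    with Env('path/to/unityBinary', showEnv=False, trainMode=True) as env:

        def randomPolicy(states):
            # One random action vector per agent.
            return np.random.uniform(
                -1, 1, (env.num_agents, env.action_size))

        allResults = env.episode(randomPolicy, maxSteps=1000)

        # Each tuple is (state, action, reward, next_state, done); sum
        # the rewards to get the undiscounted return per agent.
        for agent, results in enumerate(allResults):
            totalReward = sum(r for _, _, r, _, _ in results)
            print('Agent {}: return = {:.3f}'.format(agent, totalReward))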