# Source code for lib.agents.qNetwork


import numpy as np
import sys
import torch
import torch.nn            as nn
import torch.nn.functional as F
import torch.optim         as optim

class qNetworkDiscrete(nn.Module):
    '''Fully connected Q network for 1D states and discrete actions.

    The network maps a batch of state vectors to one Q value per discrete
    action. It also owns an Adam optimizer (``self.optimizer``) over its own
    parameters, which :meth:`step` uses to update the weights.
    '''

    def __init__(self, stateSize, actionSize, layers=None, activations=None, batchNormalization = False, lr=0.01 ):
        '''This is a Q network with discrete actions
        
        This takes a state and returns a Q function for each action. Hence, the
        input is a state and the output is a set of Q values, one for each action
        in the action space. The action is assumed to be discrete. The input
        state is assumed to be 1D in nature. A different network will have to be
        chosen if 2D and 3D inputs are to be desired.
        
        Parameters
        ----------
        stateSize : {int}
            Size of the state. Since this is a 1D network, this represents the number
            of values that will be used to represent the current state.
        actionSize : {int}
            The number of discrete actions that will be used.
        layers : {list of int}, optional
            The number of nodes associated with each hidden layer (the default
            is ``None``, which is treated as ``[10, 5]`` and creates two hidden
            layers with 10 and 5 nodes respectively). The sequence is copied,
            so later changes to the caller's list do not affect the network.
        activations : {list of activations}, optional
            The activation functions to be used for each hidden layer (the
            default is ``None``, which applies ``torch.tanh`` after every
            hidden layer). Layers and activations are paired with ``zip`` in
            ``forward``, so an explicitly supplied list should contain one
            activation per hidden layer; hidden layers without a matching
            activation are not applied.
        batchNormalization : {bool}, optional
            Whether batchnormalization is to be used (the default is ``False``,
            for which batch normalization will be neglected)
        lr : {float}, optional
            Learning rate handed to the Adam optimizer (the default is ``0.01``)

        Raises
        ------
        Exception
            Any error raised while building the network is re-raised as the
            same exception type, with the location of the failure prefixed to
            the message.
        '''

        try:
            super(qNetworkDiscrete, self).__init__()

            # Defaults are resolved here (instead of using list literals in the
            # signature) so that no list object is shared between instances,
            # and so that the default activations always match the number of
            # hidden layers that were requested.
            layers = [10, 5] if layers is None else list(layers)
            if activations is None:
                activations = [torch.tanh] * len(layers)
            else:
                activations = list(activations)

            self.stateSize           = stateSize
            self.actionSize          = actionSize
            self.layers              = layers
            self.activations         = activations
            self.batchNormalization  = batchNormalization

            # Generate the fully connected layer functions. ``self.bns``
            # stays empty when batch normalization is switched off.
            self.fcLayers = nn.ModuleList([])
            self.bns      = nn.ModuleList([])

            inputSize = stateSize
            for layerSize in layers:
                self.fcLayers.append( nn.Linear(inputSize, layerSize) )
                if self.batchNormalization:
                    self.bns.append( nn.BatchNorm1d( num_features = layerSize, track_running_stats=True ) )
                inputSize = layerSize

            # ------------------------------------------------------
            # The final layer will only need to supply a quality
            # function. This is a single value for each action
            # provided.
            # ------------------------------------------------------
            self.fcFinal = nn.Linear( inputSize, actionSize )

            # The network owns its optimizer. Note that the train/eval
            # mode is not touched here: the module keeps the nn.Module
            # default until the caller invokes ``.eval()`` / ``.train()``.
            self.optimizer = optim.Adam(
                self.parameters(), lr=lr)

        except Exception as e:
            raise type(e)(
                'lib.agents.qNetwork.qNetworkDiscrete.__init__ - ERROR - ' + str(e)
                ).with_traceback(e.__traceback__) from e

    @staticmethod
    def _addNoise(x, sigma):
        '''Return ``x`` perturbed by zero-mean Gaussian noise scaled by ``sigma``.'''

        # A plain numeric zero (the default) means "no noise": skip the
        # sampling so that a noise-free forward pass does no extra work and
        # does not advance the global random number generator.
        if isinstance(sigma, (int, float)) and sigma == 0:
            return x

        # ``randn_like`` samples on the same device and with the same dtype
        # as ``x``, so no separate CPU / CUDA code paths are required.
        return x + torch.randn_like(x) * sigma

    def forward(self, x, sigma=0):
        '''forward function that is called during the forward pass
        
        This is the forward function that will be called during a
        forward pass. It takes the states and gives the Q value
        corresponding to each of the actions that are associated
        with that state.
        
        Parameters
        ----------
        x : Tensor
            This is a 2D tensor whose last dimension holds the ``stateSize``
            values of each state.
        sigma : {number}, optional
            Scale of the zero-mean Gaussian noise that is added to the output
            of every hidden layer (the default is ``0``, which adds no noise).
            No noise is added to the output of the final layer.
        
        Returns
        -------
        tensor
            The Q values, one per action, for each of the supplied states.

        Raises
        ------
        Exception
            Any error raised during the pass is re-raised as the same
            exception type, with the location of the failure prefixed to
            the message.
        '''

        try:
            if self.batchNormalization:
                for bn, fc, a in zip(self.bns, self.fcLayers, self.activations):
                    # keep every batch norm layer in step with the
                    # train / eval mode of the network itself
                    bn.train(self.training)
                    x = self._addNoise( a(bn(fc(x))), sigma )
            else:
                for fc, a in zip(self.fcLayers, self.activations):
                    x = self._addNoise( a(fc(x)), sigma )

            x = self.fcFinal( x )

        except Exception as e:
            raise type(e)(
                'lib.agents.qNetwork.qNetworkDiscrete.forward - ERROR - ' + str(e)
                ).with_traceback(e.__traceback__) from e

        return x

    def step(self, v1, v2):
        '''Uses the optimizer to update the weights
        
        This calculates the MSE loss between the two inputs, back
        propagates it, and lets ``self.optimizer`` take a single step.
        For the update to have any effect, at least one of the inputs
        must have been calculated with this ``nn.Module`` so that the
        loss depends on its parameters.
        
        Parameters
        ----------
        v1 : {Tensor}
            First tensor for calculating the MSE loss
        v2 : {Tensor}
            Second tensor for calculating the MSE loss
        
        Raises
        ------
        Exception
            Any error raised while computing the loss or updating the
            weights is re-raised as the same exception type, with the
            location of the failure prefixed to the message.
        '''

        try:
            loss = F.mse_loss(v1, v2)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        except Exception as e:
            raise type(e)(
                'lib.agents.qNetwork.qNetworkDiscrete.step - ERROR - ' + str(e)
                ).with_traceback(e.__traceback__) from e