# Source code for lib.utils.ReplayBuffer

from collections import deque
import numpy as np
import pickle, os, sys

class SimpleReplayBuffer:
    '''A replay buffer for storing experience tuples.

    Data tuples are stored in a bounded ``deque``; once the buffer is
    full, appending a new tuple silently evicts the oldest one.  Tuples
    are assumed to be of the form
    ``(state, action, reward, next_state, done, cumRewards)``, i.e. some
    form of cumulative reward is expected alongside each transition.
    '''

    def __init__(self, maxDataTuples):
        '''Initialize the replay buffer.

        Parameters
        ----------
        maxDataTuples : {int}
            The size of the ``deque`` that is used for storing the data
            tuples.  Older tuples are discarded once this size is
            exceeded.
        '''
        self.maxDataTuples = maxDataTuples
        self.memory = deque(maxlen=maxDataTuples)

    def append(self, result):
        '''Append a single tuple to the replay buffer.

        Parameters
        ----------
        result : {tuple}
            The tuple that should be added into the memory buffer.
        '''
        self.memory.append(result)

    @property
    def len(self):
        '''int: the number of tuples currently in the memory buffer.

        Remember that this is a property; there is no need to call it
        as a function.
        '''
        return len(self.memory)

    @property
    def shape(self):
        '''tuple: the shape of the data within the memory buffer.

        The first element of the returned tuple is the number of tuples
        currently in the buffer; the second is the length of each stored
        element (taken from the first one).  If the buffer is empty, the
        second element is ``None``.
        '''
        N = len(self.memory)
        # Peek at the first element to infer the tuple width; an empty
        # buffer has nothing to inspect, hence None.
        N1 = len(self.memory[0]) if N > 0 else None
        return N, N1

    def clear(self):
        '''Remove all tuples from the memory buffer.'''
        self.memory.clear()

    def appendMany(self, results):
        '''Append multiple tuples to the memory buffer.

        Most often we will not be interested in inserting a single data
        point into the replay buffer, but rather a whole list of them.
        This function iterates over the list and inserts each tuple one
        by one.

        Parameters
        ----------
        results : {list}
            List of tuples that are to be inserted into the replay
            buffer.
        '''
        for r in results:
            self.memory.append(r)

    def appendAllAgentResults(self, allResults):
        '''Append data from all agents into the same buffer.

        This is useful when there is only one agent, or when all agents
        share the same learning characteristics and can therefore share
        a single buffer.

        Parameters
        ----------
        allResults : {list}
            List of lists of tuples to be entered into the buffer.
        '''
        for results in allResults:
            self.appendMany(results)

    def sample(self, nSamples):
        '''Sample uniformly at random from the replay buffer.

        Sampling is uniform and *with replacement*, so the same tuple
        may appear more than once in the returned list.  (Despite the
        stored cumulative rewards, no prioritization is applied here.)

        Parameters
        ----------
        nSamples : {int}
            The number of memory elements to return.

        Returns
        -------
        list
            ``nSamples`` tuples drawn uniformly from the buffer.
        '''
        choice = np.random.choice(np.arange(len(self.memory)), nSamples)
        return [self.memory[c] for c in choice]

    def save(self, folder, name):
        '''Save the replay buffer to a pickle file.

        The buffer contents are written to
        ``<folder>/memory_<name>.pickle`` so the buffer can later be
        restored (via :meth:`load`) to the state in which it was saved.

        Parameters
        ----------
        folder : {str}
            Path to the folder where the data is to be saved.
        name : {str}
            Name associated with the buffer.  Since multiple agents may
            act in tandem, this identifies which agent's buffer is being
            saved.
        '''
        try:
            with open(os.path.join(folder, f'memory_{name}.pickle'), 'wb') as fOut:
                pickle.dump(self.memory, fOut, pickle.HIGHEST_PROTOCOL)
        except Exception as e:
            # Re-raise the same exception type with extra context while
            # preserving the original traceback.
            raise type(e)(
                'lib.utils.SimpleReplayBuffer.save - ERROR - ' + str(e)
            ).with_traceback(sys.exc_info()[2])

    def load(self, folder, name):
        '''Load the replay buffer from a previously saved pickle file.

        Data saved with :meth:`save` can be reloaded into this buffer,
        replacing its current contents.

        NOTE(security): ``pickle.load`` can execute arbitrary code when
        given an untrusted file; only load files produced by
        :meth:`save`.

        Parameters
        ----------
        folder : {str}
            Path to the folder where the data is saved.
        name : {str}
            Name of the agent whose data is to be loaded.
        '''
        # Context manager guarantees the file handle is closed even if
        # unpickling raises (the original left the handle open).
        with open(os.path.join(folder, f'memory_{name}.pickle'), 'rb') as fIn:
            self.memory = pickle.load(fIn)