.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_ch8\ch8_lunarlander.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_ch8_ch8_lunarlander.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_ch8_ch8_lunarlander.py:


==================================
8.3 Experiments - LunarLander
==================================

We use the OpenAI Gym library to instantiate the gymnasium LunarLander-v3 environment and reproduce the figure from chapter 8_XXX.

We train the following agents:

- PPO
- DQN
- Controller-based
- Kernel Actor-Critic
- Kernel Q-Learning
- Kernel Q-Learning HJB
- Kernel Policy-Gradient

We show how you can tweak some methods in each algorithm to tune them to the environment.

For detailed documentation on KAgents, see **codpy documentation**.

.. GENERATED FROM PYTHON SOURCE LINES 19-30

.. code-block:: Python


    # Importing necessary modules
    import sys

    from matplotlib import pyplot as plt
    import numpy as np

    import codpy.core as core
    import codpy.KQLearning as KQLearning
    import codpy.conditioning as conditioning
    # NOTE(review): names used further down without an explicit import
    # (PPOAgent, DQNAgent, Benchmark) presumably come from this star import.
    from ignore_utils import *


.. GENERATED FROM PYTHON SOURCE LINES 31-33

KQLearning
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 33-80

.. code-block:: Python


    class KQLearningLN(KQLearning.KQLearning):
        def train(
            self,
            game,
            max_training_game_size=None,
            format=True,
            replay_buffer=True,
            tol=1e-2,
            **kwargs
        ):
            """
            For LunarLander, we want to fit one kernel per game. So again, we override the train method.

            The latest game is formatted, a kernel is fitted on that game alone and
            added to the critic. Every kernel held by the critic whose Bellman error
            is above ``tol`` (and that has no ``flag_kill_me`` attribute) is then
            re-fitted on the game stored with it.

            Parameters:
            - game: the game to train on; passed to ``self.format``.
            - max_training_game_size: forwarded to ``self.format``.
            - format: not read by this override.
            - replay_buffer: not read by this override.
            - tol: Bellman-error threshold; also the minimum improvement a
              re-fit must bring for the re-fitted kernel to replace the stored one.
            - kwargs: forwarded to ``self.format``,
              ``self.optimal_states_values_function`` and ``self.critic.add_kernel``.
            """
            game = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            # Here the kernel is fit on the latest game only.
            kernel = self.optimal_states_values_function(game, verbose=True, **kwargs)
            # Keep the training data with the kernel so it can be re-fitted later.
            kernel.games = game
            self.critic.add_kernel(kernel, **kwargs)
            # NOTE(review): nothing in this method appends to delete_kernels, so the
            # pruning branch at the end does not run as written.
            delete_kernels = []
            # k is not used; each kernel is re-read through self.critic.kernels[i].
            for i, k in self.critic.kernels.items():
                error = self.critic.kernels[i].bellman_error
                if error > tol and not hasattr(self.critic.kernels[i], "flag_kill_me"):
                    # Re-fit this kernel on its own game, starting from the kernel itself.
                    kernel = self.optimal_states_values_function(
                        self.critic.kernels[i].games,
                        kernel=self.critic.kernels[i],
                        verbose=True,
                        **kwargs,
                    )
                    kernel.games = self.critic.kernels[i].games
                    if kernel.bellman_error >= error - tol:
                        # The re-fit did not improve the error by at least tol.
                        # NOTE(review): the flag is set on the returned kernel, which is
                        # not stored back here; it only affects the hasattr test above if
                        # optimal_states_values_function returns the same object - confirm.
                        kernel.flag_kill_me = "please"
                    else:
                        self.critic.kernels[i] = kernel
            if (
                len(delete_kernels) > 0
                and len(self.critic.kernels) - len(delete_kernels) > 1
            ):
                # Rebuild the kernel dict with consecutive integer keys, skipping
                # the indices listed in delete_kernels.
                new_kernels = {}
                count = 0
                for i in range(len(self.critic.kernels)):
                    if i not in delete_kernels:
                        new_kernels[count] = self.critic.kernels[i]
                        count = count + 1
                self.critic.kernels = new_kernels


.. GENERATED FROM PYTHON SOURCE LINES 81-83

PolicyGradient
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 83-155

.. 
code-block:: Python


    class PolicyGradientLN(KQLearning.PolicyGradient):
        def train(self, game, max_training_game_size=None, **kwargs):
            """
            Update the actor, then push the latest game into the replay buffer.

            Nothing is done once the actor is valid and ``self.actor.get_x()`` has
            more rows than ``self.replay_buffer.capacity``.

            When the replay buffer is not empty, the update uses the buffer memory
            only: the latest game is pushed after the update, so it is first used
            on the next call. When the buffer is empty, the latest game is used.

            Parameters:
            - game: the latest game; passed to ``self.format``.
            - max_training_game_size: forwarded to ``self.format``.
            - kwargs: forwarded to ``self.format``, ``self.get_advantages``,
              ``self.update_probabilities`` and ``self.replay_buffer.push``.
            """
            if self.actor.is_valid() and self.actor.get_x().shape[0] > self.replay_buffer.capacity:
                return
            # NOTE(review): params is not used below.
            params = kwargs.get("KCritic", {})
            state, action, next_state, reward, return_, done = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            if len(self.replay_buffer):
                states, actions, next_states, rewards, returns, dones = (
                    self.replay_buffer.memory
                )
            else:
                states, actions, next_states, rewards, returns, dones = state, action, next_state, reward, return_, done
            # dones[0] = True
            games = [states, actions, next_states, rewards, returns, dones]
            if self.actor.is_valid():
                last_policy = self.actor(states)
            else:
                # No actor yet: start from the uniform policy over the actions.
                last_policy = np.full(
                    [states.shape[0], self.actions_dim], 1.0 / self.actions_dim
                )
            # Keep every probability inside [1e-9, 1 - 1e-9].
            last_policy = np.where(last_policy < 1e-9, 1e-9,last_policy)
            last_policy = np.where(last_policy > 1.-1e-9,1.- 1e-9,last_policy)

            # update probabilities
            if not self.actor.is_valid() or self.actor.get_x().shape[0] < self.replay_buffer.capacity:
                advantages, self.value_function = self.get_advantages(games, policy=last_policy, **kwargs)
                self.actor = self.update_probabilities(
                    advantages, games, last_policy=last_policy, clip=.1, **kwargs
                )
            else:
                # Reached only when the actor is valid and get_x() has exactly
                # `capacity` rows: the actor is left unchanged.
                pass
                # advantages, self.value_function = self.get_advantages(games, policy=last_policy, kernel = self.value_function,**kwargs)
                # kernel = self.update_probabilities(
                #     advantages, games, last_policy=last_policy,kernel = self.actor, clip=.1, **kwargs
                # )
            # Record the summed rewards of the data used for this update
            # (buffer memory, or the latest game when the buffer was empty).
            if not hasattr(self,"scores"):
                self.scores = [rewards.sum()]
            else:
                self.scores.append(rewards.sum())
            # if len(self.replay_buffer)+states.shape[0] < self.replay_buffer.capacity:
            # NOTE(review): is_pushed is not read afterwards.
            is_pushed = self.replay_buffer.push(
                state, action, next_state, reward, return_, done, worst_game=False,**kwargs
            )

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Turn one game into the arrays used by ``train``.

            Parameters:
            - sarsd: iterable unpacked as (states, actions, next_states, rewards,
              dones); each element goes through ``core.get_matrix``.
            - max_training_game_size: when not None, only the last
              ``max_training_game_size`` rows of each array are kept.
            - kwargs: forwarded to ``self.compute_returns``.

            Returns the tuple (states, actions, next_states, rewards, returns,
            dones), where actions went through ``KQLearning.rl_hot_encoder`` and
            returns come from ``self.compute_returns``.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            dones = core.get_matrix(dones, dtype=bool)
            # NOTE(review): len_game is only referenced by the commented-out line below.
            len_game=states.shape[0]
            if max_training_game_size is not None :
                # indices = [int(n*len_game/max_training_game_size) for n in range(0, max_training_game_size)]
                states, actions, next_states, rewards, dones = (
                    states[-max_training_game_size:],
                    actions[-max_training_game_size:],
                    next_states[-max_training_game_size:],
                    rewards[-max_training_game_size:],
                    dones[-max_training_game_size:],
                    # states[:max_training_game_size],
                    # actions[:max_training_game_size],
                    # next_states[:max_training_game_size],
                    # rewards[:max_training_game_size],
                    # dones[:max_training_game_size],
                )
            # Returns are computed on the (possibly truncated) game.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            # dones[0]=True
            return states, actions, next_states, rewards, returns, dones


.. GENERATED FROM PYTHON SOURCE LINES 156-158

KActorCritic
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 158-233

.. code-block:: Python


    class KActorCriticLN(KQLearning.KActorCritic):
        """
        Defines the main KActorCritic class. This inherits from KQLearning.KActorCritic. You can then extend any method from the main class to fit your needs.
        """

        # NOTE(review): train and format below repeat PolicyGradientLN.train and
        # PolicyGradientLN.format statement for statement.
        def train(self, game, max_training_game_size=None, **kwargs):
            """
            Update the actor, then push the latest game into the replay buffer.

            Nothing is done once the actor is valid and ``self.actor.get_x()`` has
            more rows than ``self.replay_buffer.capacity``.

            When the replay buffer is not empty, the update uses the buffer memory
            only: the latest game is pushed after the update, so it is first used
            on the next call. When the buffer is empty, the latest game is used.

            Parameters:
            - game: the latest game; passed to ``self.format``.
            - max_training_game_size: forwarded to ``self.format``.
            - kwargs: forwarded to ``self.format``, ``self.get_advantages``,
              ``self.update_probabilities`` and ``self.replay_buffer.push``.
            """
            if self.actor.is_valid() and self.actor.get_x().shape[0] > self.replay_buffer.capacity:
                return
            # NOTE(review): params is not used below.
            params = kwargs.get("KCritic", {})
            state, action, next_state, reward, return_, done = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            if len(self.replay_buffer):
                states, actions, next_states, rewards, returns, dones = (
                    self.replay_buffer.memory
                )
            else:
                states, actions, next_states, rewards, returns, dones = state, action, next_state, reward, return_, done
            # dones[0] = True
            games = [states, actions, next_states, rewards, returns, dones]
            if self.actor.is_valid():
                last_policy = self.actor(states)
            else:
                # No actor yet: start from the uniform policy over the actions.
                last_policy = np.full(
                    [states.shape[0], self.actions_dim], 1.0 / self.actions_dim
                )
            # Keep every probability inside [1e-9, 1 - 1e-9].
            last_policy = np.where(last_policy < 1e-9, 1e-9,last_policy)
            last_policy = np.where(last_policy > 1.-1e-9,1.- 1e-9,last_policy)

            # update probabilities
            if not self.actor.is_valid() or self.actor.get_x().shape[0] < self.replay_buffer.capacity:
                advantages, self.value_function = self.get_advantages(games, policy=last_policy, **kwargs)
                self.actor = self.update_probabilities(
                    advantages, games, last_policy=last_policy, clip=.1, **kwargs
                )
            else:
                # Reached only when the actor is valid and get_x() has exactly
                # `capacity` rows: the actor is left unchanged.
                pass
                # advantages, self.value_function = self.get_advantages(games, policy=last_policy, kernel = self.value_function,**kwargs)
                # kernel = self.update_probabilities(
                #     advantages, games, last_policy=last_policy,kernel = self.actor, clip=.1, **kwargs
                # )
            # Record the summed rewards of the data used for this update
            # (buffer memory, or the latest game when the buffer was empty).
            if not hasattr(self,"scores"):
                self.scores = [rewards.sum()]
            else:
                self.scores.append(rewards.sum())
            # if len(self.replay_buffer)+states.shape[0] < self.replay_buffer.capacity:
            # NOTE(review): is_pushed is not read afterwards.
            is_pushed = self.replay_buffer.push(
                state, action, next_state, reward, return_, done, worst_game=False,**kwargs
            )

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Turn one game into the arrays used by ``train``.

            Parameters:
            - sarsd: iterable unpacked as (states, actions, next_states, rewards,
              dones); each element goes through ``core.get_matrix``.
            - max_training_game_size: when not None, only the last
              ``max_training_game_size`` rows of each array are kept.
            - kwargs: forwarded to ``self.compute_returns``.

            Returns the tuple (states, actions, next_states, rewards, returns,
            dones), where actions went through ``KQLearning.rl_hot_encoder`` and
            returns come from ``self.compute_returns``.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            dones = core.get_matrix(dones, dtype=bool)
            # NOTE(review): len_game is only referenced by the commented-out line below.
            len_game=states.shape[0]
            if max_training_game_size is not None :
                # indices = [int(n*len_game/max_training_game_size) for n in range(0, max_training_game_size)]
                states, actions, next_states, rewards, dones = (
                    states[-max_training_game_size:],
                    actions[-max_training_game_size:],
                    next_states[-max_training_game_size:],
                    rewards[-max_training_game_size:],
                    dones[-max_training_game_size:],
                )
            # Returns are computed on the (possibly truncated) game.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            # dones[0]=True
            return states, actions, next_states, rewards, returns, dones


.. GENERATED FROM PYTHON SOURCE LINES 234-236

HJB
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 236-302

.. code-block:: Python


    class KQLearningHJBLN(KQLearning.KQLearningHJB):
        def __call__(self, state, **kwargs):
            """
            Pick an action for ``state``.

            ``self.eps_threshold`` is multiplied by 0.999 on every call. When a
            uniform random draw exceeds it and the critic is valid, the critic is
            evaluated on ``self.all_states_actions`` of the state and the argmax is
            returned; otherwise a random integer in ``[0, self.actions_dim)`` is
            returned.
            """
            self.eps_threshold *= 0.999
            if np.random.random() > self.eps_threshold and self.critic.is_valid() == True:
                z = self.all_states_actions(core.get_matrix(state).T)
                # z = self.all_states_actions(self.get_expectation_kernel(z))
                q_values = self.critic(z)
                # Tiny random perturbation, presumably to break ties in the argmax.
                q_values += np.random.random(q_values.shape) * 1e-9
                return np.argmax(q_values)
            return np.random.randint(0, self.actions_dim)

        def get_conditioned_kernel(self, games, **kwargs):
            """
            Delegate to ``KQLearning.get_conditioned_kernel`` with
            ``conditioning.ConditionerKernel`` as ``base_class``.
            """
            return KQLearning.get_conditioned_kernel(
                games, base_class=conditioning.ConditionerKernel, **kwargs
            )

        def train(
            self,
            game,
            max_training_game_size=None,
            format=True,
            replay_buffer=True,
            tol=1e-2,
            **kwargs
        ):
            """
            Fit one kernel on the latest game, add it to the critic, then re-fit
            every stored kernel whose Bellman error is above ``tol`` (and that has
            no ``flag_kill_me`` attribute) on the game stored with it.

            Parameters:
            - game: the game to train on; passed to ``self.format``.
            - max_training_game_size: forwarded to ``self.format``.
            - format: not read by this override.
            - replay_buffer: not read by this override.
            - tol: Bellman-error threshold; also the minimum improvement a
              re-fit must bring for the re-fitted kernel to replace the stored one.
            - kwargs: forwarded to ``self.format``,
              ``self.optimal_states_values_function`` and ``self.critic.add_kernel``.
            """
            # return super().train(game, max_training_game_size,format,replay_buffer, tol,**kwargs)
            # l = len(game[0])
            # self.gamma = np.exp(-np.log(l) / l)
            game = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            kernel = self.optimal_states_values_function(game, verbose=True, **kwargs)
            # Keep the training data with the kernel so it can be re-fitted later.
            kernel.games = game
            # kernel.gamma = self.gamma
            self.critic.add_kernel(kernel, **kwargs)
            # NOTE(review): the only append to delete_kernels is commented out below,
            # so the pruning branch at the end does not run as written.
            delete_kernels = []
            # k is only referenced by the commented-out line below.
            for i, k in self.critic.kernels.items():
                # self.gamma = k.gamma
                error = self.critic.kernels[i].bellman_error
                if error > tol and not hasattr(self.critic.kernels[i], "flag_kill_me"):
                    # Re-fit this kernel on its own game, starting from the kernel itself.
                    kernel = self.optimal_states_values_function(
                        self.critic.kernels[i].games,
                        kernel=self.critic.kernels[i],
                        verbose=True,
                        **kwargs,
                    )
                    kernel.games = self.critic.kernels[i].games
                    # kernel.gamma = self.critic.kernels[i].gamma
                    if kernel.bellman_error >= error - tol:
                        # delete_kernels.append(i)
                        # NOTE(review): the flag is set on the returned kernel, which is
                        # not stored back here; it only affects the hasattr test above if
                        # optimal_states_values_function returns the same object - confirm.
                        kernel.flag_kill_me = "please"
                    else:
                        self.critic.kernels[i] = kernel
            if (
                len(delete_kernels) > 0
                and len(self.critic.kernels) - len(delete_kernels) > 1
            ):
                # Rebuild the kernel dict with consecutive integer keys, skipping
                # the indices listed in delete_kernels.
                new_kernels = {}
                count = 0
                for i in range(len(self.critic.kernels)):
                    if i not in delete_kernels:
                        new_kernels[count] = self.critic.kernels[i]
                        count = count + 1
                self.critic.kernels = new_kernels


.. GENERATED FROM PYTHON SOURCE LINES 303-305

KController
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 305-507

.. code-block:: Python


    class heuristic_ControllerLN:
        """
        Defines the heuristic controller for LunarLander. We choose to use 12 parameters to be tweaked.
        """

        # Number of tunable parameters held in self.w.
        dim = 12

        def __init__(self, w=None, **kwargs):
            """
            Store the parameter vector ``w``; when ``w`` is None every one of the
            ``dim`` parameters starts at 0.5. Extra keyword arguments are ignored.
            """
            if w is None:
                self.w = np.ones([self.dim]) * 0.5
            else:
                self.w = w
            pass

        def get_distribution(self):
            """
            Return an object that samples parameter vectors: calling it with ``n``
            draws an ``[n, len(w)]`` array from ``np.random.uniform``, and
            ``support(v)`` clips ``v`` to the interval [0, 1].
            """
            class uniform:
                def __init__(self, shape1):
                    self.shape1 = shape1

                def __call__(self, n):
                    return np.random.uniform(size=[n, self.shape1])

                def support(self, v):
                    out = np.clip(v, 0, 1)
                    return out

            return uniform(self.w.shape[0])

        def get_thetas(self):
            """Return the current parameter vector."""
            return self.w

        def set_thetas(self, w):
            """Replace the parameter vector with ``w`` flattened."""
            self.w = w.flatten()

        def __call__(self, s, **kwargs):
            """
            Map the observation ``s`` to an action index in {0, 1, 2, 3}.

            NOTE(review): the comments below read ``s`` with the Gymnasium
            LunarLander observation layout (x, y, vx, vy, angle, angular velocity,
            left/right leg contact) - confirm against the environment documentation.
            """
            # Target angle from s[0] and s[2], clamped to [-w[2], w[2]].
            angle_targ = s[0] * self.w[0] + s[2] * self.w[1]
            if angle_targ > self.w[2]:
                angle_targ = self.w[2]
            if angle_targ < -self.w[2]:
                angle_targ = -self.w[2]
            # Target for s[1], proportional to |s[0]|.
            hover_targ = self.w[3] * np.abs(s[0])

            # Weighted error on s[4] / s[1] minus a weighted term on s[5] / s[3].
            angle_todo = (angle_targ - s[4]) * self.w[4] - (s[5]) * self.w[5]
            hover_todo = (hover_targ - s[1]) * self.w[6] - (s[3]) * self.w[7]

            # When either s[6] or s[7] is set, both commands are overridden.
            if s[6] or s[7]:
                angle_todo = self.w[8]
                hover_todo = -(s[3]) * self.w[9]

            # Default action is 0; w[10] and w[11] are the thresholds for the others.
            a = 0
            if hover_todo > np.abs(angle_todo) and hover_todo > self.w[10]:
                a = 2
            elif angle_todo < -self.w[11]:
                a = 3
            elif angle_todo > +self.w[11]:
                a = 1
            return a


    class KControllerLN(KQLearning.KController):
        """
        Defines the class for optimizing the controller. The class inherits from KQLearning.KController. You can then extend any method from the main class to fit your needs.

        Parameters:
        - state_dim: Dimension of the environment's state space.
        - actions_dim: Dimension of the environment's action space.
        """

        def __init__(self, state_dim, actions_dim, **kwargs):
            # state_dim is passed as a keyword; heuristic_ControllerLN.__init__ only
            # reads `w` and ignores the other keyword arguments.
            controller = heuristic_ControllerLN(state_dim=state_dim, **kwargs)
            super().__init__(state_dim, actions_dim, controller, **kwargs)

        def get_function(self, **kwargs):
            """
            The optimizer will find the best parameters which maximizes this function.
            This is where you would tweak the function to be maximized.

            The estimator built from ``self.x`` and ``self.y`` is stored in
            ``self.expectation_estimator``; the returned function adds the
            estimator's value at ``x`` and its ``distance(x)``.
            """
            self.expectation_estimator = self.get_expectation_estimator(
                self.x, self.y, **kwargs
            )

            def function(x):
                expectation = self.expectation_estimator(x)
                distance = self.expectation_estimator.distance(x)
                return expectation + distance

            return function

        def format(self, sarsd, **kwargs):
            """
            This formats the game data to be used in the train method

            Each of state, action, next_state, reward and done is reduced to its
            mean over axis 0 and returned transposed. The action slot holds the
            controller parameters (``self.controller.get_thetas()``).
            """
            state, action, next_state, reward, done = [core.get_matrix(e) for e in sarsd]
            # NOTE(review): this encoded action is overwritten on the next line.
            action = KQLearning.rl_hot_encoder(action, self.actions_dim)
            action = core.get_matrix(self.controller.get_thetas()).T
            done = core.get_matrix(done, dtype=bool)
            return (
                core.get_matrix(state.mean(axis=0)).T,
                core.get_matrix(action.mean(axis=0)).T,
                core.get_matrix(next_state.mean(axis=0)).T,
                core.get_matrix(reward.mean(axis=0)).T,
                core.get_matrix(done.mean(axis=0)).T,
            )


    def main():
        """
        Run the LunarLander-v3 benchmark for the agents listed in
        ``game_dictionary`` and show the resulting figures.
        """
        # Define agents here, which will be trained in the benchmark. If game_dictionary is empty, the benchmark will try to load data from the .pkl file
        game_dictionary = {
            "PPOAgent": PPOAgent,
            "Controller-based": KControllerLN,
            "KACAgent": KActorCriticLN,
            "PolicyGradient": PolicyGradientLN,
            "DQNAgent": DQNAgent,
            # "KQLearningHJBCP": KQLearningHJBLN, #bug to solve get_transition
            "KQLearning": KQLearningLN,
        }
        # Define your agent's parameters here. This dict will be passed in each agent's __init__() method.
        extras = {
            "KActor": {
                # "latent_shape":[100,50],
                "max_size": 1000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-6,
                "order": None,
            },
            "KCritic": {
                "max_size": 1000000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "HJBModel": {
                # "latent_shape":[100,50],
                "max_size": 100000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
                "state_dim": 8,
            },
            "Rewards": {
                "max_size": 1000000,
                "n_batch": 100000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "NextStates": {
                "max_size": 1000000,
                "n_batch": 100000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "DQNAgent": {
                # 'reward_function': mc_reward_function,
                "episodes": 500,
                "policy_param": 64,
                "target_param": 64,
            },
            "ACAgent": {"reward_function": None},
            # NOTE(review): {0} is a set literal, not a dict like the other entries.
            "QAgent": {
                0
                # 'reward_function': mc_reward_function
            },
            "KController": {
                "reg": 1e-9,
                "order": 2,
            },
            "Conditionner": {
                "reg": 1e-4,
                "order": 3,
            },
            "max_game": 2000,
            "gamma": 0.99,
            "capacity": 10000,
            "max_training_game_size": 1000,
            # "max_kernel": 40
            # "seed": 42,
        }
        # "seed" is commented out above, so seed is None here.
        seed = extras.get("seed", None)
        np.random.seed(seed)
        # NOTE(review): softmax and test are not used afterwards.
        softmax = lambda x: np.exp(x) / np.sum(np.exp(x), axis=0)
        test = softmax([1,0])
        Benchmark()(
            game_dictionary,
            "LunarLander-v3",
            num_games=100,
            eps_threshold=0.1,
            num_repeats=3,
            max_time=50,
            axis="episodes",
            # file_name="results_LN_final.pkl",
            **extras,
        )
        plt.show()
        pass


    main()



.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_lunarlander_001.png
         :alt: Cumulative Reward over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_lunarlander_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_lunarlander_002.png
         :alt: Training Time per Game over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_lunarlander_002.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

.. 
code-block:: none label PPOAgent, Reward 0: -155.007, Len(game): 110, Training Time: 0.028s, Prediction Time: 0.028s label PPOAgent, Reward 1: -49.228, Len(game): 68, Training Time: 0.044s, Prediction Time: 0.044s label PPOAgent, Reward 2: -63.131, Len(game): 73, Training Time: 0.062s, Prediction Time: 0.062s label PPOAgent, Reward 3: -85.729, Len(game): 70, Training Time: 0.078s, Prediction Time: 0.078s label PPOAgent, Reward 4: -125.021, Len(game): 80, Training Time: 0.098s, Prediction Time: 0.098s label PPOAgent, Reward 5: -164.398, Len(game): 119, Training Time: 0.127s, Prediction Time: 0.127s label PPOAgent, Reward 6: -99.909, Len(game): 80, Training Time: 0.146s, Prediction Time: 0.146s label PPOAgent, Reward 7: -276.220, Len(game): 104, Training Time: 0.172s, Prediction Time: 0.172s label PPOAgent, Reward 8: -248.290, Len(game): 105, Training Time: 0.197s, Prediction Time: 0.197s label PPOAgent, Reward 9: -218.675, Len(game): 107, Training Time: 0.223s, Prediction Time: 0.223s label PPOAgent, Reward 10: -237.605, Len(game): 123, Training Time: 0.253s, Prediction Time: 0.253s label PPOAgent, Reward 11: -429.480, Len(game): 110, Training Time: 0.279s, Prediction Time: 0.279s label PPOAgent, Reward 12: -161.509, Len(game): 113, Training Time: 0.346s, Prediction Time: 0.346s label PPOAgent, Reward 13: -61.647, Len(game): 60, Training Time: 0.360s, Prediction Time: 0.360s label PPOAgent, Reward 14: -319.218, Len(game): 81, Training Time: 0.380s, Prediction Time: 0.380s label PPOAgent, Reward 15: -120.076, Len(game): 82, Training Time: 0.401s, Prediction Time: 0.401s label PPOAgent, Reward 16: -7.705, Len(game): 2000, Training Time: 0.959s, Prediction Time: 0.959s label PPOAgent, Reward 17: -257.049, Len(game): 98, Training Time: 0.984s, Prediction Time: 0.984s label PPOAgent, Reward 18: -78.086, Len(game): 68, Training Time: 1.038s, Prediction Time: 1.038s label PPOAgent, Reward 19: -269.682, Len(game): 131, Training Time: 1.073s, Prediction Time: 1.073s label 
PPOAgent, Reward 20: -102.666, Len(game): 79, Training Time: 1.094s, Prediction Time: 1.094s label PPOAgent, Reward 21: -143.012, Len(game): 89, Training Time: 1.116s, Prediction Time: 1.116s label PPOAgent, Reward 22: -164.618, Len(game): 79, Training Time: 1.137s, Prediction Time: 1.137s label PPOAgent, Reward 23: -126.824, Len(game): 81, Training Time: 1.158s, Prediction Time: 1.158s label PPOAgent, Reward 24: -79.662, Len(game): 62, Training Time: 1.174s, Prediction Time: 1.174s label PPOAgent, Reward 25: -82.773, Len(game): 152, Training Time: 1.213s, Prediction Time: 1.213s label PPOAgent, Reward 26: -125.296, Len(game): 104, Training Time: 1.240s, Prediction Time: 1.240s label PPOAgent, Reward 27: -84.146, Len(game): 106, Training Time: 1.265s, Prediction Time: 1.265s label PPOAgent, Reward 28: -149.814, Len(game): 110, Training Time: 1.292s, Prediction Time: 1.292s label PPOAgent, Reward 29: 17.381, Len(game): 68, Training Time: 1.310s, Prediction Time: 1.310s label PPOAgent, Reward 30: -85.098, Len(game): 115, Training Time: 1.374s, Prediction Time: 1.374s label PPOAgent, Reward 31: -238.813, Len(game): 102, Training Time: 1.401s, Prediction Time: 1.401s label PPOAgent, Reward 32: -134.363, Len(game): 111, Training Time: 1.429s, Prediction Time: 1.429s label PPOAgent, Reward 33: -178.439, Len(game): 89, Training Time: 1.451s, Prediction Time: 1.451s label PPOAgent, Reward 34: -144.528, Len(game): 111, Training Time: 1.479s, Prediction Time: 1.479s label PPOAgent, Reward 35: -178.162, Len(game): 99, Training Time: 1.505s, Prediction Time: 1.505s label PPOAgent, Reward 36: -60.394, Len(game): 65, Training Time: 1.523s, Prediction Time: 1.523s label PPOAgent, Reward 37: -110.917, Len(game): 87, Training Time: 1.544s, Prediction Time: 1.544s label PPOAgent, Reward 38: -136.499, Len(game): 100, Training Time: 1.569s, Prediction Time: 1.569s label PPOAgent, Reward 39: -83.841, Len(game): 87, Training Time: 1.590s, Prediction Time: 1.590s label PPOAgent, Reward 
40: -230.637, Len(game): 108, Training Time: 1.616s, Prediction Time: 1.616s label PPOAgent, Reward 41: -84.512, Len(game): 84, Training Time: 1.637s, Prediction Time: 1.637s label PPOAgent, Reward 42: -214.474, Len(game): 137, Training Time: 1.707s, Prediction Time: 1.707s label PPOAgent, Reward 43: -109.316, Len(game): 76, Training Time: 1.729s, Prediction Time: 1.729s label PPOAgent, Reward 44: -94.543, Len(game): 81, Training Time: 1.749s, Prediction Time: 1.749s label PPOAgent, Reward 45: -87.504, Len(game): 76, Training Time: 1.769s, Prediction Time: 1.769s label PPOAgent, Reward 46: -121.231, Len(game): 76, Training Time: 1.787s, Prediction Time: 1.787s label PPOAgent, Reward 47: -139.251, Len(game): 96, Training Time: 1.811s, Prediction Time: 1.811s label PPOAgent, Reward 48: -130.601, Len(game): 118, Training Time: 1.840s, Prediction Time: 1.840s label PPOAgent, Reward 49: -261.466, Len(game): 119, Training Time: 1.871s, Prediction Time: 1.871s label PPOAgent, Reward 50: -329.855, Len(game): 102, Training Time: 1.897s, Prediction Time: 1.897s label PPOAgent, Reward 51: -133.380, Len(game): 71, Training Time: 1.914s, Prediction Time: 1.914s label PPOAgent, Reward 52: -107.086, Len(game): 85, Training Time: 1.936s, Prediction Time: 1.936s label PPOAgent, Reward 53: -147.687, Len(game): 102, Training Time: 1.960s, Prediction Time: 1.960s label PPOAgent, Reward 54: -116.676, Len(game): 86, Training Time: 1.981s, Prediction Time: 1.981s label PPOAgent, Reward 55: -153.364, Len(game): 90, Training Time: 2.003s, Prediction Time: 2.003s label PPOAgent, Reward 56: -73.097, Len(game): 105, Training Time: 2.064s, Prediction Time: 2.064s label PPOAgent, Reward 57: -100.277, Len(game): 83, Training Time: 2.085s, Prediction Time: 2.085s label PPOAgent, Reward 58: -52.122, Len(game): 78, Training Time: 2.105s, Prediction Time: 2.105s label PPOAgent, Reward 59: -76.214, Len(game): 95, Training Time: 2.128s, Prediction Time: 2.128s label PPOAgent, Reward 60: -124.150, 
Len(game): 68, Training Time: 2.145s, Prediction Time: 2.145s label PPOAgent, Reward 61: -79.070, Len(game): 62, Training Time: 2.161s, Prediction Time: 2.161s label PPOAgent, Reward 62: -197.114, Len(game): 102, Training Time: 2.187s, Prediction Time: 2.187s label PPOAgent, Reward 63: -209.057, Len(game): 102, Training Time: 2.213s, Prediction Time: 2.213s label PPOAgent, Reward 64: -58.717, Len(game): 66, Training Time: 2.229s, Prediction Time: 2.229s label PPOAgent, Reward 65: -270.200, Len(game): 92, Training Time: 2.252s, Prediction Time: 2.252s label PPOAgent, Reward 66: -13.982, Len(game): 115, Training Time: 2.281s, Prediction Time: 2.281s label PPOAgent, Reward 67: -12.036, Len(game): 116, Training Time: 2.310s, Prediction Time: 2.310s label PPOAgent, Reward 68: -111.589, Len(game): 67, Training Time: 2.327s, Prediction Time: 2.327s label PPOAgent, Reward 69: -123.134, Len(game): 90, Training Time: 2.384s, Prediction Time: 2.384s label PPOAgent, Reward 70: -124.631, Len(game): 101, Training Time: 2.408s, Prediction Time: 2.408s label PPOAgent, Reward 71: -207.868, Len(game): 83, Training Time: 2.429s, Prediction Time: 2.429s label PPOAgent, Reward 72: -56.974, Len(game): 75, Training Time: 2.448s, Prediction Time: 2.448s label PPOAgent, Reward 73: -106.013, Len(game): 120, Training Time: 2.482s, Prediction Time: 2.482s label PPOAgent, Reward 74: -123.814, Len(game): 114, Training Time: 2.511s, Prediction Time: 2.511s label PPOAgent, Reward 75: -122.384, Len(game): 82, Training Time: 2.531s, Prediction Time: 2.531s label PPOAgent, Reward 76: -110.124, Len(game): 83, Training Time: 2.555s, Prediction Time: 2.555s label PPOAgent, Reward 77: -112.512, Len(game): 130, Training Time: 2.587s, Prediction Time: 2.587s label PPOAgent, Reward 78: -63.188, Len(game): 70, Training Time: 2.604s, Prediction Time: 2.604s label PPOAgent, Reward 79: 22.390, Len(game): 117, Training Time: 2.633s, Prediction Time: 2.633s label PPOAgent, Reward 80: -98.291, Len(game): 71, 
Training Time: 2.651s, Prediction Time: 2.651s label PPOAgent, Reward 81: -89.695, Len(game): 90, Training Time: 2.673s, Prediction Time: 2.673s label PPOAgent, Reward 82: -118.329, Len(game): 79, Training Time: 2.739s, Prediction Time: 2.739s label PPOAgent, Reward 83: -126.434, Len(game): 100, Training Time: 2.764s, Prediction Time: 2.764s label PPOAgent, Reward 84: -179.646, Len(game): 99, Training Time: 2.788s, Prediction Time: 2.788s label PPOAgent, Reward 85: -65.340, Len(game): 56, Training Time: 2.802s, Prediction Time: 2.802s label PPOAgent, Reward 86: -97.081, Len(game): 62, Training Time: 2.818s, Prediction Time: 2.818s label PPOAgent, Reward 87: -123.019, Len(game): 69, Training Time: 2.835s, Prediction Time: 2.835s label PPOAgent, Reward 88: -150.878, Len(game): 65, Training Time: 2.852s, Prediction Time: 2.852s label PPOAgent, Reward 89: -71.878, Len(game): 104, Training Time: 2.878s, Prediction Time: 2.878s label PPOAgent, Reward 90: -78.592, Len(game): 108, Training Time: 2.906s, Prediction Time: 2.906s label PPOAgent, Reward 91: -54.500, Len(game): 74, Training Time: 2.924s, Prediction Time: 2.924s label PPOAgent, Reward 92: -73.596, Len(game): 59, Training Time: 2.938s, Prediction Time: 2.938s label PPOAgent, Reward 93: -170.120, Len(game): 126, Training Time: 2.971s, Prediction Time: 2.971s label PPOAgent, Reward 94: -90.585, Len(game): 74, Training Time: 2.990s, Prediction Time: 2.990s label PPOAgent, Reward 95: -84.918, Len(game): 54, Training Time: 3.003s, Prediction Time: 3.003s label PPOAgent, Reward 96: -110.031, Len(game): 124, Training Time: 3.067s, Prediction Time: 3.067s label PPOAgent, Reward 97: -81.512, Len(game): 71, Training Time: 3.085s, Prediction Time: 3.085s label PPOAgent, Reward 98: -167.500, Len(game): 102, Training Time: 3.111s, Prediction Time: 3.111s label PPOAgent, Reward 99: -123.470, Len(game): 71, Training Time: 3.129s, Prediction Time: 3.129s label Controller-based, Reward 0: -66.389, Len(game): 62, Training Time: 
0.002s, Prediction Time: 0.002s label Controller-based, Reward 1: -147.150, Len(game): 78, Training Time: 0.004s, Prediction Time: 0.004s label Controller-based, Reward 2: -29.537, Len(game): 91, Training Time: 0.035s, Prediction Time: 0.007s label Controller-based, Reward 3: -351.113, Len(game): 262, Training Time: 0.050s, Prediction Time: 0.014s label Controller-based, Reward 4: -238.660, Len(game): 190, Training Time: 0.062s, Prediction Time: 0.020s label Controller-based, Reward 5: -136.099, Len(game): 62, Training Time: 0.073s, Prediction Time: 0.022s label Controller-based, Reward 6: -130.023, Len(game): 73, Training Time: 0.083s, Prediction Time: 0.024s label Controller-based, Reward 7: -178.675, Len(game): 80, Training Time: 0.093s, Prediction Time: 0.027s label Controller-based, Reward 8: -35.860, Len(game): 75, Training Time: 0.104s, Prediction Time: 0.029s label Controller-based, Reward 9: -81.565, Len(game): 58, Training Time: 0.116s, Prediction Time: 0.030s label Controller-based, Reward 10: -38.254, Len(game): 72, Training Time: 0.126s, Prediction Time: 0.032s label Controller-based, Reward 11: -26.056, Len(game): 141, Training Time: 0.137s, Prediction Time: 0.036s label Controller-based, Reward 12: -107.701, Len(game): 93, Training Time: 0.148s, Prediction Time: 0.039s label Controller-based, Reward 13: -61.681, Len(game): 83, Training Time: 0.159s, Prediction Time: 0.041s label Controller-based, Reward 14: -73.744, Len(game): 54, Training Time: 0.170s, Prediction Time: 0.043s label Controller-based, Reward 15: -202.117, Len(game): 111, Training Time: 0.183s, Prediction Time: 0.046s label Controller-based, Reward 16: -42.078, Len(game): 83, Training Time: 0.194s, Prediction Time: 0.048s label Controller-based, Reward 17: -101.128, Len(game): 72, Training Time: 0.206s, Prediction Time: 0.050s label Controller-based, Reward 18: 198.978, Len(game): 358, Training Time: 0.222s, Prediction Time: 0.063s label Controller-based, Reward 19: -26.105, Len(game): 
150, Training Time: 0.235s, Prediction Time: 0.068s label Controller-based, Reward 20: 178.616, Len(game): 297, Training Time: 0.249s, Prediction Time: 0.075s label Controller-based, Reward 21: 227.939, Len(game): 393, Training Time: 0.267s, Prediction Time: 0.089s label Controller-based, Reward 22: -149.331, Len(game): 88, Training Time: 0.279s, Prediction Time: 0.091s label Controller-based, Reward 23: -90.549, Len(game): 72, Training Time: 0.293s, Prediction Time: 0.092s label Controller-based, Reward 24: 283.269, Len(game): 293, Training Time: 0.310s, Prediction Time: 0.101s label Controller-based, Reward 25: -145.738, Len(game): 2000, Training Time: 0.341s, Prediction Time: 0.159s label Controller-based, Reward 26: 272.230, Len(game): 320, Training Time: 0.358s, Prediction Time: 0.170s label Controller-based, Reward 27: -7.610, Len(game): 213, Training Time: 0.372s, Prediction Time: 0.175s label Controller-based, Reward 28: -45.432, Len(game): 178, Training Time: 0.387s, Prediction Time: 0.180s label Controller-based, Reward 29: 264.286, Len(game): 307, Training Time: 0.404s, Prediction Time: 0.190s label Controller-based, Reward 30: 223.321, Len(game): 375, Training Time: 0.422s, Prediction Time: 0.202s label Controller-based, Reward 31: 236.921, Len(game): 332, Training Time: 0.440s, Prediction Time: 0.213s label Controller-based, Reward 32: 134.198, Len(game): 592, Training Time: 0.458s, Prediction Time: 0.228s label Controller-based, Reward 33: -192.755, Len(game): 181, Training Time: 0.472s, Prediction Time: 0.232s label Controller-based, Reward 34: 310.746, Len(game): 318, Training Time: 0.489s, Prediction Time: 0.243s label Controller-based, Reward 35: 172.609, Len(game): 453, Training Time: 0.508s, Prediction Time: 0.259s label Controller-based, Reward 36: 275.965, Len(game): 265, Training Time: 0.525s, Prediction Time: 0.267s label Controller-based, Reward 37: -195.630, Len(game): 190, Training Time: 0.540s, Prediction Time: 0.271s label 
Controller-based, Reward 38: 203.798, Len(game): 349, Training Time: 0.555s, Prediction Time: 0.279s label Controller-based, Reward 39: 302.250, Len(game): 284, Training Time: 0.570s, Prediction Time: 0.288s label Controller-based, Reward 40: -21.642, Len(game): 211, Training Time: 0.587s, Prediction Time: 0.294s label Controller-based, Reward 41: 234.800, Len(game): 298, Training Time: 0.603s, Prediction Time: 0.301s label Controller-based, Reward 42: 28.698, Len(game): 196, Training Time: 0.618s, Prediction Time: 0.305s label Controller-based, Reward 43: 206.002, Len(game): 319, Training Time: 0.633s, Prediction Time: 0.313s label Controller-based, Reward 44: 235.279, Len(game): 330, Training Time: 0.651s, Prediction Time: 0.324s label Controller-based, Reward 45: 65.881, Len(game): 202, Training Time: 0.666s, Prediction Time: 0.328s label Controller-based, Reward 46: 290.840, Len(game): 275, Training Time: 0.683s, Prediction Time: 0.336s label Controller-based, Reward 47: 250.375, Len(game): 661, Training Time: 0.702s, Prediction Time: 0.354s label Controller-based, Reward 48: 268.129, Len(game): 322, Training Time: 0.717s, Prediction Time: 0.361s label Controller-based, Reward 49: 187.047, Len(game): 518, Training Time: 0.737s, Prediction Time: 0.377s label Controller-based, Reward 50: 293.946, Len(game): 271, Training Time: 0.752s, Prediction Time: 0.384s label Controller-based, Reward 51: 202.701, Len(game): 325, Training Time: 0.769s, Prediction Time: 0.395s label Controller-based, Reward 52: 225.155, Len(game): 333, Training Time: 0.784s, Prediction Time: 0.403s label Controller-based, Reward 53: 242.825, Len(game): 303, Training Time: 0.801s, Prediction Time: 0.413s label Controller-based, Reward 54: 299.742, Len(game): 295, Training Time: 0.816s, Prediction Time: 0.419s label Controller-based, Reward 55: 248.438, Len(game): 337, Training Time: 0.832s, Prediction Time: 0.428s label Controller-based, Reward 56: 134.144, Len(game): 429, Training Time: 
0.850s, Prediction Time: 0.442s label Controller-based, Reward 57: 272.800, Len(game): 260, Training Time: 0.865s, Prediction Time: 0.448s label Controller-based, Reward 58: 260.772, Len(game): 314, Training Time: 0.880s, Prediction Time: 0.456s label Controller-based, Reward 59: 232.116, Len(game): 471, Training Time: 0.897s, Prediction Time: 0.469s label Controller-based, Reward 60: -176.896, Len(game): 195, Training Time: 0.913s, Prediction Time: 0.474s label Controller-based, Reward 61: -193.130, Len(game): 190, Training Time: 0.927s, Prediction Time: 0.479s label Controller-based, Reward 62: 285.408, Len(game): 258, Training Time: 0.942s, Prediction Time: 0.486s label Controller-based, Reward 63: 229.752, Len(game): 306, Training Time: 0.958s, Prediction Time: 0.493s label Controller-based, Reward 64: 295.034, Len(game): 298, Training Time: 0.974s, Prediction Time: 0.503s label Controller-based, Reward 65: -165.219, Len(game): 480, Training Time: 0.991s, Prediction Time: 0.515s label Controller-based, Reward 66: -197.014, Len(game): 204, Training Time: 1.005s, Prediction Time: 0.520s label Controller-based, Reward 67: 209.169, Len(game): 277, Training Time: 1.023s, Prediction Time: 0.529s label Controller-based, Reward 68: 264.307, Len(game): 281, Training Time: 1.038s, Prediction Time: 0.535s label Controller-based, Reward 69: 252.392, Len(game): 307, Training Time: 1.054s, Prediction Time: 0.543s label Controller-based, Reward 70: -23.640, Len(game): 157, Training Time: 1.068s, Prediction Time: 0.546s label Controller-based, Reward 71: -197.094, Len(game): 186, Training Time: 1.083s, Prediction Time: 0.551s label Controller-based, Reward 72: -98.063, Len(game): 1120, Training Time: 1.107s, Prediction Time: 0.591s label Controller-based, Reward 73: 289.180, Len(game): 282, Training Time: 1.123s, Prediction Time: 0.598s label Controller-based, Reward 74: -189.898, Len(game): 186, Training Time: 1.138s, Prediction Time: 0.602s label Controller-based, Reward 75: 
197.033, Len(game): 243, Training Time: 1.153s, Prediction Time: 0.608s label Controller-based, Reward 76: 285.819, Len(game): 321, Training Time: 1.169s, Prediction Time: 0.615s label Controller-based, Reward 77: 232.468, Len(game): 288, Training Time: 1.184s, Prediction Time: 0.622s label Controller-based, Reward 78: 244.307, Len(game): 278, Training Time: 1.199s, Prediction Time: 0.629s label Controller-based, Reward 79: 14.754, Len(game): 206, Training Time: 1.214s, Prediction Time: 0.634s label Controller-based, Reward 80: 237.025, Len(game): 399, Training Time: 1.232s, Prediction Time: 0.647s label Controller-based, Reward 81: 238.159, Len(game): 309, Training Time: 1.251s, Prediction Time: 0.656s label Controller-based, Reward 82: 195.445, Len(game): 356, Training Time: 1.267s, Prediction Time: 0.665s label Controller-based, Reward 83: 231.256, Len(game): 341, Training Time: 1.282s, Prediction Time: 0.673s label Controller-based, Reward 84: -184.248, Len(game): 379, Training Time: 1.299s, Prediction Time: 0.683s label Controller-based, Reward 85: 249.370, Len(game): 310, Training Time: 1.315s, Prediction Time: 0.690s label Controller-based, Reward 86: 274.466, Len(game): 324, Training Time: 1.330s, Prediction Time: 0.698s label Controller-based, Reward 87: 261.930, Len(game): 294, Training Time: 1.345s, Prediction Time: 0.705s label Controller-based, Reward 88: 212.232, Len(game): 381, Training Time: 1.362s, Prediction Time: 0.715s label Controller-based, Reward 89: -43.572, Len(game): 203, Training Time: 1.379s, Prediction Time: 0.719s label Controller-based, Reward 90: 231.690, Len(game): 528, Training Time: 1.397s, Prediction Time: 0.734s label Controller-based, Reward 91: 259.358, Len(game): 303, Training Time: 1.415s, Prediction Time: 0.743s label Controller-based, Reward 92: 21.422, Len(game): 214, Training Time: 1.430s, Prediction Time: 0.749s label Controller-based, Reward 93: -128.026, Len(game): 415, Training Time: 1.447s, Prediction Time: 0.760s 
label Controller-based, Reward 94: 249.865, Len(game): 277, Training Time: 1.464s, Prediction Time: 0.767s label Controller-based, Reward 95: 221.310, Len(game): 356, Training Time: 1.480s, Prediction Time: 0.776s label Controller-based, Reward 96: 228.415, Len(game): 290, Training Time: 1.498s, Prediction Time: 0.786s label Controller-based, Reward 97: 218.647, Len(game): 397, Training Time: 1.515s, Prediction Time: 0.795s label Controller-based, Reward 98: 190.291, Len(game): 410, Training Time: 1.533s, Prediction Time: 0.805s label Controller-based, Reward 99: 206.255, Len(game): 340, Training Time: 1.549s, Prediction Time: 0.814s label KACAgent, Reward 0: -328.568, Len(game): 64, Training Time: 0.007s, Prediction Time: 0.002s label KACAgent, Reward 1: -63.964, Len(game): 71, Training Time: 0.014s, Prediction Time: 0.014s label KACAgent, Reward 2: -207.799, Len(game): 106, Training Time: 0.028s, Prediction Time: 0.032s label KACAgent, Reward 3: -111.775, Len(game): 117, Training Time: 0.056s, Prediction Time: 0.059s label KACAgent, Reward 4: -128.210, Len(game): 83, Training Time: 0.099s, Prediction Time: 0.085s label KACAgent, Reward 5: -116.726, Len(game): 120, Training Time: 0.168s, Prediction Time: 0.129s label KACAgent, Reward 6: -144.208, Len(game): 74, Training Time: 0.266s, Prediction Time: 0.172s label KACAgent, Reward 7: -192.032, Len(game): 80, Training Time: 0.381s, Prediction Time: 0.232s label KACAgent, Reward 8: -80.821, Len(game): 79, Training Time: 0.524s, Prediction Time: 0.300s label KACAgent, Reward 9: -311.251, Len(game): 123, Training Time: 0.697s, Prediction Time: 0.401s label KACAgent, Reward 10: -105.327, Len(game): 144, Training Time: 0.926s, Prediction Time: 0.533s label KACAgent, Reward 11: -102.437, Len(game): 102, Training Time: 1.225s, Prediction Time: 0.653s label KACAgent, Reward 12: 13.917, Len(game): 109, Training Time: 1.568s, Prediction Time: 0.821s label KACAgent, Reward 13: -104.934, Len(game): 122, Training Time: 2.001s, 
Prediction Time: 1.026s label KACAgent, Reward 14: -253.741, Len(game): 119, Training Time: 2.491s, Prediction Time: 1.235s label KACAgent, Reward 15: -273.641, Len(game): 132, Training Time: 3.038s, Prediction Time: 1.495s label KACAgent, Reward 16: -360.088, Len(game): 130, Training Time: 3.604s, Prediction Time: 1.779s label KACAgent, Reward 17: -137.669, Len(game): 101, Training Time: 4.264s, Prediction Time: 2.056s label KACAgent, Reward 18: -95.321, Len(game): 115, Training Time: 5.039s, Prediction Time: 2.377s label KACAgent, Reward 19: -51.346, Len(game): 120, Training Time: 5.919s, Prediction Time: 2.768s label KACAgent, Reward 20: -20.718, Len(game): 89, Training Time: 6.770s, Prediction Time: 3.160s label KACAgent, Reward 21: -79.090, Len(game): 112, Training Time: 7.663s, Prediction Time: 3.595s label KACAgent, Reward 22: -51.047, Len(game): 176, Training Time: 8.694s, Prediction Time: 4.109s label KACAgent, Reward 23: -157.352, Len(game): 134, Training Time: 9.913s, Prediction Time: 4.633s label KACAgent, Reward 24: -84.571, Len(game): 155, Training Time: 11.302s, Prediction Time: 5.287s label KACAgent, Reward 25: -38.485, Len(game): 87, Training Time: 12.840s, Prediction Time: 5.865s label KACAgent, Reward 26: -50.646, Len(game): 142, Training Time: 14.349s, Prediction Time: 6.622s label KACAgent, Reward 27: -94.792, Len(game): 100, Training Time: 16.126s, Prediction Time: 7.352s label KACAgent, Reward 28: -88.876, Len(game): 92, Training Time: 18.067s, Prediction Time: 8.186s label KACAgent, Reward 29: -65.502, Len(game): 189, Training Time: 20.058s, Prediction Time: 9.204s label KACAgent, Reward 30: -59.935, Len(game): 135, Training Time: 22.282s, Prediction Time: 10.244s label KACAgent, Reward 31: -38.201, Len(game): 115, Training Time: 24.704s, Prediction Time: 11.381s label KACAgent, Reward 32: 35.936, Len(game): 108, Training Time: 27.232s, Prediction Time: 12.556s label KACAgent, Reward 33: -75.890, Len(game): 113, Training Time: 29.869s, 
Prediction Time: 13.816s label KACAgent, Reward 34: 7.389, Len(game): 155, Training Time: 32.601s, Prediction Time: 15.190s label KACAgent, Reward 35: -27.966, Len(game): 200, Training Time: 35.647s, Prediction Time: 16.746s label KACAgent, Reward 36: -104.508, Len(game): 144, Training Time: 38.920s, Prediction Time: 18.330s label KACAgent, Reward 37: 14.899, Len(game): 176, Training Time: 42.011s, Prediction Time: 19.865s label KACAgent, Reward 38: -10.088, Len(game): 146, Training Time: 45.404s, Prediction Time: 21.513s label KACAgent, Reward 39: 38.261, Len(game): 126, Training Time: 49.000s, Prediction Time: 23.256s label KACAgent, Reward 40: -88.638, Len(game): 152, Training Time: 52.735s, Prediction Time: 25.247s label KACAgent, Reward 41: 24.816, Len(game): 159, Training Time: 52.735s, Prediction Time: 27.218s label KACAgent, Reward 42: -18.355, Len(game): 104, Training Time: 52.735s, Prediction Time: 27.359s label KACAgent, Reward 43: -77.535, Len(game): 293, Training Time: 52.735s, Prediction Time: 27.757s label KACAgent, Reward 44: -81.236, Len(game): 248, Training Time: 52.735s, Prediction Time: 28.090s label KACAgent, Reward 45: 20.237, Len(game): 127, Training Time: 52.735s, Prediction Time: 28.260s label KACAgent, Reward 46: -26.196, Len(game): 288, Training Time: 52.735s, Prediction Time: 28.647s label KACAgent, Reward 47: -36.950, Len(game): 233, Training Time: 52.735s, Prediction Time: 28.963s label KACAgent, Reward 48: -223.085, Len(game): 218, Training Time: 52.735s, Prediction Time: 29.258s label KACAgent, Reward 49: -111.125, Len(game): 98, Training Time: 52.735s, Prediction Time: 29.389s label KACAgent, Reward 50: 42.071, Len(game): 2000, Training Time: 52.735s, Prediction Time: 32.119s label KACAgent, Reward 51: -53.059, Len(game): 115, Training Time: 52.735s, Prediction Time: 32.274s label KACAgent, Reward 52: -54.857, Len(game): 293, Training Time: 52.735s, Prediction Time: 32.671s label KACAgent, Reward 53: -0.778, Len(game): 128, Training 
Time: 52.735s, Prediction Time: 32.844s label KACAgent, Reward 54: -40.809, Len(game): 232, Training Time: 52.735s, Prediction Time: 33.157s label KACAgent, Reward 55: -65.557, Len(game): 194, Training Time: 52.735s, Prediction Time: 33.419s label KACAgent, Reward 56: -151.013, Len(game): 179, Training Time: 52.735s, Prediction Time: 33.662s label KACAgent, Reward 57: -138.108, Len(game): 105, Training Time: 52.735s, Prediction Time: 33.804s label KACAgent, Reward 58: -72.718, Len(game): 280, Training Time: 52.735s, Prediction Time: 34.181s label KACAgent, Reward 59: 40.494, Len(game): 142, Training Time: 52.735s, Prediction Time: 34.374s label KACAgent, Reward 60: -2.169, Len(game): 208, Training Time: 52.735s, Prediction Time: 34.653s label KACAgent, Reward 61: 6.666, Len(game): 148, Training Time: 52.735s, Prediction Time: 34.852s label KACAgent, Reward 62: 17.237, Len(game): 204, Training Time: 52.735s, Prediction Time: 35.128s label KACAgent, Reward 63: -123.301, Len(game): 168, Training Time: 52.735s, Prediction Time: 35.355s label KACAgent, Reward 64: -55.402, Len(game): 266, Training Time: 52.735s, Prediction Time: 35.716s label KACAgent, Reward 65: 10.282, Len(game): 132, Training Time: 52.735s, Prediction Time: 35.895s label KACAgent, Reward 66: -85.417, Len(game): 284, Training Time: 52.735s, Prediction Time: 36.278s label KACAgent, Reward 67: -405.309, Len(game): 170, Training Time: 52.735s, Prediction Time: 36.511s label KACAgent, Reward 68: 17.269, Len(game): 142, Training Time: 52.735s, Prediction Time: 36.703s label KACAgent, Reward 69: -57.186, Len(game): 169, Training Time: 52.735s, Prediction Time: 36.931s label KACAgent, Reward 70: -41.669, Len(game): 156, Training Time: 52.735s, Prediction Time: 37.142s label KACAgent, Reward 71: -59.518, Len(game): 123, Training Time: 52.735s, Prediction Time: 37.307s label KACAgent, Reward 72: -61.951, Len(game): 249, Training Time: 52.735s, Prediction Time: 37.644s label KACAgent, Reward 73: 37.137, 
Len(game): 170, Training Time: 52.735s, Prediction Time: 37.874s label KACAgent, Reward 74: 13.856, Len(game): 90, Training Time: 52.735s, Prediction Time: 37.995s label KACAgent, Reward 75: -36.667, Len(game): 280, Training Time: 52.735s, Prediction Time: 38.375s label KACAgent, Reward 76: 7.559, Len(game): 118, Training Time: 52.735s, Prediction Time: 38.535s label KACAgent, Reward 77: 34.614, Len(game): 168, Training Time: 52.735s, Prediction Time: 38.762s label KACAgent, Reward 78: -33.996, Len(game): 152, Training Time: 52.735s, Prediction Time: 38.968s label KACAgent, Reward 79: 43.459, Len(game): 143, Training Time: 52.735s, Prediction Time: 39.161s label KACAgent, Reward 80: -296.611, Len(game): 141, Training Time: 52.735s, Prediction Time: 39.354s label KACAgent, Reward 81: -34.731, Len(game): 335, Training Time: 52.735s, Prediction Time: 39.808s label KACAgent, Reward 82: -146.869, Len(game): 167, Training Time: 52.735s, Prediction Time: 40.036s label KACAgent, Reward 83: -41.911, Len(game): 271, Training Time: 52.735s, Prediction Time: 40.403s label KACAgent, Reward 84: -67.637, Len(game): 326, Training Time: 52.735s, Prediction Time: 40.844s label KACAgent, Reward 85: -28.237, Len(game): 151, Training Time: 52.735s, Prediction Time: 41.049s label KACAgent, Reward 86: 20.948, Len(game): 160, Training Time: 52.735s, Prediction Time: 41.265s label KACAgent, Reward 87: -64.373, Len(game): 113, Training Time: 52.735s, Prediction Time: 41.417s label KACAgent, Reward 88: -63.747, Len(game): 159, Training Time: 52.735s, Prediction Time: 41.633s label KACAgent, Reward 89: 3.050, Len(game): 271, Training Time: 52.735s, Prediction Time: 42.001s label KACAgent, Reward 90: 22.918, Len(game): 195, Training Time: 52.735s, Prediction Time: 42.264s label KACAgent, Reward 91: 59.352, Len(game): 138, Training Time: 52.735s, Prediction Time: 42.450s label KACAgent, Reward 92: -92.817, Len(game): 184, Training Time: 52.735s, Prediction Time: 42.699s label KACAgent, Reward 
93: 11.008, Len(game): 178, Training Time: 52.735s, Prediction Time: 42.941s label KACAgent, Reward 94: -84.499, Len(game): 106, Training Time: 52.735s, Prediction Time: 43.084s label KACAgent, Reward 95: 24.324, Len(game): 138, Training Time: 52.735s, Prediction Time: 43.273s label KACAgent, Reward 96: 29.313, Len(game): 151, Training Time: 52.735s, Prediction Time: 43.477s label KACAgent, Reward 97: -161.641, Len(game): 143, Training Time: 52.735s, Prediction Time: 43.671s label KACAgent, Reward 98: -39.324, Len(game): 111, Training Time: 52.735s, Prediction Time: 43.822s label KACAgent, Reward 99: -24.729, Len(game): 178, Training Time: 52.735s, Prediction Time: 44.064s label PolicyGradient, Reward 0: -81.303, Len(game): 64, Training Time: 0.008s, Prediction Time: 0.001s label PolicyGradient, Reward 1: -102.050, Len(game): 67, Training Time: 0.015s, Prediction Time: 0.013s label PolicyGradient, Reward 2: -204.890, Len(game): 98, Training Time: 0.027s, Prediction Time: 0.029s label PolicyGradient, Reward 3: -161.854, Len(game): 99, Training Time: 0.119s, Prediction Time: 0.052s label PolicyGradient, Reward 4: -262.171, Len(game): 111, Training Time: 0.152s, Prediction Time: 0.086s label PolicyGradient, Reward 5: -99.184, Len(game): 63, Training Time: 0.207s, Prediction Time: 0.111s label PolicyGradient, Reward 6: -94.398, Len(game): 69, Training Time: 0.273s, Prediction Time: 0.148s label PolicyGradient, Reward 7: -170.477, Len(game): 129, Training Time: 0.362s, Prediction Time: 0.214s label PolicyGradient, Reward 8: -323.570, Len(game): 100, Training Time: 0.481s, Prediction Time: 0.277s label PolicyGradient, Reward 9: -203.517, Len(game): 108, Training Time: 0.626s, Prediction Time: 0.360s label PolicyGradient, Reward 10: -209.092, Len(game): 149, Training Time: 0.810s, Prediction Time: 0.475s label PolicyGradient, Reward 11: -54.381, Len(game): 71, Training Time: 1.045s, Prediction Time: 0.563s label PolicyGradient, Reward 12: -210.281, Len(game): 135, 
Training Time: 1.312s, Prediction Time: 0.728s label PolicyGradient, Reward 13: -137.631, Len(game): 108, Training Time: 1.653s, Prediction Time: 0.876s label PolicyGradient, Reward 14: -305.747, Len(game): 114, Training Time: 2.049s, Prediction Time: 1.046s label PolicyGradient, Reward 15: -234.554, Len(game): 111, Training Time: 2.479s, Prediction Time: 1.239s label PolicyGradient, Reward 16: -239.750, Len(game): 99, Training Time: 2.962s, Prediction Time: 1.455s label PolicyGradient, Reward 17: -172.324, Len(game): 129, Training Time: 3.494s, Prediction Time: 1.714s label PolicyGradient, Reward 18: -225.455, Len(game): 139, Training Time: 4.098s, Prediction Time: 2.004s label PolicyGradient, Reward 19: -187.867, Len(game): 104, Training Time: 4.800s, Prediction Time: 2.301s label PolicyGradient, Reward 20: -61.457, Len(game): 86, Training Time: 5.571s, Prediction Time: 2.625s label PolicyGradient, Reward 21: -96.494, Len(game): 145, Training Time: 6.389s, Prediction Time: 3.038s label PolicyGradient, Reward 22: -202.978, Len(game): 180, Training Time: 7.320s, Prediction Time: 3.492s label PolicyGradient, Reward 23: -130.890, Len(game): 100, Training Time: 8.385s, Prediction Time: 3.918s label PolicyGradient, Reward 24: -157.249, Len(game): 169, Training Time: 9.605s, Prediction Time: 4.513s label PolicyGradient, Reward 25: -122.789, Len(game): 110, Training Time: 10.988s, Prediction Time: 5.131s label PolicyGradient, Reward 26: -106.723, Len(game): 82, Training Time: 12.463s, Prediction Time: 5.787s label PolicyGradient, Reward 27: -118.584, Len(game): 139, Training Time: 14.062s, Prediction Time: 6.507s label PolicyGradient, Reward 28: -86.630, Len(game): 78, Training Time: 15.741s, Prediction Time: 7.213s label PolicyGradient, Reward 29: -33.499, Len(game): 98, Training Time: 17.483s, Prediction Time: 7.963s label PolicyGradient, Reward 30: -81.531, Len(game): 139, Training Time: 19.325s, Prediction Time: 8.796s label PolicyGradient, Reward 31: -86.179, 
Len(game): 131, Training Time: 21.536s, Prediction Time: 9.749s label PolicyGradient, Reward 32: -41.293, Len(game): 159, Training Time: 23.714s, Prediction Time: 10.848s label PolicyGradient, Reward 33: -58.410, Len(game): 78, Training Time: 26.094s, Prediction Time: 11.789s label PolicyGradient, Reward 34: -37.786, Len(game): 84, Training Time: 28.568s, Prediction Time: 12.906s label PolicyGradient, Reward 35: 2.557, Len(game): 101, Training Time: 31.185s, Prediction Time: 14.112s label PolicyGradient, Reward 36: -201.901, Len(game): 219, Training Time: 34.354s, Prediction Time: 15.479s label PolicyGradient, Reward 37: -119.981, Len(game): 211, Training Time: 37.941s, Prediction Time: 17.138s label PolicyGradient, Reward 38: 11.247, Len(game): 91, Training Time: 41.920s, Prediction Time: 18.788s label PolicyGradient, Reward 39: -20.067, Len(game): 248, Training Time: 46.147s, Prediction Time: 20.905s label PolicyGradient, Reward 40: -67.204, Len(game): 206, Training Time: 50.963s, Prediction Time: 23.085s label PolicyGradient, Reward 41: -126.823, Len(game): 185, Training Time: 50.963s, Prediction Time: 25.493s label PolicyGradient, Reward 42: -230.637, Len(game): 246, Training Time: 50.963s, Prediction Time: 25.838s label PolicyGradient, Reward 43: -18.282, Len(game): 140, Training Time: 50.963s, Prediction Time: 26.031s label PolicyGradient, Reward 44: -196.310, Len(game): 182, Training Time: 50.963s, Prediction Time: 26.283s label PolicyGradient, Reward 45: -190.296, Len(game): 200, Training Time: 50.963s, Prediction Time: 26.565s label PolicyGradient, Reward 46: -80.623, Len(game): 143, Training Time: 50.963s, Prediction Time: 26.761s label PolicyGradient, Reward 47: -99.060, Len(game): 303, Training Time: 50.963s, Prediction Time: 27.186s label PolicyGradient, Reward 48: -241.909, Len(game): 176, Training Time: 50.963s, Prediction Time: 27.438s label PolicyGradient, Reward 49: -371.817, Len(game): 257, Training Time: 50.963s, Prediction Time: 27.796s label 
PolicyGradient, Reward 50: -100.116, Len(game): 145, Training Time: 50.963s, Prediction Time: 27.998s label PolicyGradient, Reward 51: -0.238, Len(game): 96, Training Time: 50.963s, Prediction Time: 28.132s label PolicyGradient, Reward 52: -21.396, Len(game): 151, Training Time: 50.963s, Prediction Time: 28.341s label PolicyGradient, Reward 53: -248.328, Len(game): 208, Training Time: 50.963s, Prediction Time: 28.636s label PolicyGradient, Reward 54: -188.180, Len(game): 260, Training Time: 50.963s, Prediction Time: 28.995s label PolicyGradient, Reward 55: -78.366, Len(game): 119, Training Time: 50.963s, Prediction Time: 29.166s label PolicyGradient, Reward 56: -4.501, Len(game): 99, Training Time: 50.963s, Prediction Time: 29.305s label PolicyGradient, Reward 57: -245.866, Len(game): 159, Training Time: 50.963s, Prediction Time: 29.528s label PolicyGradient, Reward 58: -38.078, Len(game): 137, Training Time: 50.963s, Prediction Time: 29.721s label PolicyGradient, Reward 59: -40.093, Len(game): 90, Training Time: 50.963s, Prediction Time: 29.849s label PolicyGradient, Reward 60: -231.111, Len(game): 156, Training Time: 50.963s, Prediction Time: 30.069s label PolicyGradient, Reward 61: -106.972, Len(game): 204, Training Time: 50.963s, Prediction Time: 30.355s label PolicyGradient, Reward 62: -143.943, Len(game): 238, Training Time: 50.963s, Prediction Time: 30.690s label PolicyGradient, Reward 63: -148.996, Len(game): 213, Training Time: 50.963s, Prediction Time: 30.986s label PolicyGradient, Reward 64: -547.445, Len(game): 220, Training Time: 50.963s, Prediction Time: 31.299s label PolicyGradient, Reward 65: -104.275, Len(game): 142, Training Time: 50.963s, Prediction Time: 31.501s label PolicyGradient, Reward 66: -63.869, Len(game): 153, Training Time: 50.963s, Prediction Time: 31.715s label PolicyGradient, Reward 67: -149.268, Len(game): 117, Training Time: 50.963s, Prediction Time: 31.881s label PolicyGradient, Reward 68: -58.490, Len(game): 160, Training Time: 
50.963s, Prediction Time: 32.108s label PolicyGradient, Reward 69: -85.277, Len(game): 135, Training Time: 50.963s, Prediction Time: 32.298s label PolicyGradient, Reward 70: -121.707, Len(game): 349, Training Time: 50.963s, Prediction Time: 32.791s label PolicyGradient, Reward 71: -18.086, Len(game): 99, Training Time: 50.963s, Prediction Time: 32.929s label PolicyGradient, Reward 72: -45.293, Len(game): 129, Training Time: 50.963s, Prediction Time: 33.110s label PolicyGradient, Reward 73: -4.228, Len(game): 152, Training Time: 50.963s, Prediction Time: 33.322s label PolicyGradient, Reward 74: -21.963, Len(game): 92, Training Time: 50.963s, Prediction Time: 33.455s label PolicyGradient, Reward 75: -341.886, Len(game): 359, Training Time: 50.963s, Prediction Time: 33.957s label PolicyGradient, Reward 76: -84.536, Len(game): 160, Training Time: 50.963s, Prediction Time: 34.181s label PolicyGradient, Reward 77: -45.906, Len(game): 135, Training Time: 50.963s, Prediction Time: 34.370s label PolicyGradient, Reward 78: -80.470, Len(game): 155, Training Time: 50.963s, Prediction Time: 34.590s label PolicyGradient, Reward 79: -78.294, Len(game): 131, Training Time: 50.963s, Prediction Time: 34.774s label PolicyGradient, Reward 80: -172.076, Len(game): 347, Training Time: 50.963s, Prediction Time: 35.261s label PolicyGradient, Reward 81: -74.395, Len(game): 298, Training Time: 50.963s, Prediction Time: 35.678s label PolicyGradient, Reward 82: -219.371, Len(game): 329, Training Time: 50.963s, Prediction Time: 36.145s label PolicyGradient, Reward 83: -15.508, Len(game): 162, Training Time: 50.963s, Prediction Time: 36.371s label PolicyGradient, Reward 84: -68.583, Len(game): 141, Training Time: 50.963s, Prediction Time: 36.566s label PolicyGradient, Reward 85: -82.117, Len(game): 231, Training Time: 50.963s, Prediction Time: 36.887s label PolicyGradient, Reward 86: -66.863, Len(game): 222, Training Time: 50.963s, Prediction Time: 37.199s label PolicyGradient, Reward 87: 
-311.287, Len(game): 205, Training Time: 50.963s, Prediction Time: 37.486s label PolicyGradient, Reward 88: -100.645, Len(game): 241, Training Time: 50.963s, Prediction Time: 37.820s label PolicyGradient, Reward 89: -46.934, Len(game): 167, Training Time: 50.963s, Prediction Time: 38.054s label PolicyGradient, Reward 90: -158.278, Len(game): 167, Training Time: 50.963s, Prediction Time: 38.285s label PolicyGradient, Reward 91: -46.275, Len(game): 124, Training Time: 50.963s, Prediction Time: 38.459s label PolicyGradient, Reward 92: -75.934, Len(game): 182, Training Time: 50.963s, Prediction Time: 38.714s label PolicyGradient, Reward 93: -191.096, Len(game): 206, Training Time: 50.963s, Prediction Time: 39.000s label PolicyGradient, Reward 94: 5.992, Len(game): 109, Training Time: 50.963s, Prediction Time: 39.152s label PolicyGradient, Reward 95: -113.291, Len(game): 198, Training Time: 50.963s, Prediction Time: 39.427s label PolicyGradient, Reward 96: -4.211, Len(game): 89, Training Time: 50.963s, Prediction Time: 39.549s label PolicyGradient, Reward 97: -123.054, Len(game): 233, Training Time: 50.963s, Prediction Time: 39.878s label PolicyGradient, Reward 98: -105.275, Len(game): 178, Training Time: 50.963s, Prediction Time: 40.126s label PolicyGradient, Reward 99: -35.657, Len(game): 99, Training Time: 50.963s, Prediction Time: 40.267s C:\Users\geoff\Desktop\Github\codpybook-rtd\docs\ch8\ignore_utils.py:621: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\utils\tensor_new.cpp:257.) 
state = torch.tensor([state], dtype=torch.float32) label DQNAgent, Reward 0: -113.085, Len(game): 78, Training Time: 0.025s, Prediction Time: 0.004s label DQNAgent, Reward 1: -379.710, Len(game): 114, Training Time: 0.172s, Prediction Time: 0.008s label DQNAgent, Reward 2: -154.823, Len(game): 91, Training Time: 0.281s, Prediction Time: 0.012s label DQNAgent, Reward 3: -158.450, Len(game): 115, Training Time: 0.422s, Prediction Time: 0.017s label DQNAgent, Reward 4: -126.051, Len(game): 142, Training Time: 0.591s, Prediction Time: 0.025s label DQNAgent, Reward 5: -362.040, Len(game): 453, Training Time: 1.171s, Prediction Time: 0.054s label DQNAgent, Reward 6: 2.751, Len(game): 244, Training Time: 1.475s, Prediction Time: 0.073s label DQNAgent, Reward 7: -224.326, Len(game): 2000, Training Time: 3.986s, Prediction Time: 0.242s label DQNAgent, Reward 8: -310.762, Len(game): 2000, Training Time: 6.763s, Prediction Time: 0.417s label DQNAgent, Reward 9: -432.592, Len(game): 2000, Training Time: 9.324s, Prediction Time: 0.591s label DQNAgent, Reward 10: -355.976, Len(game): 1604, Training Time: 11.380s, Prediction Time: 0.729s label DQNAgent, Reward 11: 236.521, Len(game): 367, Training Time: 11.857s, Prediction Time: 0.761s label DQNAgent, Reward 12: -187.677, Len(game): 2000, Training Time: 14.435s, Prediction Time: 0.933s label DQNAgent, Reward 13: -179.741, Len(game): 2000, Training Time: 17.013s, Prediction Time: 1.110s label DQNAgent, Reward 14: -183.653, Len(game): 603, Training Time: 17.794s, Prediction Time: 1.162s label DQNAgent, Reward 15: -191.268, Len(game): 383, Training Time: 18.287s, Prediction Time: 1.195s label DQNAgent, Reward 16: -188.280, Len(game): 332, Training Time: 18.720s, Prediction Time: 1.225s label DQNAgent, Reward 17: -196.164, Len(game): 541, Training Time: 19.416s, Prediction Time: 1.272s label DQNAgent, Reward 18: -167.276, Len(game): 2000, Training Time: 22.016s, Prediction Time: 1.443s label DQNAgent, Reward 19: -189.136, Len(game): 
2000, Training Time: 24.666s, Prediction Time: 1.618s label DQNAgent, Reward 20: -174.624, Len(game): 2000, Training Time: 27.299s, Prediction Time: 1.801s label DQNAgent, Reward 21: -238.685, Len(game): 2000, Training Time: 29.948s, Prediction Time: 1.980s label DQNAgent, Reward 22: -154.886, Len(game): 124, Training Time: 30.116s, Prediction Time: 1.992s label DQNAgent, Reward 23: -174.114, Len(game): 2000, Training Time: 32.795s, Prediction Time: 2.168s label DQNAgent, Reward 24: -253.414, Len(game): 2000, Training Time: 35.460s, Prediction Time: 2.342s label DQNAgent, Reward 25: -191.407, Len(game): 2000, Training Time: 38.131s, Prediction Time: 2.520s label DQNAgent, Reward 26: -202.056, Len(game): 2000, Training Time: 40.805s, Prediction Time: 2.698s label DQNAgent, Reward 27: -194.853, Len(game): 2000, Training Time: 43.471s, Prediction Time: 2.875s label DQNAgent, Reward 28: -192.771, Len(game): 2000, Training Time: 46.156s, Prediction Time: 3.051s label DQNAgent, Reward 29: -217.784, Len(game): 2000, Training Time: 48.842s, Prediction Time: 3.226s label DQNAgent, Reward 30: -157.779, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.406s label DQNAgent, Reward 31: -297.432, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.585s label DQNAgent, Reward 32: -250.067, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.730s label DQNAgent, Reward 33: -278.377, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.874s label DQNAgent, Reward 34: -248.133, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.017s label DQNAgent, Reward 35: -268.962, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.159s label DQNAgent, Reward 36: -254.991, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.302s label DQNAgent, Reward 37: -292.653, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.445s label DQNAgent, Reward 38: -256.357, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.588s label 
DQNAgent, Reward 39: -219.518, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.731s label DQNAgent, Reward 40: -288.255, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.874s label DQNAgent, Reward 41: -211.079, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.019s label DQNAgent, Reward 42: -236.794, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.161s label DQNAgent, Reward 43: -305.389, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.305s label DQNAgent, Reward 44: -288.723, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.445s label DQNAgent, Reward 45: -267.358, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.587s label DQNAgent, Reward 46: -307.678, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.727s label DQNAgent, Reward 47: -326.587, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.869s label DQNAgent, Reward 48: -239.119, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.012s label DQNAgent, Reward 49: -288.258, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.153s label DQNAgent, Reward 50: -315.439, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.293s label DQNAgent, Reward 51: -201.114, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.435s label DQNAgent, Reward 52: -295.868, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.578s label DQNAgent, Reward 53: -259.802, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.720s label DQNAgent, Reward 54: -295.257, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.861s label DQNAgent, Reward 55: -248.141, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.004s label DQNAgent, Reward 56: -245.958, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.145s label DQNAgent, Reward 57: -315.552, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.286s label DQNAgent, Reward 58: -238.332, Len(game): 2000, Training 
Time: 51.526s, Prediction Time: 7.429s label DQNAgent, Reward 59: -311.968, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.573s label DQNAgent, Reward 60: -237.635, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.715s label DQNAgent, Reward 61: -320.902, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.855s label DQNAgent, Reward 62: -262.911, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.997s label DQNAgent, Reward 63: -306.299, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.140s label DQNAgent, Reward 64: -185.516, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.280s label DQNAgent, Reward 65: -264.190, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.423s label DQNAgent, Reward 66: -307.969, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.565s label DQNAgent, Reward 67: -262.631, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.710s label DQNAgent, Reward 68: -281.048, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.852s label DQNAgent, Reward 69: -258.154, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.994s label DQNAgent, Reward 70: -263.735, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.137s label DQNAgent, Reward 71: -319.130, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.280s label DQNAgent, Reward 72: -294.916, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.423s label DQNAgent, Reward 73: -233.601, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.567s label DQNAgent, Reward 74: -285.451, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.709s label DQNAgent, Reward 75: -296.343, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.850s label DQNAgent, Reward 76: -248.531, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.992s label DQNAgent, Reward 77: -286.580, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.133s label DQNAgent, 
Reward 78: -252.319, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.274s label DQNAgent, Reward 79: -292.500, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.416s label DQNAgent, Reward 80: -287.494, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.559s label DQNAgent, Reward 81: -258.796, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.700s label DQNAgent, Reward 82: -280.900, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.841s label DQNAgent, Reward 83: -287.505, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.980s label DQNAgent, Reward 84: -312.122, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.125s label DQNAgent, Reward 85: -291.135, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.266s label DQNAgent, Reward 86: -288.435, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.409s label DQNAgent, Reward 87: -260.070, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.554s label DQNAgent, Reward 88: -211.202, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.697s label DQNAgent, Reward 89: -236.297, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.838s label DQNAgent, Reward 90: -240.365, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.981s label DQNAgent, Reward 91: -294.879, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.124s label DQNAgent, Reward 92: -259.572, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.266s label DQNAgent, Reward 93: -278.656, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.408s label DQNAgent, Reward 94: -315.290, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.547s label DQNAgent, Reward 95: -237.135, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.688s label DQNAgent, Reward 96: -233.797, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.827s label DQNAgent, Reward 97: -271.331, Len(game): 2000, 
Training Time: 51.526s, Prediction Time: 12.968s label DQNAgent, Reward 98: -292.179, Len(game): 2000, Training Time: 51.526s, Prediction Time: 13.108s label DQNAgent, Reward 99: -324.016, Len(game): 2000, Training Time: 51.526s, Prediction Time: 13.248s Computed global error Bellman mean: 1.7878172036910294 iter: 0 Computed global error Bellman mean: 1.8623318222045684 iter: 0 label KQLearning, Reward 0: -200.395, Len(game): 128, Training Time: 0.055s, Prediction Time: 0.003s Computed global error Bellman mean: 1.2356713497168017e-07 iter: 3 label KQLearning, Reward 1: -620.667, Len(game): 73, Training Time: 0.075s, Prediction Time: 0.023s Computed global error Bellman mean: 0.00017559370102202926 iter: 5 label KQLearning, Reward 2: -708.620, Len(game): 151, Training Time: 0.154s, Prediction Time: 0.147s Computed global error Bellman mean: 1.5985114262427255e-07 iter: 5 label KQLearning, Reward 3: -162.912, Len(game): 128, Training Time: 0.207s, Prediction Time: 0.301s Computed global error Bellman mean: 0.09014312280287062 iter: 5 Computed global error Bellman mean: 0.005174441156915289 iter: 3 label KQLearning, Reward 4: -132.788, Len(game): 59, Training Time: 0.248s, Prediction Time: 0.394s Computed global error Bellman mean: 2.5997462881714264e-08 iter: 3 label KQLearning, Reward 5: -134.307, Len(game): 75, Training Time: 0.268s, Prediction Time: 0.524s Computed global error Bellman mean: 4.279074899312429e-08 iter: 5 label KQLearning, Reward 6: -145.728, Len(game): 59, Training Time: 0.288s, Prediction Time: 0.637s Computed global error Bellman mean: 1.1294630867435227 iter: 5 Computed global error Bellman mean: 0.003057647495996643 iter: 5 label KQLearning, Reward 7: -153.151, Len(game): 120, Training Time: 0.431s, Prediction Time: 0.875s Computed global error Bellman mean: 2.0004563183198205e-07 iter: 4 label KQLearning, Reward 8: 40.859, Len(game): 82, Training Time: 0.458s, Prediction Time: 1.078s Computed global error Bellman mean: 2.0093319964137324e-07 
iter: 4 label KQLearning, Reward 9: -297.285, Len(game): 128, Training Time: 0.508s, Prediction Time: 1.403s Computed global error Bellman mean: 5.93768899459578e-05 iter: 5 label KQLearning, Reward 10: -377.949, Len(game): 58, Training Time: 0.532s, Prediction Time: 1.589s Computed global error Bellman mean: 0.1857227283768003 iter: 5 Computed global error Bellman mean: 0.1749357521973165 iter: 5 label KQLearning, Reward 11: -121.868, Len(game): 58, Training Time: 0.578s, Prediction Time: 1.779s Computed global error Bellman mean: 5.506837701738312e-08 iter: 4 Computed global error Bellman mean: 0.08027210785316556 iter: 5 label KQLearning, Reward 12: -156.228, Len(game): 65, Training Time: 0.625s, Prediction Time: 2.009s Computed global error Bellman mean: 8.817866412693878e-09 iter: 2 Computed global error Bellman mean: 0.034812471780843325 iter: 5 label KQLearning, Reward 13: -122.908, Len(game): 73, Training Time: 0.670s, Prediction Time: 2.263s Computed global error Bellman mean: 0.02543451762234678 iter: 5 Computed global error Bellman mean: 0.009514509920557154 iter: 5 Computed global error Bellman mean: 0.037829476788574255 iter: 3 label KQLearning, Reward 14: -96.159, Len(game): 85, Training Time: 0.757s, Prediction Time: 2.585s Computed global error Bellman mean: 0.24919380917819176 iter: 5 Computed global error Bellman mean: 0.044900472614210124 iter: 5 label KQLearning, Reward 15: -121.204, Len(game): 59, Training Time: 0.804s, Prediction Time: 2.837s Computed global error Bellman mean: 1.0091587948197072e-07 iter: 5 Computed global error Bellman mean: 0.022390631371600574 iter: 5 label KQLearning, Reward 16: -86.396, Len(game): 91, Training Time: 0.870s, Prediction Time: 3.197s Computed global error Bellman mean: 0.128533530536805 iter: 5 Computed global error Bellman mean: 0.001863147889892192 iter: 5 Computed global error Bellman mean: 0.007104770255512356 iter: 5 label KQLearning, Reward 17: -143.376, Len(game): 61, Training Time: 0.942s, 
Prediction Time: 3.479s Computed global error Bellman mean: 0.00023891959529186475 iter: 5 label KQLearning, Reward 18: -121.010, Len(game): 83, Training Time: 0.976s, Prediction Time: 3.846s Computed global error Bellman mean: 0.013989181209570873 iter: 5 Computed global error Bellman mean: 0.006825096004593422 iter: 4 label KQLearning, Reward 19: -119.881, Len(game): 118, Training Time: 1.119s, Prediction Time: 4.405s Computed global error Bellman mean: 1.0513683747492841e-07 iter: 4 label KQLearning, Reward 20: -118.150, Len(game): 80, Training Time: 1.149s, Prediction Time: 4.816s Computed global error Bellman mean: 7.453840246851411e-06 iter: 5 label KQLearning, Reward 21: -136.488, Len(game): 128, Training Time: 1.213s, Prediction Time: 5.502s Computed global error Bellman mean: 4.260048658277782e-08 iter: 5 label KQLearning, Reward 22: -99.267, Len(game): 70, Training Time: 1.240s, Prediction Time: 5.898s Computed global error Bellman mean: 1.1402115084113178e-08 iter: 4 label KQLearning, Reward 23: -131.985, Len(game): 84, Training Time: 1.274s, Prediction Time: 6.391s Computed global error Bellman mean: 0.1251552939518742 iter: 5 Computed global error Bellman mean: 0.04363123471567561 iter: 5 label KQLearning, Reward 24: -112.795, Len(game): 53, Training Time: 1.314s, Prediction Time: 6.735s Computed global error Bellman mean: 0.00016643639993477752 iter: 5 Computed global error Bellman mean: 0.005202712701594659 iter: 5 label KQLearning, Reward 25: -133.218, Len(game): 116, Training Time: 1.396s, Prediction Time: 7.442s Computed global error Bellman mean: 2.04596698215854e-08 iter: 3 label KQLearning, Reward 26: -103.134, Len(game): 52, Training Time: 1.413s, Prediction Time: 7.797s Computed global error Bellman mean: 8.631624331451543e-09 iter: 4 label KQLearning, Reward 27: -132.907, Len(game): 55, Training Time: 1.431s, Prediction Time: 8.165s Computed global error Bellman mean: 0.041597443857888286 iter: 5 Computed global error Bellman mean: 
0.013447130230095082 iter: 5 label KQLearning, Reward 28: -145.249, Len(game): 64, Training Time: 1.486s, Prediction Time: 8.606s Computed global error Bellman mean: 0.04793728290235055 iter: 5 Computed global error Bellman mean: 0.013447129295314223 iter: 1 Computed global error Bellman mean: 0.05875239585481834 iter: 3 label KQLearning, Reward 29: -163.755, Len(game): 139, Training Time: 1.679s, Prediction Time: 9.588s Computed global error Bellman mean: 0.02056910697615851 iter: 5 Computed global error Bellman mean: 0.004457868533659127 iter: 5 label KQLearning, Reward 30: -224.982, Len(game): 244, Training Time: 2.268s, Prediction Time: 11.357s Computed global error Bellman mean: 2.7721892858304878e-08 iter: 5 label KQLearning, Reward 31: -111.750, Len(game): 75, Training Time: 2.303s, Prediction Time: 11.962s Computed global error Bellman mean: 8.88503637487098e-09 iter: 3 label KQLearning, Reward 32: -133.927, Len(game): 71, Training Time: 2.325s, Prediction Time: 12.546s Computed global error Bellman mean: 0.011135651236183156 iter: 5 Computed global error Bellman mean: 0.019759772703321254 iter: 4 label KQLearning, Reward 33: -113.742, Len(game): 58, Training Time: 2.367s, Prediction Time: 13.033s Computed global error Bellman mean: 0.012950627470135787 iter: 5 Computed global error Bellman mean: 0.018880851009172462 iter: 3 label KQLearning, Reward 34: -58.630, Len(game): 97, Training Time: 2.455s, Prediction Time: 13.857s Computed global error Bellman mean: 0.3276866377019415 iter: 5 Computed global error Bellman mean: 0.3008326609173155 iter: 5 label KQLearning, Reward 35: -104.955, Len(game): 53, Training Time: 2.498s, Prediction Time: 14.328s Computed global error Bellman mean: 1.1194796463118103e-07 iter: 4 Computed global error Bellman mean: 0.08565904938278161 iter: 5 label KQLearning, Reward 36: -137.875, Len(game): 94, Training Time: 2.555s, Prediction Time: 15.170s Computed global error Bellman mean: 0.0014778275554370845 iter: 5 Computed global 
error Bellman mean: 0.03495880138002345 iter: 5 label KQLearning, Reward 37: -116.011, Len(game): 105, Training Time: 2.628s, Prediction Time: 16.136s Computed global error Bellman mean: 4.1763729385912994e-08 iter: 5 Computed global error Bellman mean: 0.001885933119938559 iter: 5 label KQLearning, Reward 38: -100.451, Len(game): 60, Training Time: 2.673s, Prediction Time: 16.717s Computed global error Bellman mean: 0.1462575863035063 iter: 5 Computed global error Bellman mean: 0.01722092350381151 iter: 5 label KQLearning, Reward 39: -168.628, Len(game): 164, Training Time: 2.953s, Prediction Time: 18.286s Computed global error Bellman mean: 0.0010308075347473908 iter: 5 Computed global error Bellman mean: 0.01722092350381151 iter: 0 label KQLearning, Reward 40: -134.726, Len(game): 123, Training Time: 3.063s, Prediction Time: 19.510s Computed global error Bellman mean: 7.542191039163838e-08 iter: 5 label KQLearning, Reward 41: -177.308, Len(game): 99, Training Time: 3.104s, Prediction Time: 20.523s Computed global error Bellman mean: 2.8866856238707328e-08 iter: 4 label KQLearning, Reward 42: -161.166, Len(game): 82, Training Time: 3.133s, Prediction Time: 21.389s Computed global error Bellman mean: 2.397640513347026e-07 iter: 4 label KQLearning, Reward 43: -333.739, Len(game): 114, Training Time: 3.177s, Prediction Time: 22.620s Computed global error Bellman mean: 0.07508408132661465 iter: 5 Computed global error Bellman mean: 0.039524670065325604 iter: 5 label KQLearning, Reward 44: -126.057, Len(game): 55, Training Time: 3.220s, Prediction Time: 23.243s Computed global error Bellman mean: 0.021675287415418642 iter: 5 Computed global error Bellman mean: 0.023099582157161767 iter: 5 Computed global error Bellman mean: 0.02441240894226758 iter: 3 label KQLearning, Reward 45: -217.480, Len(game): 102, Training Time: 3.347s, Prediction Time: 24.386s Computed global error Bellman mean: 0.8280139871538977 iter: 5 Computed global error Bellman mean: 
0.0030895465786024735 iter: 5 Computed global error Bellman mean: 0.8459046706688572 iter: 0 label KQLearning, Reward 46: -176.128, Len(game): 116, Training Time: 3.451s, Prediction Time: 25.716s Computed global error Bellman mean: 3.6333881026195065e-07 iter: 5 label KQLearning, Reward 47: -412.807, Len(game): 126, Training Time: 3.506s, Prediction Time: 27.205s Computed global error Bellman mean: 0.022067067426577363 iter: 5 Computed global error Bellman mean: 0.05577230577725157 iter: 1 label KQLearning, Reward 48: -137.337, Len(game): 70, Training Time: 3.551s, Prediction Time: 28.055s Computed global error Bellman mean: 4.629390375805136e-08 iter: 4 label KQLearning, Reward 49: -246.877, Len(game): 84, Training Time: 3.579s, Prediction Time: 29.101s Computed global error Bellman mean: 1.1653079550570192e-07 iter: 5 label KQLearning, Reward 50: -121.362, Len(game): 90, Training Time: 3.618s, Prediction Time: 30.220s Computed global error Bellman mean: 2.223875292330289e-07 iter: 4 label KQLearning, Reward 51: -205.546, Len(game): 112, Training Time: 3.668s, Prediction Time: 31.655s Computed global error Bellman mean: 0.00015011166943613467 iter: 5 label KQLearning, Reward 52: -277.724, Len(game): 217, Training Time: 3.844s, Prediction Time: 34.494s Computed global error Bellman mean: 0.8212383004785393 iter: 1 Computed global error Bellman mean: 0.8580645610499036 iter: 0 label KQLearning, Reward 53: -156.637, Len(game): 127, Training Time: 3.904s, Prediction Time: 36.234s Computed global error Bellman mean: 2.033127582415023e-07 iter: 4 label KQLearning, Reward 54: -191.852, Len(game): 82, Training Time: 3.934s, Prediction Time: 37.392s Computed global error Bellman mean: 0.00020429838190502467 iter: 5 label KQLearning, Reward 55: -199.740, Len(game): 236, Training Time: 4.145s, Prediction Time: 40.729s Computed global error Bellman mean: 1.3120410218598572e-08 iter: 3 label KQLearning, Reward 56: -111.694, Len(game): 56, Training Time: 4.164s, Prediction 
Time: 41.569s Computed global error Bellman mean: 0.00439517314175621 iter: 5 label KQLearning, Reward 57: -106.712, Len(game): 201, Training Time: 4.326s, Prediction Time: 44.569s Computed global error Bellman mean: 0.31998081255164346 iter: 5 Computed global error Bellman mean: 0.2997701749506863 iter: 1 label KQLearning, Reward 58: -141.385, Len(game): 76, Training Time: 4.384s, Prediction Time: 45.745s Computed global error Bellman mean: 0.8587894065022125 iter: 1 Computed global error Bellman mean: 0.2997701749506863 iter: 0 Computed global error Bellman mean: 0.9609382850441174 iter: 0 label KQLearning, Reward 59: -126.471, Len(game): 90, Training Time: 4.439s, Prediction Time: 47.134s Computed global error Bellman mean: 0.13265114924030735 iter: 5 Computed global error Bellman mean: 0.0851737249958913 iter: 5 label KQLearning, Reward 60: -158.298, Len(game): 65, Training Time: 4.496s, Prediction Time: 48.172s Computed global error Bellman mean: 1.8792662783272623e-07 iter: 5 Computed global error Bellman mean: 0.04316964766155983 iter: 5 label KQLearning, Reward 61: -128.194, Len(game): 87, Training Time: 4.568s, Prediction Time: 49.567s Computed global error Bellman mean: 5.014585652317337e-07 iter: 5 Computed global error Bellman mean: 0.012240207265594015 iter: 5 label KQLearning, Reward 62: -20.612, Len(game): 129, Training Time: 4.663s, Prediction Time: 51.667s Computed global error Bellman mean: 0.0002394081257820467 iter: 5 Computed global error Bellman mean: 0.01220292199060476 iter: 3 label KQLearning, Reward 63: -114.193, Len(game): 89, Training Time: 4.734s, Prediction Time: 53.146s Computed global error Bellman mean: 0.002011721584875883 iter: 5 label KQLearning, Reward 64: -161.212, Len(game): 223, Training Time: 4.922s, Prediction Time: 56.871s Computed global error Bellman mean: 0.0010813878500443924 iter: 5 label KQLearning, Reward 65: -43.540, Len(game): 86, Training Time: 4.954s, Prediction Time: 58.373s Computed global error Bellman mean: 
0.041838648664400034 iter: 5 Computed global error Bellman mean: 0.021678195969673072 iter: 3 label KQLearning, Reward 66: -76.536, Len(game): 88, Training Time: 5.033s, Prediction Time: 59.912s Computed global error Bellman mean: 3.1878030651204306e-07 iter: 5 Computed global error Bellman mean: 0.021678195969673072 iter: 0 label KQLearning, Reward 67: -329.870, Len(game): 116, Training Time: 5.105s, Prediction Time: 61.973s Computed global error Bellman mean: 2.8089624420491666e-07 iter: 4 label KQLearning, Reward 68: -110.082, Len(game): 145, Training Time: 5.170s, Prediction Time: 64.594s Computed global error Bellman mean: 4.9160521471762124e-05 iter: 5 label KQLearning, Reward 69: -112.219, Len(game): 67, Training Time: 5.200s, Prediction Time: 65.840s Computed global error Bellman mean: 0.01302253247801825 iter: 5 Computed global error Bellman mean: 0.03073940390872866 iter: 0 label KQLearning, Reward 70: -165.107, Len(game): 125, Training Time: 5.287s, Prediction Time: 68.142s Computed global error Bellman mean: 1.450390678825464e-07 iter: 5 label KQLearning, Reward 71: -77.585, Len(game): 100, Training Time: 5.333s, Prediction Time: 70.012s Computed global error Bellman mean: 0.002150057297677937 iter: 5 label KQLearning, Reward 72: -99.093, Len(game): 89, Training Time: 5.370s, Prediction Time: 71.710s Computed global error Bellman mean: 1.5736717288337026e-07 iter: 5 label KQLearning, Reward 73: -53.694, Len(game): 95, Training Time: 5.413s, Prediction Time: 73.544s Computed global error Bellman mean: 0.10379776490011078 iter: 5 Computed global error Bellman mean: 0.03528469701228075 iter: 5 label KQLearning, Reward 74: -346.314, Len(game): 123, Training Time: 5.568s, Prediction Time: 75.974s Computed global error Bellman mean: 1.2592304101800802e-07 iter: 4 Computed global error Bellman mean: 0.03528469701228075 iter: 0 label KQLearning, Reward 75: -155.581, Len(game): 66, Training Time: 5.618s, Prediction Time: 77.293s Computed global error Bellman 
mean: 4.4272656867935096e-07 iter: 5 label KQLearning, Reward 76: -37.157, Len(game): 134, Training Time: 5.689s, Prediction Time: 79.989s Computed global error Bellman mean: 0.0035638326415961073 iter: 5 label KQLearning, Reward 77: -366.402, Len(game): 100, Training Time: 5.733s, Prediction Time: 82.029s Computed global error Bellman mean: 9.57788216707127e-08 iter: 5 label KQLearning, Reward 78: -100.144, Len(game): 58, Training Time: 5.756s, Prediction Time: 83.246s Computed global error Bellman mean: 1.4021770559091706e-07 iter: 5 label KQLearning, Reward 79: -194.275, Len(game): 100, Training Time: 5.801s, Prediction Time: 85.323s Computed global error Bellman mean: 0.001419789484288436 iter: 5 label KQLearning, Reward 80: -263.120, Len(game): 138, Training Time: 5.872s, Prediction Time: 88.233s Computed global error Bellman mean: 3.0411055276608776e-07 iter: 5 label KQLearning, Reward 81: -81.549, Len(game): 137, Training Time: 5.945s, Prediction Time: 91.170s Computed global error Bellman mean: 6.369757501627322e-08 iter: 5 label KQLearning, Reward 82: -120.753, Len(game): 65, Training Time: 5.973s, Prediction Time: 92.603s Computed global error Bellman mean: 3.226556651880328e-07 iter: 5 label KQLearning, Reward 83: -122.095, Len(game): 102, Training Time: 6.020s, Prediction Time: 94.849s Computed global error Bellman mean: 3.7966944364115074e-07 iter: 4 label KQLearning, Reward 84: -148.674, Len(game): 128, Training Time: 6.072s, Prediction Time: 97.709s Computed global error Bellman mean: 0.02860698640944178 iter: 5 Computed global error Bellman mean: 0.018572779889972066 iter: 4 label KQLearning, Reward 85: -176.882, Len(game): 186, Training Time: 6.440s, Prediction Time: 101.923s Computed global error Bellman mean: 0.763425499234148 iter: 3 Computed global error Bellman mean: 0.018572779889972066 iter: 0 Computed global error Bellman mean: 0.7819904862272558 iter: 0 label KQLearning, Reward 86: -139.821, Len(game): 107, Training Time: 6.569s, 
Prediction Time: 104.375s Computed global error Bellman mean: 1.9305178000519866e-07 iter: 4 label KQLearning, Reward 87: -89.087, Len(game): 93, Training Time: 6.606s, Prediction Time: 106.550s Computed global error Bellman mean: 5.592149356914433e-07 iter: 4 label KQLearning, Reward 88: -172.517, Len(game): 97, Training Time: 6.643s, Prediction Time: 108.857s Computed global error Bellman mean: 4.684396435051234 iter: 0 Computed global error Bellman mean: 0.09762563230220624 iter: 5 label KQLearning, Reward 89: -172.966, Len(game): 99, Training Time: 6.729s, Prediction Time: 111.230s Computed global error Bellman mean: 1.2812629991544435e-07 iter: 5 Computed global error Bellman mean: 0.021922939016299922 iter: 5 label KQLearning, Reward 90: -30.679, Len(game): 63, Training Time: 6.814s, Prediction Time: 112.767s Computed global error Bellman mean: 1.558713637569061e-07 iter: 5 Computed global error Bellman mean: 0.02126550307632721 iter: 2 label KQLearning, Reward 91: -109.270, Len(game): 90, Training Time: 6.894s, Prediction Time: 114.976s Computed global error Bellman mean: 1.8942307178829204e-07 iter: 4 label KQLearning, Reward 92: -76.169, Len(game): 71, Training Time: 6.919s, Prediction Time: 116.750s Computed global error Bellman mean: 4.7346803379463134e-07 iter: 5 label KQLearning, Reward 93: -168.278, Len(game): 137, Training Time: 6.996s, Prediction Time: 120.165s Computed global error Bellman mean: 1.5971887492236893e-07 iter: 5 label KQLearning, Reward 94: -70.672, Len(game): 77, Training Time: 7.030s, Prediction Time: 122.115s Computed global error Bellman mean: 1.5335284951487005e-07 iter: 5 label KQLearning, Reward 95: -98.835, Len(game): 102, Training Time: 7.079s, Prediction Time: 124.702s Computed global error Bellman mean: 5.951184342647667e-07 iter: 4 label KQLearning, Reward 96: -2.054, Len(game): 103, Training Time: 7.121s, Prediction Time: 127.361s Computed global error Bellman mean: 4.232589460560222e-07 iter: 5 label KQLearning, Reward 
97: -28.490, Len(game): 92, Training Time: 7.164s, Prediction Time: 129.775s Computed global error Bellman mean: 3.9706448843362365e-07 iter: 4 label KQLearning, Reward 98: -190.312, Len(game): 145, Training Time: 7.228s, Prediction Time: 133.584s Computed global error Bellman mean: 1.3688804971793133e-07 iter: 5 label KQLearning, Reward 99: -56.519, Len(game): 87, Training Time: 7.266s, Prediction Time: 135.903s 0 label PPOAgent, Reward 0: -272.693, Len(game): 75, Training Time: 0.026s, Prediction Time: 0.026s label PPOAgent, Reward 1: -326.853, Len(game): 91, Training Time: 0.052s, Prediction Time: 0.052s label PPOAgent, Reward 2: -156.853, Len(game): 97, Training Time: 0.082s, Prediction Time: 0.082s label PPOAgent, Reward 3: -351.866, Len(game): 94, Training Time: 0.109s, Prediction Time: 0.109s label PPOAgent, Reward 4: -31.552, Len(game): 86, Training Time: 0.136s, Prediction Time: 0.136s label PPOAgent, Reward 5: -484.713, Len(game): 104, Training Time: 0.168s, Prediction Time: 0.168s label PPOAgent, Reward 6: -170.297, Len(game): 63, Training Time: 0.186s, Prediction Time: 0.186s label PPOAgent, Reward 7: -73.148, Len(game): 61, Training Time: 0.202s, Prediction Time: 0.202s label PPOAgent, Reward 8: -359.462, Len(game): 86, Training Time: 0.406s, Prediction Time: 0.406s label PPOAgent, Reward 9: -125.669, Len(game): 75, Training Time: 0.426s, Prediction Time: 0.426s label PPOAgent, Reward 10: -291.011, Len(game): 85, Training Time: 0.448s, Prediction Time: 0.448s label PPOAgent, Reward 11: -122.909, Len(game): 108, Training Time: 0.476s, Prediction Time: 0.476s label PPOAgent, Reward 12: -407.708, Len(game): 85, Training Time: 0.498s, Prediction Time: 0.498s label PPOAgent, Reward 13: -88.640, Len(game): 81, Training Time: 0.518s, Prediction Time: 0.518s label PPOAgent, Reward 14: -137.984, Len(game): 97, Training Time: 0.588s, Prediction Time: 0.588s label PPOAgent, Reward 15: -98.901, Len(game): 94, Training Time: 0.617s, Prediction Time: 0.617s label 
PPOAgent, Reward 16: -115.273, Len(game): 61, Training Time: 0.636s, Prediction Time: 0.636s label PPOAgent, Reward 17: -118.381, Len(game): 70, Training Time: 0.658s, Prediction Time: 0.658s label PPOAgent, Reward 18: -200.027, Len(game): 116, Training Time: 0.694s, Prediction Time: 0.694s label PPOAgent, Reward 19: -270.885, Len(game): 73, Training Time: 0.715s, Prediction Time: 0.715s label PPOAgent, Reward 20: -109.285, Len(game): 75, Training Time: 0.738s, Prediction Time: 0.738s label PPOAgent, Reward 21: -97.132, Len(game): 61, Training Time: 0.754s, Prediction Time: 0.754s label PPOAgent, Reward 22: -138.481, Len(game): 61, Training Time: 0.771s, Prediction Time: 0.771s label PPOAgent, Reward 23: -218.267, Len(game): 92, Training Time: 0.795s, Prediction Time: 0.795s label PPOAgent, Reward 24: -45.480, Len(game): 61, Training Time: 0.812s, Prediction Time: 0.812s label PPOAgent, Reward 25: -297.662, Len(game): 85, Training Time: 0.835s, Prediction Time: 0.835s label PPOAgent, Reward 26: -133.804, Len(game): 70, Training Time: 0.853s, Prediction Time: 0.853s label PPOAgent, Reward 27: -75.677, Len(game): 58, Training Time: 0.869s, Prediction Time: 0.869s label PPOAgent, Reward 28: -160.155, Len(game): 69, Training Time: 0.887s, Prediction Time: 0.887s label PPOAgent, Reward 29: -144.494, Len(game): 87, Training Time: 0.952s, Prediction Time: 0.952s label PPOAgent, Reward 30: -282.363, Len(game): 111, Training Time: 0.985s, Prediction Time: 0.985s label PPOAgent, Reward 31: -96.992, Len(game): 67, Training Time: 1.005s, Prediction Time: 1.005s label PPOAgent, Reward 32: -176.616, Len(game): 66, Training Time: 1.026s, Prediction Time: 1.026s label PPOAgent, Reward 33: -148.627, Len(game): 69, Training Time: 1.048s, Prediction Time: 1.048s label PPOAgent, Reward 34: -115.371, Len(game): 95, Training Time: 1.078s, Prediction Time: 1.078s label PPOAgent, Reward 35: -155.800, Len(game): 89, Training Time: 1.104s, Prediction Time: 1.104s label PPOAgent, Reward 36: 
-74.617, Len(game): 63, Training Time: 1.122s, Prediction Time: 1.122s label PPOAgent, Reward 37: -99.922, Len(game): 69, Training Time: 1.141s, Prediction Time: 1.141s label PPOAgent, Reward 38: -133.277, Len(game): 96, Training Time: 1.166s, Prediction Time: 1.166s label PPOAgent, Reward 39: -116.466, Len(game): 93, Training Time: 1.192s, Prediction Time: 1.192s label PPOAgent, Reward 40: 6.117, Len(game): 76, Training Time: 1.213s, Prediction Time: 1.213s label PPOAgent, Reward 41: -98.545, Len(game): 103, Training Time: 1.240s, Prediction Time: 1.240s label PPOAgent, Reward 42: -131.679, Len(game): 87, Training Time: 1.263s, Prediction Time: 1.263s label PPOAgent, Reward 43: -99.885, Len(game): 86, Training Time: 1.285s, Prediction Time: 1.285s label PPOAgent, Reward 44: -173.818, Len(game): 90, Training Time: 1.351s, Prediction Time: 1.351s label PPOAgent, Reward 45: -140.466, Len(game): 85, Training Time: 1.377s, Prediction Time: 1.377s label PPOAgent, Reward 46: -101.473, Len(game): 97, Training Time: 1.406s, Prediction Time: 1.406s label PPOAgent, Reward 47: -95.563, Len(game): 79, Training Time: 1.431s, Prediction Time: 1.431s label PPOAgent, Reward 48: -61.422, Len(game): 62, Training Time: 1.452s, Prediction Time: 1.452s label PPOAgent, Reward 49: -154.342, Len(game): 90, Training Time: 1.481s, Prediction Time: 1.481s label PPOAgent, Reward 50: -97.530, Len(game): 62, Training Time: 1.500s, Prediction Time: 1.500s label PPOAgent, Reward 51: -138.518, Len(game): 107, Training Time: 1.528s, Prediction Time: 1.528s label PPOAgent, Reward 52: -0.738, Len(game): 112, Training Time: 1.556s, Prediction Time: 1.556s label PPOAgent, Reward 53: -85.017, Len(game): 55, Training Time: 1.571s, Prediction Time: 1.571s label PPOAgent, Reward 54: -99.419, Len(game): 63, Training Time: 1.588s, Prediction Time: 1.588s label PPOAgent, Reward 55: -93.208, Len(game): 89, Training Time: 1.611s, Prediction Time: 1.611s label PPOAgent, Reward 56: -117.814, Len(game): 64, 
Training Time: 1.628s, Prediction Time: 1.628s label PPOAgent, Reward 57: -352.768, Len(game): 119, Training Time: 1.659s, Prediction Time: 1.659s label PPOAgent, Reward 58: -111.589, Len(game): 88, Training Time: 1.722s, Prediction Time: 1.722s label PPOAgent, Reward 59: -117.000, Len(game): 64, Training Time: 1.742s, Prediction Time: 1.742s label PPOAgent, Reward 60: -93.238, Len(game): 72, Training Time: 1.764s, Prediction Time: 1.764s label PPOAgent, Reward 61: -101.310, Len(game): 87, Training Time: 1.790s, Prediction Time: 1.790s label PPOAgent, Reward 62: -82.012, Len(game): 77, Training Time: 1.814s, Prediction Time: 1.814s label PPOAgent, Reward 63: -152.909, Len(game): 116, Training Time: 1.848s, Prediction Time: 1.848s label PPOAgent, Reward 64: -128.238, Len(game): 96, Training Time: 1.879s, Prediction Time: 1.879s label PPOAgent, Reward 65: -127.991, Len(game): 74, Training Time: 1.899s, Prediction Time: 1.899s label PPOAgent, Reward 66: -80.294, Len(game): 61, Training Time: 1.914s, Prediction Time: 1.914s label PPOAgent, Reward 67: -117.948, Len(game): 70, Training Time: 1.931s, Prediction Time: 1.931s label PPOAgent, Reward 68: -81.239, Len(game): 74, Training Time: 1.950s, Prediction Time: 1.950s label PPOAgent, Reward 69: -120.795, Len(game): 101, Training Time: 1.976s, Prediction Time: 1.976s label PPOAgent, Reward 70: -425.281, Len(game): 132, Training Time: 2.010s, Prediction Time: 2.010s label PPOAgent, Reward 71: -122.421, Len(game): 106, Training Time: 2.038s, Prediction Time: 2.038s label PPOAgent, Reward 72: -108.586, Len(game): 76, Training Time: 2.100s, Prediction Time: 2.100s label PPOAgent, Reward 73: 7.097, Len(game): 91, Training Time: 2.128s, Prediction Time: 2.128s label PPOAgent, Reward 74: -111.053, Len(game): 119, Training Time: 2.163s, Prediction Time: 2.163s label PPOAgent, Reward 75: -65.215, Len(game): 57, Training Time: 2.180s, Prediction Time: 2.180s label PPOAgent, Reward 76: -89.396, Len(game): 67, Training Time: 2.202s, 
Prediction Time: 2.202s label PPOAgent, Reward 77: -91.236, Len(game): 87, Training Time: 2.229s, Prediction Time: 2.229s label PPOAgent, Reward 78: -104.544, Len(game): 73, Training Time: 2.252s, Prediction Time: 2.252s label PPOAgent, Reward 79: -112.192, Len(game): 56, Training Time: 2.266s, Prediction Time: 2.266s label PPOAgent, Reward 80: -172.109, Len(game): 99, Training Time: 2.292s, Prediction Time: 2.292s label PPOAgent, Reward 81: -132.582, Len(game): 94, Training Time: 2.316s, Prediction Time: 2.316s label PPOAgent, Reward 82: -123.629, Len(game): 96, Training Time: 2.341s, Prediction Time: 2.341s label PPOAgent, Reward 83: -69.742, Len(game): 70, Training Time: 2.359s, Prediction Time: 2.359s label PPOAgent, Reward 84: -86.067, Len(game): 62, Training Time: 2.375s, Prediction Time: 2.375s label PPOAgent, Reward 85: -127.144, Len(game): 98, Training Time: 2.401s, Prediction Time: 2.401s label PPOAgent, Reward 86: -162.282, Len(game): 68, Training Time: 2.419s, Prediction Time: 2.419s label PPOAgent, Reward 87: -259.057, Len(game): 89, Training Time: 2.482s, Prediction Time: 2.482s label PPOAgent, Reward 88: -95.300, Len(game): 111, Training Time: 2.516s, Prediction Time: 2.516s label PPOAgent, Reward 89: -422.531, Len(game): 81, Training Time: 2.540s, Prediction Time: 2.540s label PPOAgent, Reward 90: -264.879, Len(game): 100, Training Time: 2.571s, Prediction Time: 2.571s label PPOAgent, Reward 91: 31.986, Len(game): 80, Training Time: 2.595s, Prediction Time: 2.595s label PPOAgent, Reward 92: -128.386, Len(game): 86, Training Time: 2.623s, Prediction Time: 2.623s label PPOAgent, Reward 93: -204.219, Len(game): 97, Training Time: 2.651s, Prediction Time: 2.651s label PPOAgent, Reward 94: -197.231, Len(game): 82, Training Time: 2.672s, Prediction Time: 2.672s label PPOAgent, Reward 95: -228.226, Len(game): 96, Training Time: 2.697s, Prediction Time: 2.697s label PPOAgent, Reward 96: -117.467, Len(game): 82, Training Time: 2.719s, Prediction Time: 2.719s 
label PPOAgent, Reward 97: -101.354, Len(game): 63, Training Time: 2.735s, Prediction Time: 2.735s label PPOAgent, Reward 98: -180.603, Len(game): 110, Training Time: 2.763s, Prediction Time: 2.763s label PPOAgent, Reward 99: -68.018, Len(game): 60, Training Time: 2.779s, Prediction Time: 2.779s label Controller-based, Reward 0: -109.187, Len(game): 78, Training Time: 0.002s, Prediction Time: 0.002s label Controller-based, Reward 1: -145.415, Len(game): 58, Training Time: 0.004s, Prediction Time: 0.003s label Controller-based, Reward 2: -87.215, Len(game): 65, Training Time: 0.013s, Prediction Time: 0.004s label Controller-based, Reward 3: -328.020, Len(game): 1882, Training Time: 0.049s, Prediction Time: 0.060s label Controller-based, Reward 4: -272.301, Len(game): 325, Training Time: 0.064s, Prediction Time: 0.069s label Controller-based, Reward 5: -22.272, Len(game): 59, Training Time: 0.075s, Prediction Time: 0.071s label Controller-based, Reward 6: -117.033, Len(game): 66, Training Time: 0.087s, Prediction Time: 0.073s label Controller-based, Reward 7: -108.191, Len(game): 70, Training Time: 0.097s, Prediction Time: 0.074s label Controller-based, Reward 8: 5.976, Len(game): 100, Training Time: 0.109s, Prediction Time: 0.078s label Controller-based, Reward 9: 249.708, Len(game): 198, Training Time: 0.123s, Prediction Time: 0.084s label Controller-based, Reward 10: -127.574, Len(game): 155, Training Time: 0.137s, Prediction Time: 0.088s label Controller-based, Reward 11: -108.222, Len(game): 78, Training Time: 0.149s, Prediction Time: 0.091s label Controller-based, Reward 12: -164.595, Len(game): 118, Training Time: 0.161s, Prediction Time: 0.095s label Controller-based, Reward 13: -381.053, Len(game): 90, Training Time: 0.174s, Prediction Time: 0.098s label Controller-based, Reward 14: -66.826, Len(game): 84, Training Time: 0.187s, Prediction Time: 0.101s label Controller-based, Reward 15: -46.173, Len(game): 80, Training Time: 0.199s, Prediction Time: 0.103s 
label Controller-based, Reward 16: -73.804, Len(game): 113, Training Time: 0.214s, Prediction Time: 0.108s label Controller-based, Reward 17: -28.463, Len(game): 91, Training Time: 0.228s, Prediction Time: 0.110s label Controller-based, Reward 18: -5.673, Len(game): 119, Training Time: 0.242s, Prediction Time: 0.113s label Controller-based, Reward 19: -321.711, Len(game): 89, Training Time: 0.255s, Prediction Time: 0.117s label Controller-based, Reward 20: -269.180, Len(game): 126, Training Time: 0.270s, Prediction Time: 0.120s label Controller-based, Reward 21: -308.091, Len(game): 159, Training Time: 0.284s, Prediction Time: 0.124s label Controller-based, Reward 22: -298.844, Len(game): 87, Training Time: 0.298s, Prediction Time: 0.128s label Controller-based, Reward 23: -275.690, Len(game): 95, Training Time: 0.312s, Prediction Time: 0.129s label Controller-based, Reward 24: -386.044, Len(game): 100, Training Time: 0.326s, Prediction Time: 0.133s label Controller-based, Reward 25: -375.524, Len(game): 104, Training Time: 0.339s, Prediction Time: 0.136s label Controller-based, Reward 26: -314.856, Len(game): 103, Training Time: 0.353s, Prediction Time: 0.139s label Controller-based, Reward 27: -313.109, Len(game): 115, Training Time: 0.368s, Prediction Time: 0.142s label Controller-based, Reward 28: -306.189, Len(game): 96, Training Time: 0.381s, Prediction Time: 0.144s label Controller-based, Reward 29: -371.251, Len(game): 150, Training Time: 0.397s, Prediction Time: 0.149s label Controller-based, Reward 30: -195.533, Len(game): 168, Training Time: 0.411s, Prediction Time: 0.153s label Controller-based, Reward 31: -27.537, Len(game): 123, Training Time: 0.426s, Prediction Time: 0.156s label Controller-based, Reward 32: -307.507, Len(game): 144, Training Time: 0.441s, Prediction Time: 0.160s label Controller-based, Reward 33: -318.105, Len(game): 98, Training Time: 0.455s, Prediction Time: 0.162s label Controller-based, Reward 34: -321.512, Len(game): 93, 
Training Time: 0.468s, Prediction Time: 0.165s label Controller-based, Reward 35: -45.163, Len(game): 68, Training Time: 0.482s, Prediction Time: 0.168s label Controller-based, Reward 36: -307.871, Len(game): 92, Training Time: 0.497s, Prediction Time: 0.170s label Controller-based, Reward 37: -123.997, Len(game): 164, Training Time: 0.514s, Prediction Time: 0.175s label Controller-based, Reward 38: -450.676, Len(game): 97, Training Time: 0.527s, Prediction Time: 0.178s label Controller-based, Reward 39: -324.244, Len(game): 113, Training Time: 0.541s, Prediction Time: 0.181s label Controller-based, Reward 40: -296.305, Len(game): 96, Training Time: 0.555s, Prediction Time: 0.184s label Controller-based, Reward 41: -303.796, Len(game): 100, Training Time: 0.569s, Prediction Time: 0.186s label Controller-based, Reward 42: -469.271, Len(game): 99, Training Time: 0.584s, Prediction Time: 0.189s label Controller-based, Reward 43: -258.104, Len(game): 111, Training Time: 0.598s, Prediction Time: 0.191s label Controller-based, Reward 44: -164.094, Len(game): 105, Training Time: 0.612s, Prediction Time: 0.194s label Controller-based, Reward 45: -336.855, Len(game): 148, Training Time: 0.626s, Prediction Time: 0.197s label Controller-based, Reward 46: -307.578, Len(game): 91, Training Time: 0.640s, Prediction Time: 0.199s label Controller-based, Reward 47: 43.214, Len(game): 135, Training Time: 0.655s, Prediction Time: 0.204s label Controller-based, Reward 48: -314.221, Len(game): 162, Training Time: 0.671s, Prediction Time: 0.209s label Controller-based, Reward 49: -159.623, Len(game): 140, Training Time: 0.686s, Prediction Time: 0.213s label Controller-based, Reward 50: 240.217, Len(game): 214, Training Time: 0.701s, Prediction Time: 0.219s label Controller-based, Reward 51: -306.454, Len(game): 103, Training Time: 0.715s, Prediction Time: 0.222s label Controller-based, Reward 52: 283.703, Len(game): 262, Training Time: 0.729s, Prediction Time: 0.230s label 
Controller-based, Reward 53: -117.697, Len(game): 207, Training Time: 0.745s, Prediction Time: 0.235s label Controller-based, Reward 54: 28.239, Len(game): 113, Training Time: 0.761s, Prediction Time: 0.238s label Controller-based, Reward 55: -109.534, Len(game): 112, Training Time: 0.776s, Prediction Time: 0.241s label Controller-based, Reward 56: -270.764, Len(game): 124, Training Time: 0.791s, Prediction Time: 0.244s label Controller-based, Reward 57: -105.658, Len(game): 81, Training Time: 0.804s, Prediction Time: 0.247s label Controller-based, Reward 58: -290.051, Len(game): 141, Training Time: 0.819s, Prediction Time: 0.250s label Controller-based, Reward 59: -344.216, Len(game): 103, Training Time: 0.832s, Prediction Time: 0.253s label Controller-based, Reward 60: 6.544, Len(game): 115, Training Time: 0.847s, Prediction Time: 0.256s label Controller-based, Reward 61: -42.887, Len(game): 92, Training Time: 0.861s, Prediction Time: 0.258s label Controller-based, Reward 62: -252.574, Len(game): 121, Training Time: 0.875s, Prediction Time: 0.261s label Controller-based, Reward 63: -323.205, Len(game): 103, Training Time: 0.890s, Prediction Time: 0.264s label Controller-based, Reward 64: -239.030, Len(game): 77, Training Time: 0.903s, Prediction Time: 0.267s label Controller-based, Reward 65: -205.818, Len(game): 69, Training Time: 0.917s, Prediction Time: 0.270s label Controller-based, Reward 66: 6.979, Len(game): 98, Training Time: 0.931s, Prediction Time: 0.273s label Controller-based, Reward 67: -311.754, Len(game): 87, Training Time: 0.945s, Prediction Time: 0.275s label Controller-based, Reward 68: -137.941, Len(game): 116, Training Time: 0.959s, Prediction Time: 0.278s label Controller-based, Reward 69: -309.389, Len(game): 121, Training Time: 0.974s, Prediction Time: 0.281s label Controller-based, Reward 70: -338.159, Len(game): 90, Training Time: 0.987s, Prediction Time: 0.284s label Controller-based, Reward 71: -328.075, Len(game): 83, Training Time: 
1.001s, Prediction Time: 0.287s label Controller-based, Reward 72: -180.717, Len(game): 94, Training Time: 1.015s, Prediction Time: 0.289s label Controller-based, Reward 73: -313.592, Len(game): 127, Training Time: 1.031s, Prediction Time: 0.292s label Controller-based, Reward 74: -307.018, Len(game): 101, Training Time: 1.044s, Prediction Time: 0.295s label Controller-based, Reward 75: -254.989, Len(game): 99, Training Time: 1.058s, Prediction Time: 0.298s label Controller-based, Reward 76: -305.602, Len(game): 89, Training Time: 1.071s, Prediction Time: 0.300s label Controller-based, Reward 77: -105.594, Len(game): 119, Training Time: 1.085s, Prediction Time: 0.303s label Controller-based, Reward 78: -141.775, Len(game): 201, Training Time: 1.100s, Prediction Time: 0.308s label Controller-based, Reward 79: -315.513, Len(game): 85, Training Time: 1.114s, Prediction Time: 0.310s label Controller-based, Reward 80: -30.117, Len(game): 120, Training Time: 1.128s, Prediction Time: 0.313s label Controller-based, Reward 81: 52.009, Len(game): 125, Training Time: 1.143s, Prediction Time: 0.316s label Controller-based, Reward 82: -318.479, Len(game): 100, Training Time: 1.157s, Prediction Time: 0.319s label Controller-based, Reward 83: -295.388, Len(game): 92, Training Time: 1.170s, Prediction Time: 0.321s label Controller-based, Reward 84: 232.479, Len(game): 212, Training Time: 1.186s, Prediction Time: 0.328s label Controller-based, Reward 85: -19.038, Len(game): 108, Training Time: 1.201s, Prediction Time: 0.332s label Controller-based, Reward 86: -93.804, Len(game): 73, Training Time: 1.214s, Prediction Time: 0.335s label Controller-based, Reward 87: -253.262, Len(game): 107, Training Time: 1.228s, Prediction Time: 0.338s label Controller-based, Reward 88: -306.118, Len(game): 103, Training Time: 1.243s, Prediction Time: 0.340s label Controller-based, Reward 89: -355.999, Len(game): 152, Training Time: 1.257s, Prediction Time: 0.345s label Controller-based, Reward 90: 
-342.132, Len(game): 131, Training Time: 1.270s, Prediction Time: 0.349s label Controller-based, Reward 91: 9.256, Len(game): 107, Training Time: 1.285s, Prediction Time: 0.352s label Controller-based, Reward 92: -300.264, Len(game): 193, Training Time: 1.301s, Prediction Time: 0.358s label Controller-based, Reward 93: 48.608, Len(game): 105, Training Time: 1.315s, Prediction Time: 0.360s label Controller-based, Reward 94: -311.169, Len(game): 101, Training Time: 1.328s, Prediction Time: 0.363s label Controller-based, Reward 95: 15.571, Len(game): 103, Training Time: 1.343s, Prediction Time: 0.365s label Controller-based, Reward 96: -284.440, Len(game): 99, Training Time: 1.356s, Prediction Time: 0.367s label Controller-based, Reward 97: -210.060, Len(game): 236, Training Time: 1.371s, Prediction Time: 0.373s label Controller-based, Reward 98: -148.800, Len(game): 104, Training Time: 1.385s, Prediction Time: 0.376s label Controller-based, Reward 99: 2.763, Len(game): 109, Training Time: 1.398s, Prediction Time: 0.380s label KACAgent, Reward 0: -96.842, Len(game): 70, Training Time: 0.010s, Prediction Time: 0.001s label KACAgent, Reward 1: -369.215, Len(game): 84, Training Time: 0.019s, Prediction Time: 0.017s label KACAgent, Reward 2: -61.811, Len(game): 123, Training Time: 0.035s, Prediction Time: 0.039s label KACAgent, Reward 3: -153.078, Len(game): 93, Training Time: 0.063s, Prediction Time: 0.061s label KACAgent, Reward 4: -118.294, Len(game): 57, Training Time: 0.113s, Prediction Time: 0.083s label KACAgent, Reward 5: -69.676, Len(game): 108, Training Time: 0.175s, Prediction Time: 0.135s label KACAgent, Reward 6: -48.635, Len(game): 65, Training Time: 0.256s, Prediction Time: 0.174s label KACAgent, Reward 7: -284.986, Len(game): 101, Training Time: 0.347s, Prediction Time: 0.238s label KACAgent, Reward 8: -424.661, Len(game): 155, Training Time: 0.479s, Prediction Time: 0.326s label KACAgent, Reward 9: -323.118, Len(game): 100, Training Time: 0.648s, 
Prediction Time: 0.414s label KACAgent, Reward 10: -90.706, Len(game): 76, Training Time: 0.862s, Prediction Time: 0.506s label KACAgent, Reward 11: -109.113, Len(game): 95, Training Time: 1.083s, Prediction Time: 0.627s label KACAgent, Reward 12: -72.714, Len(game): 127, Training Time: 1.357s, Prediction Time: 0.784s label KACAgent, Reward 13: -11.326, Len(game): 104, Training Time: 1.704s, Prediction Time: 0.937s label KACAgent, Reward 14: 9.094, Len(game): 116, Training Time: 2.095s, Prediction Time: 1.121s label KACAgent, Reward 15: -107.403, Len(game): 132, Training Time: 2.545s, Prediction Time: 1.352s label KACAgent, Reward 16: -47.898, Len(game): 88, Training Time: 3.076s, Prediction Time: 1.584s label KACAgent, Reward 17: -261.339, Len(game): 85, Training Time: 3.667s, Prediction Time: 1.845s label KACAgent, Reward 18: -102.691, Len(game): 110, Training Time: 4.297s, Prediction Time: 2.147s label KACAgent, Reward 19: -114.236, Len(game): 86, Training Time: 5.023s, Prediction Time: 2.444s label KACAgent, Reward 20: 17.024, Len(game): 81, Training Time: 5.783s, Prediction Time: 2.782s label KACAgent, Reward 21: -309.332, Len(game): 117, Training Time: 6.616s, Prediction Time: 3.179s label KACAgent, Reward 22: -54.321, Len(game): 93, Training Time: 7.521s, Prediction Time: 3.579s label KACAgent, Reward 23: -256.266, Len(game): 104, Training Time: 8.484s, Prediction Time: 4.021s label KACAgent, Reward 24: -276.718, Len(game): 89, Training Time: 9.515s, Prediction Time: 4.499s label KACAgent, Reward 25: -227.532, Len(game): 127, Training Time: 10.628s, Prediction Time: 5.045s label KACAgent, Reward 26: -215.051, Len(game): 116, Training Time: 11.873s, Prediction Time: 5.625s label KACAgent, Reward 27: -69.964, Len(game): 105, Training Time: 13.218s, Prediction Time: 6.265s label KACAgent, Reward 28: -39.724, Len(game): 100, Training Time: 14.688s, Prediction Time: 6.947s label KACAgent, Reward 29: -81.385, Len(game): 101, Training Time: 16.245s, Prediction 
Time: 7.720s label KACAgent, Reward 30: -82.343, Len(game): 85, Training Time: 17.946s, Prediction Time: 8.529s label KACAgent, Reward 31: -263.379, Len(game): 87, Training Time: 19.784s, Prediction Time: 9.395s label KACAgent, Reward 32: -43.900, Len(game): 127, Training Time: 21.637s, Prediction Time: 10.360s label KACAgent, Reward 33: -39.229, Len(game): 100, Training Time: 23.725s, Prediction Time: 11.306s label KACAgent, Reward 34: -3.636, Len(game): 96, Training Time: 25.901s, Prediction Time: 12.406s label KACAgent, Reward 35: -149.382, Len(game): 143, Training Time: 28.247s, Prediction Time: 13.623s label KACAgent, Reward 36: 3.228, Len(game): 124, Training Time: 30.723s, Prediction Time: 14.857s label KACAgent, Reward 37: -44.013, Len(game): 97, Training Time: 33.358s, Prediction Time: 16.130s label KACAgent, Reward 38: -36.072, Len(game): 92, Training Time: 36.156s, Prediction Time: 17.473s label KACAgent, Reward 39: -9.850, Len(game): 146, Training Time: 39.122s, Prediction Time: 18.980s label KACAgent, Reward 40: -38.324, Len(game): 110, Training Time: 42.274s, Prediction Time: 20.569s label KACAgent, Reward 41: -42.383, Len(game): 89, Training Time: 45.614s, Prediction Time: 22.250s label KACAgent, Reward 42: -27.385, Len(game): 231, Training Time: 49.047s, Prediction Time: 24.259s label KACAgent, Reward 43: -16.779, Len(game): 83, Training Time: 52.967s, Prediction Time: 26.070s label KACAgent, Reward 44: -61.728, Len(game): 81, Training Time: 52.967s, Prediction Time: 28.125s label KACAgent, Reward 45: -27.585, Len(game): 97, Training Time: 52.967s, Prediction Time: 28.272s label KACAgent, Reward 46: -214.309, Len(game): 153, Training Time: 52.967s, Prediction Time: 28.479s label KACAgent, Reward 47: 0.448, Len(game): 99, Training Time: 52.967s, Prediction Time: 28.612s label KACAgent, Reward 48: -21.611, Len(game): 100, Training Time: 52.967s, Prediction Time: 28.746s label KACAgent, Reward 49: -52.920, Len(game): 123, Training Time: 52.967s, 
Prediction Time: 28.914s label KACAgent, Reward 50: -10.061, Len(game): 104, Training Time: 52.967s, Prediction Time: 29.055s label KACAgent, Reward 51: -43.226, Len(game): 103, Training Time: 52.967s, Prediction Time: 29.195s label KACAgent, Reward 52: 18.740, Len(game): 90, Training Time: 52.967s, Prediction Time: 29.318s label KACAgent, Reward 53: -8.629, Len(game): 98, Training Time: 52.967s, Prediction Time: 29.451s label KACAgent, Reward 54: 1.215, Len(game): 168, Training Time: 52.967s, Prediction Time: 29.679s label KACAgent, Reward 55: -266.347, Len(game): 142, Training Time: 52.967s, Prediction Time: 29.872s label KACAgent, Reward 56: -5.459, Len(game): 104, Training Time: 52.967s, Prediction Time: 30.011s label KACAgent, Reward 57: -160.539, Len(game): 146, Training Time: 52.967s, Prediction Time: 30.211s label KACAgent, Reward 58: -12.529, Len(game): 118, Training Time: 52.967s, Prediction Time: 30.371s label KACAgent, Reward 59: -35.501, Len(game): 120, Training Time: 52.967s, Prediction Time: 30.535s label KACAgent, Reward 60: 19.573, Len(game): 107, Training Time: 52.967s, Prediction Time: 30.679s label KACAgent, Reward 61: -14.930, Len(game): 87, Training Time: 52.967s, Prediction Time: 30.795s label KACAgent, Reward 62: -32.161, Len(game): 100, Training Time: 52.967s, Prediction Time: 30.932s label KACAgent, Reward 63: 7.399, Len(game): 114, Training Time: 52.967s, Prediction Time: 31.085s label KACAgent, Reward 64: -3.288, Len(game): 113, Training Time: 52.967s, Prediction Time: 31.236s label KACAgent, Reward 65: -82.042, Len(game): 125, Training Time: 52.967s, Prediction Time: 31.408s label KACAgent, Reward 66: -53.235, Len(game): 106, Training Time: 52.967s, Prediction Time: 31.552s label KACAgent, Reward 67: -62.717, Len(game): 150, Training Time: 52.967s, Prediction Time: 31.754s label KACAgent, Reward 68: -1.777, Len(game): 151, Training Time: 52.967s, Prediction Time: 31.958s label KACAgent, Reward 69: -43.269, Len(game): 74, Training Time: 
52.967s, Prediction Time: 32.058s label KACAgent, Reward 70: -63.784, Len(game): 110, Training Time: 52.967s, Prediction Time: 32.208s label KACAgent, Reward 71: -73.038, Len(game): 69, Training Time: 52.967s, Prediction Time: 32.303s label KACAgent, Reward 72: -68.695, Len(game): 114, Training Time: 52.967s, Prediction Time: 32.457s label KACAgent, Reward 73: 11.010, Len(game): 150, Training Time: 52.967s, Prediction Time: 32.659s label KACAgent, Reward 74: -12.993, Len(game): 103, Training Time: 52.967s, Prediction Time: 32.797s label KACAgent, Reward 75: -432.062, Len(game): 147, Training Time: 52.967s, Prediction Time: 32.997s label KACAgent, Reward 76: -29.239, Len(game): 100, Training Time: 52.967s, Prediction Time: 33.136s label KACAgent, Reward 77: 15.039, Len(game): 157, Training Time: 52.967s, Prediction Time: 33.350s label KACAgent, Reward 78: -60.004, Len(game): 116, Training Time: 52.967s, Prediction Time: 33.512s label KACAgent, Reward 79: 17.875, Len(game): 122, Training Time: 52.967s, Prediction Time: 33.676s label KACAgent, Reward 80: -10.065, Len(game): 160, Training Time: 52.967s, Prediction Time: 33.893s label KACAgent, Reward 81: -18.335, Len(game): 145, Training Time: 52.967s, Prediction Time: 34.091s label KACAgent, Reward 82: -89.590, Len(game): 162, Training Time: 52.967s, Prediction Time: 34.312s label KACAgent, Reward 83: -18.717, Len(game): 130, Training Time: 52.967s, Prediction Time: 34.489s label KACAgent, Reward 84: -73.466, Len(game): 138, Training Time: 52.967s, Prediction Time: 34.677s label KACAgent, Reward 85: 16.586, Len(game): 108, Training Time: 52.967s, Prediction Time: 34.823s label KACAgent, Reward 86: -48.256, Len(game): 110, Training Time: 52.967s, Prediction Time: 34.970s label KACAgent, Reward 87: -101.534, Len(game): 280, Training Time: 52.967s, Prediction Time: 35.348s label KACAgent, Reward 88: -57.668, Len(game): 107, Training Time: 52.967s, Prediction Time: 35.491s label KACAgent, Reward 89: -23.000, Len(game): 
147, Training Time: 52.967s, Prediction Time: 35.693s label KACAgent, Reward 90: -11.871, Len(game): 181, Training Time: 52.967s, Prediction Time: 35.940s label KACAgent, Reward 91: -7.514, Len(game): 171, Training Time: 52.967s, Prediction Time: 36.170s label KACAgent, Reward 92: -37.977, Len(game): 111, Training Time: 52.967s, Prediction Time: 36.322s label KACAgent, Reward 93: 6.681, Len(game): 79, Training Time: 52.967s, Prediction Time: 36.429s label KACAgent, Reward 94: -19.688, Len(game): 147, Training Time: 52.967s, Prediction Time: 36.629s label KACAgent, Reward 95: -45.433, Len(game): 150, Training Time: 52.967s, Prediction Time: 36.836s label KACAgent, Reward 96: 20.047, Len(game): 110, Training Time: 52.967s, Prediction Time: 36.984s label KACAgent, Reward 97: -79.894, Len(game): 152, Training Time: 52.967s, Prediction Time: 37.193s label KACAgent, Reward 98: -15.137, Len(game): 95, Training Time: 52.967s, Prediction Time: 37.323s label KACAgent, Reward 99: -51.432, Len(game): 146, Training Time: 52.967s, Prediction Time: 37.521s label PolicyGradient, Reward 0: -393.430, Len(game): 137, Training Time: 0.014s, Prediction Time: 0.003s label PolicyGradient, Reward 1: -75.146, Len(game): 71, Training Time: 0.030s, Prediction Time: 0.022s label PolicyGradient, Reward 2: -152.498, Len(game): 95, Training Time: 0.053s, Prediction Time: 0.046s label PolicyGradient, Reward 3: -324.945, Len(game): 94, Training Time: 0.088s, Prediction Time: 0.076s label PolicyGradient, Reward 4: -104.241, Len(game): 63, Training Time: 0.141s, Prediction Time: 0.101s label PolicyGradient, Reward 5: -105.430, Len(game): 61, Training Time: 0.203s, Prediction Time: 0.136s label PolicyGradient, Reward 6: -229.294, Len(game): 116, Training Time: 0.280s, Prediction Time: 0.196s label PolicyGradient, Reward 7: -67.352, Len(game): 73, Training Time: 0.392s, Prediction Time: 0.244s label PolicyGradient, Reward 8: -65.958, Len(game): 137, Training Time: 0.522s, Prediction Time: 0.344s label 
PolicyGradient, Reward 9: -92.362, Len(game): 66, Training Time: 0.701s, Prediction Time: 0.413s label PolicyGradient, Reward 10: -264.395, Len(game): 120, Training Time: 0.903s, Prediction Time: 0.534s label PolicyGradient, Reward 11: -80.529, Len(game): 168, Training Time: 1.178s, Prediction Time: 0.687s label PolicyGradient, Reward 12: -215.839, Len(game): 184, Training Time: 1.514s, Prediction Time: 0.885s label PolicyGradient, Reward 13: -211.972, Len(game): 158, Training Time: 1.941s, Prediction Time: 1.099s label PolicyGradient, Reward 14: -241.048, Len(game): 86, Training Time: 2.480s, Prediction Time: 1.287s label PolicyGradient, Reward 15: -45.792, Len(game): 133, Training Time: 3.060s, Prediction Time: 1.562s label PolicyGradient, Reward 16: -62.876, Len(game): 147, Training Time: 3.704s, Prediction Time: 1.872s label PolicyGradient, Reward 17: -206.646, Len(game): 107, Training Time: 4.547s, Prediction Time: 2.198s label PolicyGradient, Reward 18: -113.774, Len(game): 176, Training Time: 5.379s, Prediction Time: 2.633s label PolicyGradient, Reward 19: -161.748, Len(game): 152, Training Time: 6.345s, Prediction Time: 3.064s label PolicyGradient, Reward 20: -164.830, Len(game): 137, Training Time: 7.445s, Prediction Time: 3.559s label PolicyGradient, Reward 21: -81.720, Len(game): 136, Training Time: 8.731s, Prediction Time: 4.108s label PolicyGradient, Reward 22: -316.899, Len(game): 125, Training Time: 10.112s, Prediction Time: 4.727s label PolicyGradient, Reward 23: -135.891, Len(game): 144, Training Time: 11.624s, Prediction Time: 5.447s label PolicyGradient, Reward 24: -25.885, Len(game): 159, Training Time: 13.346s, Prediction Time: 6.258s label PolicyGradient, Reward 25: -97.168, Len(game): 143, Training Time: 15.327s, Prediction Time: 7.130s label PolicyGradient, Reward 26: -181.733, Len(game): 113, Training Time: 17.423s, Prediction Time: 8.051s label PolicyGradient, Reward 27: 13.154, Len(game): 181, Training Time: 19.700s, Prediction Time: 
9.114s label PolicyGradient, Reward 28: -77.998, Len(game): 143, Training Time: 22.179s, Prediction Time: 10.268s label PolicyGradient, Reward 29: -64.061, Len(game): 217, Training Time: 24.847s, Prediction Time: 11.585s label PolicyGradient, Reward 30: -18.764, Len(game): 219, Training Time: 27.893s, Prediction Time: 13.003s label PolicyGradient, Reward 31: -255.009, Len(game): 391, Training Time: 31.394s, Prediction Time: 14.789s label PolicyGradient, Reward 32: -351.244, Len(game): 1817, Training Time: 35.562s, Prediction Time: 18.689s label PolicyGradient, Reward 33: -14.374, Len(game): 205, Training Time: 41.989s, Prediction Time: 20.802s label PolicyGradient, Reward 34: -354.272, Len(game): 231, Training Time: 49.651s, Prediction Time: 24.064s label PolicyGradient, Reward 35: -345.703, Len(game): 211, Training Time: 57.217s, Prediction Time: 27.649s label PolicyGradient, Reward 36: -389.583, Len(game): 298, Training Time: 57.217s, Prediction Time: 31.726s label PolicyGradient, Reward 37: -53.407, Len(game): 112, Training Time: 57.217s, Prediction Time: 31.919s label PolicyGradient, Reward 38: -88.474, Len(game): 126, Training Time: 57.217s, Prediction Time: 32.135s label PolicyGradient, Reward 39: -211.173, Len(game): 160, Training Time: 57.217s, Prediction Time: 32.407s label PolicyGradient, Reward 40: -225.880, Len(game): 171, Training Time: 57.217s, Prediction Time: 32.698s label PolicyGradient, Reward 41: -151.298, Len(game): 158, Training Time: 57.217s, Prediction Time: 32.968s label PolicyGradient, Reward 42: -355.336, Len(game): 231, Training Time: 57.217s, Prediction Time: 33.355s label PolicyGradient, Reward 43: -183.964, Len(game): 177, Training Time: 57.217s, Prediction Time: 33.657s label PolicyGradient, Reward 44: -14.213, Len(game): 196, Training Time: 57.217s, Prediction Time: 33.989s label PolicyGradient, Reward 45: -190.849, Len(game): 194, Training Time: 57.217s, Prediction Time: 34.319s label PolicyGradient, Reward 46: -49.239, Len(game): 
194, Training Time: 57.217s, Prediction Time: 34.648s label PolicyGradient, Reward 47: -226.348, Len(game): 131, Training Time: 57.217s, Prediction Time: 34.873s label PolicyGradient, Reward 48: 6.076, Len(game): 188, Training Time: 57.217s, Prediction Time: 35.194s label PolicyGradient, Reward 49: -39.302, Len(game): 178, Training Time: 57.217s, Prediction Time: 35.492s label PolicyGradient, Reward 50: -80.477, Len(game): 124, Training Time: 57.217s, Prediction Time: 35.700s label PolicyGradient, Reward 51: -160.085, Len(game): 244, Training Time: 57.217s, Prediction Time: 36.117s label PolicyGradient, Reward 52: -77.668, Len(game): 193, Training Time: 57.217s, Prediction Time: 36.445s label PolicyGradient, Reward 53: -58.755, Len(game): 136, Training Time: 57.217s, Prediction Time: 36.677s label PolicyGradient, Reward 54: -233.658, Len(game): 283, Training Time: 57.217s, Prediction Time: 37.152s label PolicyGradient, Reward 55: -97.790, Len(game): 134, Training Time: 57.217s, Prediction Time: 37.377s label PolicyGradient, Reward 56: -239.818, Len(game): 195, Training Time: 57.217s, Prediction Time: 37.705s label PolicyGradient, Reward 57: 49.956, Len(game): 156, Training Time: 57.217s, Prediction Time: 37.966s label PolicyGradient, Reward 58: -52.775, Len(game): 171, Training Time: 57.217s, Prediction Time: 38.252s label PolicyGradient, Reward 59: -327.191, Len(game): 254, Training Time: 57.217s, Prediction Time: 38.685s label PolicyGradient, Reward 60: -44.443, Len(game): 227, Training Time: 57.217s, Prediction Time: 39.066s label PolicyGradient, Reward 61: 26.450, Len(game): 154, Training Time: 57.217s, Prediction Time: 39.325s label PolicyGradient, Reward 62: -247.901, Len(game): 207, Training Time: 57.217s, Prediction Time: 39.677s label PolicyGradient, Reward 63: -38.375, Len(game): 187, Training Time: 57.217s, Prediction Time: 39.993s label PolicyGradient, Reward 64: -8.211, Len(game): 193, Training Time: 57.217s, Prediction Time: 40.320s label 
PolicyGradient, Reward 65: -40.982, Len(game): 150, Training Time: 57.217s, Prediction Time: 40.575s label PolicyGradient, Reward 66: -326.341, Len(game): 237, Training Time: 57.217s, Prediction Time: 40.975s label PolicyGradient, Reward 67: -2.344, Len(game): 2000, Training Time: 57.217s, Prediction Time: 44.378s label PolicyGradient, Reward 68: -80.559, Len(game): 272, Training Time: 57.217s, Prediction Time: 44.840s label PolicyGradient, Reward 69: -340.607, Len(game): 160, Training Time: 57.217s, Prediction Time: 45.109s label PolicyGradient, Reward 70: -212.233, Len(game): 233, Training Time: 57.217s, Prediction Time: 45.502s label PolicyGradient, Reward 71: -197.870, Len(game): 212, Training Time: 57.217s, Prediction Time: 45.860s label PolicyGradient, Reward 72: -187.449, Len(game): 187, Training Time: 57.217s, Prediction Time: 46.179s label PolicyGradient, Reward 73: -225.196, Len(game): 207, Training Time: 57.217s, Prediction Time: 46.531s label PolicyGradient, Reward 74: -35.257, Len(game): 195, Training Time: 57.217s, Prediction Time: 46.863s label PolicyGradient, Reward 75: -356.615, Len(game): 159, Training Time: 57.217s, Prediction Time: 47.131s label PolicyGradient, Reward 76: -266.305, Len(game): 205, Training Time: 57.217s, Prediction Time: 47.480s label PolicyGradient, Reward 77: -21.267, Len(game): 186, Training Time: 57.217s, Prediction Time: 47.801s label PolicyGradient, Reward 78: -254.046, Len(game): 161, Training Time: 57.217s, Prediction Time: 48.075s label PolicyGradient, Reward 79: -51.840, Len(game): 181, Training Time: 57.217s, Prediction Time: 48.376s label PolicyGradient, Reward 80: -59.774, Len(game): 257, Training Time: 57.217s, Prediction Time: 48.810s label PolicyGradient, Reward 81: -124.355, Len(game): 197, Training Time: 57.217s, Prediction Time: 49.142s label PolicyGradient, Reward 82: -73.173, Len(game): 154, Training Time: 57.217s, Prediction Time: 49.407s label PolicyGradient, Reward 83: -222.295, Len(game): 189, Training 
Time: 57.217s, Prediction Time: 49.736s label PolicyGradient, Reward 84: -57.890, Len(game): 201, Training Time: 57.217s, Prediction Time: 50.076s label PolicyGradient, Reward 85: -132.483, Len(game): 121, Training Time: 57.217s, Prediction Time: 50.282s label PolicyGradient, Reward 86: -57.338, Len(game): 120, Training Time: 57.217s, Prediction Time: 50.488s label PolicyGradient, Reward 87: -241.359, Len(game): 204, Training Time: 57.217s, Prediction Time: 50.837s label PolicyGradient, Reward 88: -34.278, Len(game): 163, Training Time: 57.217s, Prediction Time: 51.115s label PolicyGradient, Reward 89: -308.502, Len(game): 342, Training Time: 57.217s, Prediction Time: 51.691s label PolicyGradient, Reward 90: 20.146, Len(game): 146, Training Time: 57.217s, Prediction Time: 51.938s label PolicyGradient, Reward 91: -83.766, Len(game): 117, Training Time: 57.217s, Prediction Time: 52.138s label PolicyGradient, Reward 92: -33.971, Len(game): 213, Training Time: 57.217s, Prediction Time: 52.498s label PolicyGradient, Reward 93: -2.186, Len(game): 211, Training Time: 57.217s, Prediction Time: 52.854s label PolicyGradient, Reward 94: -207.453, Len(game): 172, Training Time: 57.217s, Prediction Time: 53.148s label PolicyGradient, Reward 95: 1.965, Len(game): 162, Training Time: 57.217s, Prediction Time: 53.427s label PolicyGradient, Reward 96: -299.235, Len(game): 109, Training Time: 57.217s, Prediction Time: 53.613s label PolicyGradient, Reward 97: -56.088, Len(game): 125, Training Time: 57.217s, Prediction Time: 53.826s label PolicyGradient, Reward 98: 8.456, Len(game): 150, Training Time: 57.217s, Prediction Time: 54.081s label PolicyGradient, Reward 99: -67.207, Len(game): 126, Training Time: 57.217s, Prediction Time: 54.292s label DQNAgent, Reward 0: -32.345, Len(game): 76, Training Time: 0.075s, Prediction Time: 0.003s label DQNAgent, Reward 1: -341.777, Len(game): 115, Training Time: 0.213s, Prediction Time: 0.008s label DQNAgent, Reward 2: -45.154, Len(game): 97, 
Training Time: 0.333s, Prediction Time: 0.013s label DQNAgent, Reward 3: -39.019, Len(game): 100, Training Time: 0.458s, Prediction Time: 0.018s label DQNAgent, Reward 4: -244.580, Len(game): 104, Training Time: 0.593s, Prediction Time: 0.023s label DQNAgent, Reward 5: -109.868, Len(game): 82, Training Time: 0.698s, Prediction Time: 0.028s label DQNAgent, Reward 6: -28.085, Len(game): 112, Training Time: 0.838s, Prediction Time: 0.036s label DQNAgent, Reward 7: -56.343, Len(game): 180, Training Time: 1.073s, Prediction Time: 0.048s label DQNAgent, Reward 8: -30.156, Len(game): 137, Training Time: 1.247s, Prediction Time: 0.058s label DQNAgent, Reward 9: -154.895, Len(game): 158, Training Time: 1.446s, Prediction Time: 0.069s label DQNAgent, Reward 10: -23.092, Len(game): 250, Training Time: 1.760s, Prediction Time: 0.088s label DQNAgent, Reward 11: -318.624, Len(game): 2000, Training Time: 4.284s, Prediction Time: 0.259s label DQNAgent, Reward 12: 66.829, Len(game): 149, Training Time: 4.473s, Prediction Time: 0.273s label DQNAgent, Reward 13: -294.896, Len(game): 2000, Training Time: 7.040s, Prediction Time: 0.450s label DQNAgent, Reward 14: -128.997, Len(game): 257, Training Time: 7.366s, Prediction Time: 0.472s label DQNAgent, Reward 15: 86.828, Len(game): 1235, Training Time: 8.977s, Prediction Time: 0.582s label DQNAgent, Reward 16: -79.989, Len(game): 293, Training Time: 9.362s, Prediction Time: 0.609s label DQNAgent, Reward 17: -76.022, Len(game): 289, Training Time: 9.736s, Prediction Time: 0.635s label DQNAgent, Reward 18: -76.693, Len(game): 497, Training Time: 10.386s, Prediction Time: 0.677s label DQNAgent, Reward 19: 17.214, Len(game): 222, Training Time: 10.675s, Prediction Time: 0.698s label DQNAgent, Reward 20: -77.502, Len(game): 707, Training Time: 11.589s, Prediction Time: 0.760s label DQNAgent, Reward 21: -143.504, Len(game): 139, Training Time: 11.771s, Prediction Time: 0.773s label DQNAgent, Reward 22: -138.275, Len(game): 1069, Training Time: 
13.171s, Prediction Time: 0.864s label DQNAgent, Reward 23: 108.491, Len(game): 1276, Training Time: 14.849s, Prediction Time: 0.974s label DQNAgent, Reward 24: 242.335, Len(game): 451, Training Time: 15.442s, Prediction Time: 1.014s label DQNAgent, Reward 25: -158.982, Len(game): 2000, Training Time: 18.097s, Prediction Time: 1.199s label DQNAgent, Reward 26: 42.868, Len(game): 218, Training Time: 18.394s, Prediction Time: 1.219s label DQNAgent, Reward 27: 214.845, Len(game): 577, Training Time: 19.155s, Prediction Time: 1.271s label DQNAgent, Reward 28: 217.150, Len(game): 566, Training Time: 19.904s, Prediction Time: 1.322s label DQNAgent, Reward 29: 258.343, Len(game): 307, Training Time: 20.305s, Prediction Time: 1.353s label DQNAgent, Reward 30: 226.532, Len(game): 360, Training Time: 20.778s, Prediction Time: 1.386s label DQNAgent, Reward 31: 224.708, Len(game): 560, Training Time: 21.520s, Prediction Time: 1.438s label DQNAgent, Reward 32: 258.744, Len(game): 373, Training Time: 22.023s, Prediction Time: 1.473s label DQNAgent, Reward 33: 245.188, Len(game): 357, Training Time: 22.497s, Prediction Time: 1.507s label DQNAgent, Reward 34: -41.809, Len(game): 444, Training Time: 23.089s, Prediction Time: 1.545s label DQNAgent, Reward 35: -36.180, Len(game): 145, Training Time: 23.283s, Prediction Time: 1.558s label DQNAgent, Reward 36: 274.259, Len(game): 320, Training Time: 23.704s, Prediction Time: 1.587s label DQNAgent, Reward 37: 224.643, Len(game): 416, Training Time: 24.267s, Prediction Time: 1.625s label DQNAgent, Reward 38: 252.419, Len(game): 734, Training Time: 25.254s, Prediction Time: 1.700s label DQNAgent, Reward 39: -48.439, Len(game): 169, Training Time: 25.484s, Prediction Time: 1.715s label DQNAgent, Reward 40: -7.429, Len(game): 228, Training Time: 25.789s, Prediction Time: 1.735s label DQNAgent, Reward 41: 39.578, Len(game): 148, Training Time: 25.987s, Prediction Time: 1.750s label DQNAgent, Reward 42: 94.496, Len(game): 2000, Training Time: 
28.908s, Prediction Time: 1.956s label DQNAgent, Reward 43: -77.404, Len(game): 121, Training Time: 29.072s, Prediction Time: 1.969s label DQNAgent, Reward 44: -34.316, Len(game): 82, Training Time: 29.180s, Prediction Time: 1.978s label DQNAgent, Reward 45: -101.696, Len(game): 261, Training Time: 29.526s, Prediction Time: 2.001s label DQNAgent, Reward 46: -52.923, Len(game): 124, Training Time: 29.688s, Prediction Time: 2.011s label DQNAgent, Reward 47: 266.802, Len(game): 266, Training Time: 30.050s, Prediction Time: 2.036s label DQNAgent, Reward 48: 1.389, Len(game): 232, Training Time: 30.366s, Prediction Time: 2.055s label DQNAgent, Reward 49: -10.112, Len(game): 179, Training Time: 30.603s, Prediction Time: 2.073s label DQNAgent, Reward 50: 70.895, Len(game): 162, Training Time: 30.818s, Prediction Time: 2.089s label DQNAgent, Reward 51: 54.644, Len(game): 185, Training Time: 31.064s, Prediction Time: 2.106s label DQNAgent, Reward 52: 29.431, Len(game): 244, Training Time: 31.393s, Prediction Time: 2.128s label DQNAgent, Reward 53: 6.784, Len(game): 122, Training Time: 31.555s, Prediction Time: 2.140s label DQNAgent, Reward 54: 195.130, Len(game): 708, Training Time: 32.499s, Prediction Time: 2.208s label DQNAgent, Reward 55: 16.518, Len(game): 101, Training Time: 32.633s, Prediction Time: 2.218s label DQNAgent, Reward 56: 229.328, Len(game): 892, Training Time: 33.818s, Prediction Time: 2.304s label DQNAgent, Reward 57: 10.079, Len(game): 203, Training Time: 34.091s, Prediction Time: 2.322s label DQNAgent, Reward 58: -32.992, Len(game): 103, Training Time: 34.227s, Prediction Time: 2.333s label DQNAgent, Reward 59: -10.238, Len(game): 206, Training Time: 34.505s, Prediction Time: 2.352s label DQNAgent, Reward 60: -11.807, Len(game): 272, Training Time: 34.869s, Prediction Time: 2.376s label DQNAgent, Reward 61: 37.263, Len(game): 119, Training Time: 35.027s, Prediction Time: 2.388s label DQNAgent, Reward 62: -73.480, Len(game): 247, Training Time: 35.359s, 
Prediction Time: 2.411s label DQNAgent, Reward 63: -5.797, Len(game): 255, Training Time: 35.693s, Prediction Time: 2.436s label DQNAgent, Reward 64: -90.319, Len(game): 220, Training Time: 35.987s, Prediction Time: 2.458s label DQNAgent, Reward 65: 9.954, Len(game): 176, Training Time: 36.219s, Prediction Time: 2.474s label DQNAgent, Reward 66: -0.394, Len(game): 2000, Training Time: 38.900s, Prediction Time: 2.676s label DQNAgent, Reward 67: -36.812, Len(game): 2000, Training Time: 41.547s, Prediction Time: 2.870s label DQNAgent, Reward 68: -24.807, Len(game): 413, Training Time: 42.104s, Prediction Time: 2.908s label DQNAgent, Reward 69: -131.701, Len(game): 134, Training Time: 42.284s, Prediction Time: 2.920s label DQNAgent, Reward 70: -23.883, Len(game): 160, Training Time: 42.501s, Prediction Time: 2.935s label DQNAgent, Reward 71: -238.337, Len(game): 111, Training Time: 42.649s, Prediction Time: 2.945s label DQNAgent, Reward 72: -19.809, Len(game): 130, Training Time: 42.823s, Prediction Time: 2.957s label DQNAgent, Reward 73: -17.040, Len(game): 111, Training Time: 42.969s, Prediction Time: 2.967s label DQNAgent, Reward 74: -286.120, Len(game): 185, Training Time: 43.217s, Prediction Time: 2.984s label DQNAgent, Reward 75: 262.301, Len(game): 509, Training Time: 43.902s, Prediction Time: 3.034s label DQNAgent, Reward 76: 154.535, Len(game): 198, Training Time: 44.161s, Prediction Time: 3.054s label DQNAgent, Reward 77: 24.319, Len(game): 92, Training Time: 44.283s, Prediction Time: 3.063s label DQNAgent, Reward 78: -241.954, Len(game): 88, Training Time: 44.405s, Prediction Time: 3.070s label DQNAgent, Reward 79: -330.269, Len(game): 53, Training Time: 44.476s, Prediction Time: 3.075s label DQNAgent, Reward 80: -19.116, Len(game): 206, Training Time: 44.752s, Prediction Time: 3.091s label DQNAgent, Reward 81: -69.397, Len(game): 80, Training Time: 44.858s, Prediction Time: 3.099s label DQNAgent, Reward 82: -254.371, Len(game): 73, Training Time: 44.955s, 
Prediction Time: 3.106s label DQNAgent, Reward 83: -195.465, Len(game): 58, Training Time: 45.032s, Prediction Time: 3.113s label DQNAgent, Reward 84: -87.897, Len(game): 103, Training Time: 45.167s, Prediction Time: 3.122s label DQNAgent, Reward 85: -225.071, Len(game): 63, Training Time: 45.253s, Prediction Time: 3.128s label DQNAgent, Reward 86: -34.987, Len(game): 142, Training Time: 45.443s, Prediction Time: 3.143s label DQNAgent, Reward 87: -22.486, Len(game): 159, Training Time: 45.656s, Prediction Time: 3.158s label DQNAgent, Reward 88: -19.353, Len(game): 106, Training Time: 45.798s, Prediction Time: 3.167s label DQNAgent, Reward 89: -61.599, Len(game): 96, Training Time: 45.927s, Prediction Time: 3.177s label DQNAgent, Reward 90: 243.727, Len(game): 347, Training Time: 46.396s, Prediction Time: 3.211s label DQNAgent, Reward 91: -17.510, Len(game): 103, Training Time: 46.532s, Prediction Time: 3.221s label DQNAgent, Reward 92: 262.185, Len(game): 263, Training Time: 46.881s, Prediction Time: 3.245s label DQNAgent, Reward 93: 277.732, Len(game): 440, Training Time: 47.474s, Prediction Time: 3.288s label DQNAgent, Reward 94: -77.197, Len(game): 180, Training Time: 47.713s, Prediction Time: 3.304s label DQNAgent, Reward 95: -157.270, Len(game): 436, Training Time: 48.296s, Prediction Time: 3.341s label DQNAgent, Reward 96: 181.631, Len(game): 299, Training Time: 48.692s, Prediction Time: 3.372s label DQNAgent, Reward 97: -44.327, Len(game): 96, Training Time: 48.824s, Prediction Time: 3.382s label DQNAgent, Reward 98: 216.648, Len(game): 281, Training Time: 49.202s, Prediction Time: 3.409s label DQNAgent, Reward 99: -101.295, Len(game): 245, Training Time: 49.523s, Prediction Time: 3.431s Computed global error Bellman mean: 1.3189805374485473 iter: 5 Computed global error Bellman mean: 1.2523126574679762 iter: 1 label KQLearning, Reward 0: -372.562, Len(game): 92, Training Time: 0.072s, Prediction Time: 0.002s Computed global error Bellman mean: 
7.411354497632007e-08 iter: 1 Computed global error Bellman mean: 1.2523126574679762 iter: 0 label KQLearning, Reward 1: -584.066, Len(game): 65, Training Time: 0.099s, Prediction Time: 0.019s Computed global error Bellman mean: 7.100065456596863e-07 iter: 4 label KQLearning, Reward 2: -437.061, Len(game): 151, Training Time: 0.160s, Prediction Time: 0.133s Computed global error Bellman mean: 0.0019651913974301073 iter: 5 label KQLearning, Reward 3: -135.792, Len(game): 69, Training Time: 0.184s, Prediction Time: 0.212s Computed global error Bellman mean: 2.919271432600007e-08 iter: 5 label KQLearning, Reward 4: -139.419, Len(game): 69, Training Time: 0.208s, Prediction Time: 0.299s Computed global error Bellman mean: 2.0043804439961245e-08 iter: 4 label KQLearning, Reward 5: -140.436, Len(game): 87, Training Time: 0.237s, Prediction Time: 0.436s Computed global error Bellman mean: 0.13576275654331782 iter: 5 Computed global error Bellman mean: 0.020962495119750388 iter: 5 label KQLearning, Reward 6: -130.612, Len(game): 76, Training Time: 0.306s, Prediction Time: 0.577s Computed global error Bellman mean: 0.15713600956428617 iter: 5 Computed global error Bellman mean: 0.0020650690661126385 iter: 5 Computed global error Bellman mean: 0.024317659835294214 iter: 5 label KQLearning, Reward 7: -126.258, Len(game): 66, Training Time: 0.393s, Prediction Time: 0.705s Computed global error Bellman mean: 0.017851189729401005 iter: 5 Computed global error Bellman mean: 0.0007217437808625365 iter: 5 Computed global error Bellman mean: 0.0015827117802054164 iter: 5 label KQLearning, Reward 8: -135.454, Len(game): 67, Training Time: 0.476s, Prediction Time: 0.846s Computed global error Bellman mean: 2.063433774096256e-08 iter: 3 label KQLearning, Reward 9: -243.629, Len(game): 89, Training Time: 0.504s, Prediction Time: 1.050s Computed global error Bellman mean: 6.700834498567436e-08 iter: 5 label KQLearning, Reward 10: -153.162, Len(game): 61, Training Time: 0.530s, Prediction 
Time: 1.220s Computed global error Bellman mean: 9.495778856264773e-07 iter: 2 label KQLearning, Reward 11: -146.903, Len(game): 60, Training Time: 0.547s, Prediction Time: 1.396s Computed global error Bellman mean: 0.0030796875629900615 iter: 5 label KQLearning, Reward 12: -109.240, Len(game): 54, Training Time: 0.571s, Prediction Time: 1.556s Computed global error Bellman mean: 0.03341978501448979 iter: 5 Computed global error Bellman mean: 0.009044383680920833 iter: 5 label KQLearning, Reward 13: -119.908, Len(game): 62, Training Time: 0.624s, Prediction Time: 1.755s Computed global error Bellman mean: 0.20808780242644276 iter: 5 Computed global error Bellman mean: 0.1585449000412748 iter: 5 label KQLearning, Reward 14: -130.531, Len(game): 72, Training Time: 0.690s, Prediction Time: 1.996s Computed global error Bellman mean: 0.2679024756682797 iter: 5 Computed global error Bellman mean: 0.07897550232560825 iter: 5 Computed global error Bellman mean: 0.2188307754703123 iter: 5 label KQLearning, Reward 15: -120.269, Len(game): 76, Training Time: 0.794s, Prediction Time: 2.276s Computed global error Bellman mean: 0.00817506846491195 iter: 5 Computed global error Bellman mean: 0.022806644167262444 iter: 5 Computed global error Bellman mean: 0.08965084354436514 iter: 5 label KQLearning, Reward 16: -93.561, Len(game): 64, Training Time: 0.896s, Prediction Time: 2.516s Computed global error Bellman mean: 0.2859106155482297 iter: 5 Computed global error Bellman mean: 0.000860394198221856 iter: 5 Computed global error Bellman mean: 0.03149992370752994 iter: 5 Computed global error Bellman mean: 0.12055260590243212 iter: 5 label KQLearning, Reward 17: -122.599, Len(game): 73, Training Time: 1.053s, Prediction Time: 2.819s Computed global error Bellman mean: 1.83044586710146e-08 iter: 3 Computed global error Bellman mean: 0.0007470566333216264 iter: 5 Computed global error Bellman mean: 0.0774641048007587 iter: 5 label KQLearning, Reward 18: -134.459, Len(game): 80, 
Training Time: 1.150s, Prediction Time: 3.157s Computed global error Bellman mean: 0.10731042912237276 iter: 5 Computed global error Bellman mean: 0.02610362985333355 iter: 5 Computed global error Bellman mean: 0.002993378342766674 iter: 5 label KQLearning, Reward 19: -138.905, Len(game): 85, Training Time: 1.266s, Prediction Time: 3.530s Computed global error Bellman mean: 0.23419570223319502 iter: 5 Computed global error Bellman mean: 0.002914609285175714 iter: 5 Computed global error Bellman mean: 0.02236859336212139 iter: 5 label KQLearning, Reward 20: -129.713, Len(game): 81, Training Time: 1.383s, Prediction Time: 3.903s Computed global error Bellman mean: 1.6014807838326037e-08 iter: 3 Computed global error Bellman mean: 0.0003924388587439735 iter: 5 label KQLearning, Reward 21: -203.197, Len(game): 84, Training Time: 1.456s, Prediction Time: 4.301s Computed global error Bellman mean: 0.05100331124491497 iter: 5 Computed global error Bellman mean: 0.03186978686701474 iter: 5 label KQLearning, Reward 22: -133.069, Len(game): 70, Training Time: 1.521s, Prediction Time: 4.664s Computed global error Bellman mean: 0.003950938087914362 iter: 5 Computed global error Bellman mean: 0.005001478837385489 iter: 5 label KQLearning, Reward 23: -146.560, Len(game): 75, Training Time: 1.589s, Prediction Time: 5.061s Computed global error Bellman mean: 6.546387785609096e-08 iter: 4 label KQLearning, Reward 24: -123.907, Len(game): 74, Training Time: 1.613s, Prediction Time: 5.476s Computed global error Bellman mean: 0.10879058808361394 iter: 5 Computed global error Bellman mean: 0.07290738721268956 iter: 5 label KQLearning, Reward 25: -133.527, Len(game): 60, Training Time: 1.661s, Prediction Time: 5.805s Computed global error Bellman mean: 0.016974743021143903 iter: 5 Computed global error Bellman mean: 0.04238259138373758 iter: 5 Computed global error Bellman mean: 0.05721483609996408 iter: 0 label KQLearning, Reward 26: -101.744, Len(game): 78, Training Time: 1.737s, 
Prediction Time: 6.262s Computed global error Bellman mean: 0.37744042431767055 iter: 5 Computed global error Bellman mean: 0.014116144514556426 iter: 5 Computed global error Bellman mean: 0.3204706232566178 iter: 5 label KQLearning, Reward 27: -97.095, Len(game): 55, Training Time: 1.811s, Prediction Time: 6.610s Computed global error Bellman mean: 0.2453438557181358 iter: 5 Computed global error Bellman mean: 0.014108517998810804 iter: 4 Computed global error Bellman mean: 0.24250058526812496 iter: 5 Computed global error Bellman mean: 0.06446487764621492 iter: 5 label KQLearning, Reward 28: -131.495, Len(game): 72, Training Time: 1.929s, Prediction Time: 7.061s Computed global error Bellman mean: 0.012575581683966493 iter: 5 Computed global error Bellman mean: 0.14278238596181772 iter: 5 Computed global error Bellman mean: 0.019633335268757077 iter: 5 Computed global error Bellman mean: 0.00040599195590076536 iter: 5 label KQLearning, Reward 29: -174.707, Len(game): 71, Training Time: 2.054s, Prediction Time: 7.519s Computed global error Bellman mean: 0.04208379596448945 iter: 5 Computed global error Bellman mean: 0.023102606487868323 iter: 5 Computed global error Bellman mean: 0.007103893949980033 iter: 5 Computed global error Bellman mean: 0.04317234861216545 iter: 5 label KQLearning, Reward 30: -124.773, Len(game): 85, Training Time: 2.212s, Prediction Time: 8.081s Computed global error Bellman mean: 0.12058780308266578 iter: 5 Computed global error Bellman mean: 0.00020293306826099392 iter: 4 Computed global error Bellman mean: 0.08392629178733378 iter: 5 label KQLearning, Reward 31: -76.054, Len(game): 92, Training Time: 2.333s, Prediction Time: 8.709s Computed global error Bellman mean: 0.09828368033783424 iter: 5 Computed global error Bellman mean: 0.037782878816039166 iter: 5 Computed global error Bellman mean: 0.06029686964513399 iter: 5 label KQLearning, Reward 32: -137.634, Len(game): 65, Training Time: 2.447s, Prediction Time: 9.179s Computed global 
error Bellman mean: 0.5136728081973398 iter: 5 Computed global error Bellman mean: 0.0009918043434892655 iter: 5 Computed global error Bellman mean: 0.01235009238282136 iter: 5 Computed global error Bellman mean: 0.180617942613106 iter: 5 label KQLearning, Reward 33: -86.094, Len(game): 53, Training Time: 2.581s, Prediction Time: 9.575s Computed global error Bellman mean: 1.2584718080871243e-08 iter: 5 Computed global error Bellman mean: 0.0018476014637430129 iter: 5 Computed global error Bellman mean: 0.056234339680268246 iter: 5 label KQLearning, Reward 34: -173.360, Len(game): 94, Training Time: 2.684s, Prediction Time: 10.282s Computed global error Bellman mean: 7.844241938522363e-08 iter: 5 Computed global error Bellman mean: 0.00043903664548086436 iter: 5 label KQLearning, Reward 35: -102.194, Len(game): 75, Training Time: 2.740s, Prediction Time: 10.868s Computed global error Bellman mean: 0.19120189207201235 iter: 5 Computed global error Bellman mean: 0.0829391213649813 iter: 5 label KQLearning, Reward 36: -108.647, Len(game): 52, Training Time: 2.784s, Prediction Time: 11.290s Computed global error Bellman mean: 9.700896319046813e-08 iter: 4 Computed global error Bellman mean: 0.03956048601856398 iter: 5 label KQLearning, Reward 37: -196.977, Len(game): 84, Training Time: 2.841s, Prediction Time: 11.964s Computed global error Bellman mean: 0.05229476501481367 iter: 5 Computed global error Bellman mean: 0.009654067140801232 iter: 3 Computed global error Bellman mean: 0.028373453359756315 iter: 5 label KQLearning, Reward 38: -129.452, Len(game): 85, Training Time: 2.944s, Prediction Time: 12.690s Computed global error Bellman mean: 0.019472858023542202 iter: 5 Computed global error Bellman mean: 0.0011164661361261203 iter: 5 Computed global error Bellman mean: 0.0006816488909307312 iter: 5 label KQLearning, Reward 39: -117.806, Len(game): 54, Training Time: 3.040s, Prediction Time: 13.161s Computed global error Bellman mean: 0.015399148714532362 iter: 5 
Computed global error Bellman mean: 0.008808021489861489 iter: 4 label KQLearning, Reward 40: -124.378, Len(game): 140, Training Time: 3.254s, Prediction Time: 14.373s Computed global error Bellman mean: 0.03609878105199148 iter: 5 Computed global error Bellman mean: 0.005822428587485815 iter: 5 label KQLearning, Reward 41: -164.887, Len(game): 82, Training Time: 3.337s, Prediction Time: 15.120s Computed global error Bellman mean: 0.26996843196918135 iter: 5 Computed global error Bellman mean: 0.06656820291052908 iter: 5 label KQLearning, Reward 42: -120.646, Len(game): 73, Training Time: 3.404s, Prediction Time: 15.806s Computed global error Bellman mean: 0.017139636495967635 iter: 5 Computed global error Bellman mean: 0.022196231913062057 iter: 5 Computed global error Bellman mean: 0.006494194517247013 iter: 5 label KQLearning, Reward 43: -104.596, Len(game): 55, Training Time: 3.484s, Prediction Time: 16.338s Computed global error Bellman mean: 2.3037905815209953e-08 iter: 5 Computed global error Bellman mean: 0.00020246571991301376 iter: 5 label KQLearning, Reward 44: -142.510, Len(game): 67, Training Time: 3.553s, Prediction Time: 16.993s Computed global error Bellman mean: 5.6706410073423916e-08 iter: 4 label KQLearning, Reward 45: -194.903, Len(game): 105, Training Time: 3.591s, Prediction Time: 18.028s Computed global error Bellman mean: 0.05119886950137757 iter: 5 Computed global error Bellman mean: 0.01042185810882918 iter: 5 label KQLearning, Reward 46: -128.087, Len(game): 59, Training Time: 3.639s, Prediction Time: 18.634s Computed global error Bellman mean: 2.5003558635979185e-08 iter: 3 Computed global error Bellman mean: 0.00014307256291403762 iter: 5 label KQLearning, Reward 47: -177.993, Len(game): 59, Training Time: 3.687s, Prediction Time: 19.258s Computed global error Bellman mean: 1.48014213633436e-07 iter: 4 label KQLearning, Reward 48: -137.158, Len(game): 63, Training Time: 3.711s, Prediction Time: 19.938s Computed global error Bellman 
mean: 3.0484152879067805e-08 iter: 4 label KQLearning, Reward 49: -134.247, Len(game): 96, Training Time: 3.750s, Prediction Time: 20.964s Computed global error Bellman mean: 0.07536368772118596 iter: 5 Computed global error Bellman mean: 0.03990564935632911 iter: 5 label KQLearning, Reward 50: -144.798, Len(game): 79, Training Time: 3.828s, Prediction Time: 21.834s Computed global error Bellman mean: 0.0017133615512324154 iter: 5 Computed global error Bellman mean: 0.0007265210284082993 iter: 4 label KQLearning, Reward 51: -160.618, Len(game): 72, Training Time: 3.899s, Prediction Time: 22.647s Computed global error Bellman mean: 8.044588339347824e-08 iter: 5 label KQLearning, Reward 52: -131.064, Len(game): 78, Training Time: 3.932s, Prediction Time: 23.539s Computed global error Bellman mean: 3.605390685740978e-08 iter: 3 label KQLearning, Reward 53: -100.629, Len(game): 64, Training Time: 3.951s, Prediction Time: 24.297s Computed global error Bellman mean: 0.2004441542827797 iter: 5 Computed global error Bellman mean: 0.1658095968517145 iter: 5 label KQLearning, Reward 54: -126.926, Len(game): 75, Training Time: 4.016s, Prediction Time: 25.197s Computed global error Bellman mean: 0.09288273713150447 iter: 5 Computed global error Bellman mean: 0.11327606177504151 iter: 5 Computed global error Bellman mean: 0.04359814015673651 iter: 5 label KQLearning, Reward 55: -81.539, Len(game): 52, Training Time: 4.101s, Prediction Time: 25.873s Computed global error Bellman mean: 0.0005143734323825142 iter: 5 Computed global error Bellman mean: 0.04430338207013421 iter: 5 Computed global error Bellman mean: 0.0011049728149696504 iter: 5 label KQLearning, Reward 56: -92.505, Len(game): 69, Training Time: 4.187s, Prediction Time: 26.721s Computed global error Bellman mean: 0.10746479195065407 iter: 5 Computed global error Bellman mean: 0.0005125659828415754 iter: 5 Computed global error Bellman mean: 0.10009934921669968 iter: 5 label KQLearning, Reward 57: -139.251, 
Len(game): 85, Training Time: 4.319s, Prediction Time: 27.778s Computed global error Bellman mean: 9.029182978766732e-07 iter: 5 label KQLearning, Reward 58: -184.628, Len(game): 183, Training Time: 4.449s, Prediction Time: 30.099s Computed global error Bellman mean: 0.2589339881900597 iter: 5 Computed global error Bellman mean: 0.03163800796125399 iter: 5 label KQLearning, Reward 59: -110.732, Len(game): 84, Training Time: 4.537s, Prediction Time: 31.219s Computed global error Bellman mean: 1.8859149875067765e-06 iter: 5 Computed global error Bellman mean: 0.006564670144132386 iter: 5 label KQLearning, Reward 60: -22.853, Len(game): 83, Training Time: 4.623s, Prediction Time: 32.330s Computed global error Bellman mean: 0.01863704602424488 iter: 5 Computed global error Bellman mean: 0.04687310605471646 iter: 2 label KQLearning, Reward 61: -54.676, Len(game): 79, Training Time: 4.686s, Prediction Time: 33.436s Computed global error Bellman mean: 2.8455440473054395e-08 iter: 2 label KQLearning, Reward 62: -125.842, Len(game): 54, Training Time: 4.701s, Prediction Time: 34.210s Computed global error Bellman mean: 0.0111061336365271 iter: 5 Computed global error Bellman mean: 0.044638940747805854 iter: 1 label KQLearning, Reward 63: -122.321, Len(game): 81, Training Time: 4.762s, Prediction Time: 35.336s Computed global error Bellman mean: 1.3872605528209014e-08 iter: 4 label KQLearning, Reward 64: -146.616, Len(game): 80, Training Time: 4.793s, Prediction Time: 36.493s Computed global error Bellman mean: 2.3803313065473975e-08 iter: 3 label KQLearning, Reward 65: -94.491, Len(game): 81, Training Time: 4.817s, Prediction Time: 37.685s Computed global error Bellman mean: 0.32549571653429427 iter: 5 Computed global error Bellman mean: 0.08911715330411236 iter: 5 label KQLearning, Reward 66: -180.841, Len(game): 63, Training Time: 4.868s, Prediction Time: 38.608s Computed global error Bellman mean: 4.000977550183407e-08 iter: 5 Computed global error Bellman mean: 
0.004253662822167722 iter: 5 label KQLearning, Reward 67: -137.975, Len(game): 97, Training Time: 4.945s, Prediction Time: 40.080s Computed global error Bellman mean: 2.2676408195643205e-08 iter: 2 label KQLearning, Reward 68: -291.425, Len(game): 89, Training Time: 4.971s, Prediction Time: 41.437s Computed global error Bellman mean: 6.718595003287946e-08 iter: 5 label KQLearning, Reward 69: -125.370, Len(game): 92, Training Time: 5.014s, Prediction Time: 42.872s Computed global error Bellman mean: 2.7517433216756364e-07 iter: 5 label KQLearning, Reward 70: -56.264, Len(game): 85, Training Time: 5.050s, Prediction Time: 44.178s Computed global error Bellman mean: 6.962977897463758e-08 iter: 5 label KQLearning, Reward 71: -344.401, Len(game): 59, Training Time: 5.075s, Prediction Time: 45.118s Computed global error Bellman mean: 1.0711693531115362e-07 iter: 5 label KQLearning, Reward 72: -112.351, Len(game): 84, Training Time: 5.109s, Prediction Time: 46.423s Computed global error Bellman mean: 1.6126971528758742e-07 iter: 3 label KQLearning, Reward 73: -335.432, Len(game): 77, Training Time: 5.134s, Prediction Time: 47.659s Computed global error Bellman mean: 0.024788042740105546 iter: 5 Computed global error Bellman mean: 0.06001539848507912 iter: 0 label KQLearning, Reward 74: -440.919, Len(game): 131, Training Time: 5.235s, Prediction Time: 49.771s Computed global error Bellman mean: 1.2423309922889423e-07 iter: 5 label KQLearning, Reward 75: -504.080, Len(game): 97, Training Time: 5.281s, Prediction Time: 51.371s Computed global error Bellman mean: 0.00036428533820603365 iter: 5 label KQLearning, Reward 76: -390.916, Len(game): 149, Training Time: 5.367s, Prediction Time: 53.852s Computed global error Bellman mean: 3.0315014591201596e-08 iter: 5 label KQLearning, Reward 77: -128.353, Len(game): 72, Training Time: 5.395s, Prediction Time: 55.087s Computed global error Bellman mean: 2.403641870596385e-07 iter: 4 label KQLearning, Reward 78: -59.733, Len(game): 
80, Training Time: 5.421s, Prediction Time: 56.456s Computed global error Bellman mean: 0.015106520761554639 iter: 5 Computed global error Bellman mean: 0.048738974110085945 iter: 0 label KQLearning, Reward 79: -235.885, Len(game): 106, Training Time: 5.491s, Prediction Time: 58.292s Computed global error Bellman mean: 0.09022535538969577 iter: 5 Computed global error Bellman mean: 0.04019099592898272 iter: 5 label KQLearning, Reward 80: -73.021, Len(game): 52, Training Time: 5.532s, Prediction Time: 59.227s Computed global error Bellman mean: 2.947203161549867e-07 iter: 5 Computed global error Bellman mean: 0.006143285630891342 iter: 3 label KQLearning, Reward 81: -307.058, Len(game): 124, Training Time: 5.611s, Prediction Time: 61.439s Computed global error Bellman mean: 4.629798574282561e-07 iter: 5 label KQLearning, Reward 82: -25.641, Len(game): 132, Training Time: 5.678s, Prediction Time: 63.834s Computed global error Bellman mean: 2.378586373122265e-07 iter: 5 label KQLearning, Reward 83: -57.890, Len(game): 71, Training Time: 5.706s, Prediction Time: 65.157s Computed global error Bellman mean: 1.7407564755874163 iter: 5 Computed global error Bellman mean: 0.017441600045207832 iter: 5 label KQLearning, Reward 84: -102.269, Len(game): 80, Training Time: 5.785s, Prediction Time: 66.657s Computed global error Bellman mean: 8.440503129960384e-08 iter: 5 Computed global error Bellman mean: 0.017441371956869352 iter: 1 label KQLearning, Reward 85: -118.463, Len(game): 110, Training Time: 5.860s, Prediction Time: 68.724s Computed global error Bellman mean: 0.012457805780351211 iter: 5 Computed global error Bellman mean: 0.014751384531825552 iter: 4 label KQLearning, Reward 86: -110.380, Len(game): 129, Training Time: 6.043s, Prediction Time: 71.191s Computed global error Bellman mean: 0.03129491033117738 iter: 5 Computed global error Bellman mean: 0.022611835804580472 iter: 4 label KQLearning, Reward 87: -354.707, Len(game): 192, Training Time: 6.423s, Prediction 
Time: 74.929s Computed global error Bellman mean: 1.0985266453257158e-07 iter: 5 label KQLearning, Reward 88: -85.678, Len(game): 110, Training Time: 6.477s, Prediction Time: 77.135s Computed global error Bellman mean: 7.63840391769728e-07 iter: 5 label KQLearning, Reward 89: -163.239, Len(game): 188, Training Time: 6.617s, Prediction Time: 80.950s Computed global error Bellman mean: 1.7936836930029392e-07 iter: 5 label KQLearning, Reward 90: -51.148, Len(game): 84, Training Time: 6.656s, Prediction Time: 82.703s Computed global error Bellman mean: 4.589197083895158e-07 iter: 5 label KQLearning, Reward 91: -422.452, Len(game): 125, Training Time: 6.713s, Prediction Time: 85.309s Computed global error Bellman mean: 0.007104244655817143 iter: 5 label KQLearning, Reward 92: -68.175, Len(game): 78, Training Time: 6.747s, Prediction Time: 86.972s Computed global error Bellman mean: 2.9076078389845777e-07 iter: 4 label KQLearning, Reward 93: -120.549, Len(game): 105, Training Time: 6.788s, Prediction Time: 89.213s Computed global error Bellman mean: 0.754567174156408 iter: 5 Computed global error Bellman mean: 0.03965196013333954 iter: 5 label KQLearning, Reward 94: -108.527, Len(game): 117, Training Time: 6.933s, Prediction Time: 91.759s Computed global error Bellman mean: 2.395615999451938e-07 iter: 5 Computed global error Bellman mean: 0.014594064069726452 iter: 3 label KQLearning, Reward 95: -83.180, Len(game): 110, Training Time: 7.051s, Prediction Time: 94.168s Computed global error Bellman mean: 2.981905245218355e-08 iter: 5 Computed global error Bellman mean: 0.014594064069726452 iter: 0 label KQLearning, Reward 96: -143.282, Len(game): 82, Training Time: 7.108s, Prediction Time: 96.003s Computed global error Bellman mean: 0.007248935573219148 iter: 5 label KQLearning, Reward 97: -169.214, Len(game): 185, Training Time: 7.243s, Prediction Time: 100.149s Computed global error Bellman mean: 7.16126345106912e-07 iter: 5 label KQLearning, Reward 98: -65.753, 
Len(game): 152, Training Time: 7.323s, Prediction Time: 103.635s Computed global error Bellman mean: 0.019225974127145058 iter: 5 Computed global error Bellman mean: 0.023936736444295056 iter: 5 label KQLearning, Reward 99: -332.120, Len(game): 125, Training Time: 7.472s, Prediction Time: 106.541s 1 label PPOAgent, Reward 0: -154.002, Len(game): 91, Training Time: 0.026s, Prediction Time: 0.026s label PPOAgent, Reward 1: -127.024, Len(game): 74, Training Time: 0.049s, Prediction Time: 0.049s label PPOAgent, Reward 2: -191.153, Len(game): 67, Training Time: 0.070s, Prediction Time: 0.070s label PPOAgent, Reward 3: -124.929, Len(game): 101, Training Time: 0.101s, Prediction Time: 0.101s label PPOAgent, Reward 4: -242.717, Len(game): 96, Training Time: 0.130s, Prediction Time: 0.130s label PPOAgent, Reward 5: -307.849, Len(game): 103, Training Time: 0.161s, Prediction Time: 0.161s label PPOAgent, Reward 6: -416.849, Len(game): 115, Training Time: 0.195s, Prediction Time: 0.195s label PPOAgent, Reward 7: -108.819, Len(game): 74, Training Time: 0.214s, Prediction Time: 0.214s label PPOAgent, Reward 8: -84.504, Len(game): 77, Training Time: 0.234s, Prediction Time: 0.234s label PPOAgent, Reward 9: -103.856, Len(game): 83, Training Time: 0.255s, Prediction Time: 0.255s label PPOAgent, Reward 10: -259.775, Len(game): 102, Training Time: 0.281s, Prediction Time: 0.281s label PPOAgent, Reward 11: -158.886, Len(game): 96, Training Time: 0.305s, Prediction Time: 0.305s label PPOAgent, Reward 12: -445.462, Len(game): 107, Training Time: 0.332s, Prediction Time: 0.332s label PPOAgent, Reward 13: -238.863, Len(game): 71, Training Time: 0.393s, Prediction Time: 0.393s label PPOAgent, Reward 14: -124.438, Len(game): 100, Training Time: 0.423s, Prediction Time: 0.423s label PPOAgent, Reward 15: -139.580, Len(game): 62, Training Time: 0.443s, Prediction Time: 0.443s label PPOAgent, Reward 16: -80.564, Len(game): 95, Training Time: 0.472s, Prediction Time: 0.472s label PPOAgent, 
Reward 17: -82.568, Len(game): 74, Training Time: 0.495s, Prediction Time: 0.495s label PPOAgent, Reward 18: -95.051, Len(game): 81, Training Time: 0.520s, Prediction Time: 0.520s label PPOAgent, Reward 19: -258.006, Len(game): 90, Training Time: 0.549s, Prediction Time: 0.549s label PPOAgent, Reward 20: -241.132, Len(game): 105, Training Time: 0.575s, Prediction Time: 0.575s label PPOAgent, Reward 21: -144.110, Len(game): 77, Training Time: 0.595s, Prediction Time: 0.595s label PPOAgent, Reward 22: -103.296, Len(game): 82, Training Time: 0.616s, Prediction Time: 0.616s label PPOAgent, Reward 23: -191.619, Len(game): 121, Training Time: 0.647s, Prediction Time: 0.647s label PPOAgent, Reward 24: -300.092, Len(game): 77, Training Time: 0.667s, Prediction Time: 0.667s label PPOAgent, Reward 25: -149.666, Len(game): 77, Training Time: 0.686s, Prediction Time: 0.686s label PPOAgent, Reward 26: -106.092, Len(game): 81, Training Time: 0.706s, Prediction Time: 0.706s label PPOAgent, Reward 27: -124.350, Len(game): 73, Training Time: 0.764s, Prediction Time: 0.764s label PPOAgent, Reward 28: -232.704, Len(game): 83, Training Time: 0.789s, Prediction Time: 0.789s label PPOAgent, Reward 29: -73.539, Len(game): 107, Training Time: 0.821s, Prediction Time: 0.821s label PPOAgent, Reward 30: -365.718, Len(game): 118, Training Time: 0.855s, Prediction Time: 0.855s label PPOAgent, Reward 31: -124.252, Len(game): 74, Training Time: 0.877s, Prediction Time: 0.877s label PPOAgent, Reward 32: -116.795, Len(game): 78, Training Time: 0.901s, Prediction Time: 0.901s label PPOAgent, Reward 33: 51.603, Len(game): 125, Training Time: 0.936s, Prediction Time: 0.936s label PPOAgent, Reward 34: -194.867, Len(game): 86, Training Time: 0.957s, Prediction Time: 0.957s label PPOAgent, Reward 35: -371.477, Len(game): 82, Training Time: 0.979s, Prediction Time: 0.979s label PPOAgent, Reward 36: -378.084, Len(game): 79, Training Time: 0.999s, Prediction Time: 0.999s label PPOAgent, Reward 37: -94.966, 
Len(game): 89, Training Time: 1.022s, Prediction Time: 1.022s label PPOAgent, Reward 38: -134.562, Len(game): 107, Training Time: 1.049s, Prediction Time: 1.049s label PPOAgent, Reward 39: -109.103, Len(game): 61, Training Time: 1.064s, Prediction Time: 1.064s label PPOAgent, Reward 40: -212.743, Len(game): 66, Training Time: 1.118s, Prediction Time: 1.118s label PPOAgent, Reward 41: -119.781, Len(game): 72, Training Time: 1.139s, Prediction Time: 1.139s label PPOAgent, Reward 42: -132.433, Len(game): 89, Training Time: 1.166s, Prediction Time: 1.166s label PPOAgent, Reward 43: -11.774, Len(game): 104, Training Time: 1.197s, Prediction Time: 1.197s label PPOAgent, Reward 44: -143.701, Len(game): 91, Training Time: 1.226s, Prediction Time: 1.226s label PPOAgent, Reward 45: -293.914, Len(game): 89, Training Time: 1.253s, Prediction Time: 1.253s label PPOAgent, Reward 46: -150.273, Len(game): 75, Training Time: 1.275s, Prediction Time: 1.275s label PPOAgent, Reward 47: -74.135, Len(game): 64, Training Time: 1.293s, Prediction Time: 1.293s label PPOAgent, Reward 48: -190.206, Len(game): 62, Training Time: 1.309s, Prediction Time: 1.309s label PPOAgent, Reward 49: -58.671, Len(game): 64, Training Time: 1.326s, Prediction Time: 1.326s label PPOAgent, Reward 50: -113.440, Len(game): 62, Training Time: 1.341s, Prediction Time: 1.341s label PPOAgent, Reward 51: -394.934, Len(game): 99, Training Time: 1.366s, Prediction Time: 1.366s label PPOAgent, Reward 52: -173.129, Len(game): 103, Training Time: 1.392s, Prediction Time: 1.392s label PPOAgent, Reward 53: -79.432, Len(game): 59, Training Time: 1.407s, Prediction Time: 1.407s label PPOAgent, Reward 54: -129.777, Len(game): 66, Training Time: 1.425s, Prediction Time: 1.425s label PPOAgent, Reward 55: -140.662, Len(game): 74, Training Time: 1.443s, Prediction Time: 1.443s label PPOAgent, Reward 56: -115.539, Len(game): 68, Training Time: 1.499s, Prediction Time: 1.499s label PPOAgent, Reward 57: -112.054, Len(game): 65, 
Training Time: 1.518s, Prediction Time: 1.518s label PPOAgent, Reward 58: -306.989, Len(game): 94, Training Time: 1.545s, Prediction Time: 1.545s label PPOAgent, Reward 59: -19.252, Len(game): 94, Training Time: 1.573s, Prediction Time: 1.573s label PPOAgent, Reward 60: -101.722, Len(game): 75, Training Time: 1.597s, Prediction Time: 1.597s label PPOAgent, Reward 61: -137.490, Len(game): 55, Training Time: 1.615s, Prediction Time: 1.615s label PPOAgent, Reward 62: -96.619, Len(game): 58, Training Time: 1.633s, Prediction Time: 1.633s label PPOAgent, Reward 63: -110.281, Len(game): 68, Training Time: 1.654s, Prediction Time: 1.654s label PPOAgent, Reward 64: -324.520, Len(game): 87, Training Time: 1.678s, Prediction Time: 1.678s label PPOAgent, Reward 65: -115.957, Len(game): 94, Training Time: 1.701s, Prediction Time: 1.701s label PPOAgent, Reward 66: -140.492, Len(game): 77, Training Time: 1.721s, Prediction Time: 1.721s label PPOAgent, Reward 67: -86.799, Len(game): 54, Training Time: 1.734s, Prediction Time: 1.734s label PPOAgent, Reward 68: -262.537, Len(game): 96, Training Time: 1.758s, Prediction Time: 1.758s label PPOAgent, Reward 69: -109.956, Len(game): 95, Training Time: 1.782s, Prediction Time: 1.782s label PPOAgent, Reward 70: -327.549, Len(game): 115, Training Time: 1.811s, Prediction Time: 1.811s label PPOAgent, Reward 71: -157.178, Len(game): 106, Training Time: 1.877s, Prediction Time: 1.877s label PPOAgent, Reward 72: -202.787, Len(game): 82, Training Time: 1.902s, Prediction Time: 1.902s label PPOAgent, Reward 73: -93.070, Len(game): 60, Training Time: 1.920s, Prediction Time: 1.920s label PPOAgent, Reward 74: -125.885, Len(game): 63, Training Time: 1.939s, Prediction Time: 1.939s label PPOAgent, Reward 75: -84.186, Len(game): 65, Training Time: 1.958s, Prediction Time: 1.958s label PPOAgent, Reward 76: -140.366, Len(game): 70, Training Time: 1.979s, Prediction Time: 1.979s label PPOAgent, Reward 77: -246.796, Len(game): 105, Training Time: 
2.012s, Prediction Time: 2.012s label PPOAgent, Reward 78: -76.461, Len(game): 64, Training Time: 2.032s, Prediction Time: 2.032s label PPOAgent, Reward 79: -218.941, Len(game): 112, Training Time: 2.061s, Prediction Time: 2.061s label PPOAgent, Reward 80: -152.219, Len(game): 79, Training Time: 2.080s, Prediction Time: 2.080s label PPOAgent, Reward 81: -85.561, Len(game): 58, Training Time: 2.095s, Prediction Time: 2.095s label PPOAgent, Reward 82: -345.654, Len(game): 93, Training Time: 2.119s, Prediction Time: 2.119s label PPOAgent, Reward 83: -263.042, Len(game): 102, Training Time: 2.145s, Prediction Time: 2.145s label PPOAgent, Reward 84: -113.060, Len(game): 76, Training Time: 2.165s, Prediction Time: 2.165s label PPOAgent, Reward 85: -119.133, Len(game): 61, Training Time: 2.180s, Prediction Time: 2.180s label PPOAgent, Reward 86: -111.076, Len(game): 75, Training Time: 2.237s, Prediction Time: 2.237s label PPOAgent, Reward 87: -197.642, Len(game): 86, Training Time: 2.263s, Prediction Time: 2.263s label PPOAgent, Reward 88: -111.857, Len(game): 67, Training Time: 2.283s, Prediction Time: 2.283s label PPOAgent, Reward 89: -72.098, Len(game): 91, Training Time: 2.311s, Prediction Time: 2.311s label PPOAgent, Reward 90: -94.863, Len(game): 88, Training Time: 2.337s, Prediction Time: 2.337s label PPOAgent, Reward 91: -102.801, Len(game): 64, Training Time: 2.357s, Prediction Time: 2.357s label PPOAgent, Reward 92: -109.546, Len(game): 59, Training Time: 2.376s, Prediction Time: 2.376s label PPOAgent, Reward 93: -146.651, Len(game): 75, Training Time: 2.398s, Prediction Time: 2.398s label PPOAgent, Reward 94: -69.519, Len(game): 61, Training Time: 2.414s, Prediction Time: 2.414s label PPOAgent, Reward 95: -130.846, Len(game): 80, Training Time: 2.434s, Prediction Time: 2.434s label PPOAgent, Reward 96: -87.744, Len(game): 61, Training Time: 2.451s, Prediction Time: 2.451s label PPOAgent, Reward 97: -176.966, Len(game): 104, Training Time: 2.476s, Prediction 
Time: 2.476s label PPOAgent, Reward 98: -129.096, Len(game): 106, Training Time: 2.503s, Prediction Time: 2.503s label PPOAgent, Reward 99: -90.169, Len(game): 73, Training Time: 2.521s, Prediction Time: 2.521s label Controller-based, Reward 0: -140.004, Len(game): 69, Training Time: 0.002s, Prediction Time: 0.002s label Controller-based, Reward 1: -133.286, Len(game): 78, Training Time: 0.004s, Prediction Time: 0.004s label Controller-based, Reward 2: -74.503, Len(game): 54, Training Time: 0.014s, Prediction Time: 0.004s label Controller-based, Reward 3: -115.268, Len(game): 53, Training Time: 0.024s, Prediction Time: 0.006s label Controller-based, Reward 4: -137.267, Len(game): 84, Training Time: 0.033s, Prediction Time: 0.009s label Controller-based, Reward 5: -3.014, Len(game): 65, Training Time: 0.042s, Prediction Time: 0.012s label Controller-based, Reward 6: -283.395, Len(game): 79, Training Time: 0.054s, Prediction Time: 0.014s label Controller-based, Reward 7: -52.856, Len(game): 57, Training Time: 0.064s, Prediction Time: 0.016s label Controller-based, Reward 8: -131.374, Len(game): 57, Training Time: 0.075s, Prediction Time: 0.019s label Controller-based, Reward 9: -117.601, Len(game): 65, Training Time: 0.088s, Prediction Time: 0.021s label Controller-based, Reward 10: -146.089, Len(game): 64, Training Time: 0.098s, Prediction Time: 0.023s label Controller-based, Reward 11: -159.764, Len(game): 65, Training Time: 0.109s, Prediction Time: 0.027s label Controller-based, Reward 12: -159.547, Len(game): 67, Training Time: 0.120s, Prediction Time: 0.030s label Controller-based, Reward 13: -148.711, Len(game): 60, Training Time: 0.134s, Prediction Time: 0.032s label Controller-based, Reward 14: -99.802, Len(game): 88, Training Time: 0.147s, Prediction Time: 0.035s label Controller-based, Reward 15: -77.539, Len(game): 86, Training Time: 0.160s, Prediction Time: 0.039s label Controller-based, Reward 16: -83.987, Len(game): 76, Training Time: 0.173s, Prediction 
Time: 0.041s label Controller-based, Reward 17: -95.219, Len(game): 76, Training Time: 0.186s, Prediction Time: 0.043s label Controller-based, Reward 18: -16.084, Len(game): 91, Training Time: 0.200s, Prediction Time: 0.046s label Controller-based, Reward 19: -34.542, Len(game): 66, Training Time: 0.215s, Prediction Time: 0.048s label Controller-based, Reward 20: -202.657, Len(game): 102, Training Time: 0.229s, Prediction Time: 0.051s label Controller-based, Reward 21: -337.431, Len(game): 133, Training Time: 0.244s, Prediction Time: 0.054s label Controller-based, Reward 22: -217.754, Len(game): 90, Training Time: 0.258s, Prediction Time: 0.057s label Controller-based, Reward 23: -131.460, Len(game): 104, Training Time: 0.272s, Prediction Time: 0.060s label Controller-based, Reward 24: -110.646, Len(game): 54, Training Time: 0.286s, Prediction Time: 0.061s label Controller-based, Reward 25: -137.962, Len(game): 70, Training Time: 0.299s, Prediction Time: 0.064s label Controller-based, Reward 26: -345.138, Len(game): 86, Training Time: 0.313s, Prediction Time: 0.066s label Controller-based, Reward 27: -205.343, Len(game): 72, Training Time: 0.326s, Prediction Time: 0.068s label Controller-based, Reward 28: -247.341, Len(game): 96, Training Time: 0.340s, Prediction Time: 0.072s label Controller-based, Reward 29: -288.672, Len(game): 112, Training Time: 0.354s, Prediction Time: 0.074s label Controller-based, Reward 30: -52.723, Len(game): 88, Training Time: 0.368s, Prediction Time: 0.076s label Controller-based, Reward 31: -191.440, Len(game): 92, Training Time: 0.382s, Prediction Time: 0.079s label Controller-based, Reward 32: -45.545, Len(game): 59, Training Time: 0.396s, Prediction Time: 0.080s label Controller-based, Reward 33: 14.740, Len(game): 65, Training Time: 0.408s, Prediction Time: 0.083s label Controller-based, Reward 34: -159.346, Len(game): 80, Training Time: 0.423s, Prediction Time: 0.086s label Controller-based, Reward 35: -46.395, Len(game): 80, 
Training Time: 0.436s, Prediction Time: 0.088s label Controller-based, Reward 36: -9.743, Len(game): 71, Training Time: 0.451s, Prediction Time: 0.090s label Controller-based, Reward 37: -13.123, Len(game): 94, Training Time: 0.465s, Prediction Time: 0.092s label Controller-based, Reward 38: -53.729, Len(game): 95, Training Time: 0.480s, Prediction Time: 0.095s label Controller-based, Reward 39: -38.773, Len(game): 93, Training Time: 0.494s, Prediction Time: 0.098s label Controller-based, Reward 40: 0.637, Len(game): 114, Training Time: 0.508s, Prediction Time: 0.103s label Controller-based, Reward 41: -24.424, Len(game): 113, Training Time: 0.524s, Prediction Time: 0.107s label Controller-based, Reward 42: 20.500, Len(game): 139, Training Time: 0.539s, Prediction Time: 0.110s label Controller-based, Reward 43: -142.386, Len(game): 183, Training Time: 0.555s, Prediction Time: 0.116s label Controller-based, Reward 44: -38.658, Len(game): 85, Training Time: 0.571s, Prediction Time: 0.119s label Controller-based, Reward 45: -39.666, Len(game): 97, Training Time: 0.587s, Prediction Time: 0.122s label Controller-based, Reward 46: -42.530, Len(game): 92, Training Time: 0.603s, Prediction Time: 0.124s label Controller-based, Reward 47: -36.293, Len(game): 123, Training Time: 0.619s, Prediction Time: 0.127s label Controller-based, Reward 48: -23.337, Len(game): 90, Training Time: 0.634s, Prediction Time: 0.130s label Controller-based, Reward 49: -74.321, Len(game): 90, Training Time: 0.650s, Prediction Time: 0.133s label Controller-based, Reward 50: 19.134, Len(game): 144, Training Time: 0.668s, Prediction Time: 0.136s label Controller-based, Reward 51: -101.110, Len(game): 142, Training Time: 0.685s, Prediction Time: 0.141s label Controller-based, Reward 52: -283.108, Len(game): 2000, Training Time: 0.727s, Prediction Time: 0.200s label Controller-based, Reward 53: -84.156, Len(game): 227, Training Time: 0.745s, Prediction Time: 0.207s label Controller-based, Reward 54: 
-3.956, Len(game): 106, Training Time: 0.762s, Prediction Time: 0.212s label Controller-based, Reward 55: -48.074, Len(game): 74, Training Time: 0.780s, Prediction Time: 0.215s label Controller-based, Reward 56: -9.968, Len(game): 65, Training Time: 0.797s, Prediction Time: 0.218s label Controller-based, Reward 57: -100.251, Len(game): 215, Training Time: 0.817s, Prediction Time: 0.223s label Controller-based, Reward 58: 235.041, Len(game): 324, Training Time: 0.837s, Prediction Time: 0.233s label Controller-based, Reward 59: 189.897, Len(game): 516, Training Time: 0.860s, Prediction Time: 0.247s label Controller-based, Reward 60: -104.188, Len(game): 184, Training Time: 0.879s, Prediction Time: 0.253s label Controller-based, Reward 61: -85.103, Len(game): 208, Training Time: 0.897s, Prediction Time: 0.258s label Controller-based, Reward 62: -15.177, Len(game): 346, Training Time: 0.918s, Prediction Time: 0.268s label Controller-based, Reward 63: -87.819, Len(game): 245, Training Time: 0.937s, Prediction Time: 0.273s label Controller-based, Reward 64: 192.459, Len(game): 289, Training Time: 0.956s, Prediction Time: 0.281s label Controller-based, Reward 65: -40.399, Len(game): 229, Training Time: 0.974s, Prediction Time: 0.289s label Controller-based, Reward 66: -85.762, Len(game): 138, Training Time: 0.992s, Prediction Time: 0.292s label Controller-based, Reward 67: -109.227, Len(game): 195, Training Time: 1.011s, Prediction Time: 0.299s label Controller-based, Reward 68: 167.952, Len(game): 770, Training Time: 1.045s, Prediction Time: 0.323s label Controller-based, Reward 69: -240.846, Len(game): 960, Training Time: 1.075s, Prediction Time: 0.358s label Controller-based, Reward 70: -23.430, Len(game): 293, Training Time: 1.096s, Prediction Time: 0.367s label Controller-based, Reward 71: 15.711, Len(game): 203, Training Time: 1.114s, Prediction Time: 0.374s label Controller-based, Reward 72: -18.079, Len(game): 213, Training Time: 1.136s, Prediction Time: 0.381s 
label Controller-based, Reward 73: -138.834, Len(game): 760, Training Time: 1.160s, Prediction Time: 0.401s label Controller-based, Reward 74: -78.313, Len(game): 212, Training Time: 1.178s, Prediction Time: 0.406s label Controller-based, Reward 75: -118.766, Len(game): 202, Training Time: 1.198s, Prediction Time: 0.411s label Controller-based, Reward 76: 14.496, Len(game): 2000, Training Time: 1.244s, Prediction Time: 0.487s label Controller-based, Reward 77: 86.466, Len(game): 2000, Training Time: 1.288s, Prediction Time: 0.552s label Controller-based, Reward 78: 50.990, Len(game): 2000, Training Time: 1.337s, Prediction Time: 0.627s label Controller-based, Reward 79: -126.798, Len(game): 170, Training Time: 1.358s, Prediction Time: 0.633s label Controller-based, Reward 80: 218.697, Len(game): 267, Training Time: 1.377s, Prediction Time: 0.640s label Controller-based, Reward 81: -195.316, Len(game): 490, Training Time: 1.400s, Prediction Time: 0.656s label Controller-based, Reward 82: -79.591, Len(game): 283, Training Time: 1.421s, Prediction Time: 0.663s label Controller-based, Reward 83: 161.744, Len(game): 469, Training Time: 1.443s, Prediction Time: 0.677s label Controller-based, Reward 84: -78.987, Len(game): 526, Training Time: 1.465s, Prediction Time: 0.689s label Controller-based, Reward 85: 124.039, Len(game): 835, Training Time: 1.493s, Prediction Time: 0.713s label Controller-based, Reward 86: -92.462, Len(game): 164, Training Time: 1.512s, Prediction Time: 0.719s label Controller-based, Reward 87: -101.355, Len(game): 244, Training Time: 1.531s, Prediction Time: 0.727s label Controller-based, Reward 88: 54.230, Len(game): 2000, Training Time: 1.575s, Prediction Time: 0.805s label Controller-based, Reward 89: -60.153, Len(game): 272, Training Time: 1.595s, Prediction Time: 0.814s label Controller-based, Reward 90: 142.720, Len(game): 413, Training Time: 1.620s, Prediction Time: 0.829s label Controller-based, Reward 91: -83.812, Len(game): 156, Training 
Time: 1.638s, Prediction Time: 0.834s label Controller-based, Reward 92: -79.527, Len(game): 181, Training Time: 1.657s, Prediction Time: 0.841s label Controller-based, Reward 93: -78.391, Len(game): 158, Training Time: 1.676s, Prediction Time: 0.845s label Controller-based, Reward 94: -184.310, Len(game): 573, Training Time: 1.700s, Prediction Time: 0.859s label Controller-based, Reward 95: -82.016, Len(game): 194, Training Time: 1.719s, Prediction Time: 0.864s label Controller-based, Reward 96: -55.855, Len(game): 338, Training Time: 1.740s, Prediction Time: 0.873s label Controller-based, Reward 97: -220.122, Len(game): 499, Training Time: 1.764s, Prediction Time: 0.889s label Controller-based, Reward 98: -96.453, Len(game): 196, Training Time: 1.781s, Prediction Time: 0.893s label Controller-based, Reward 99: 224.983, Len(game): 434, Training Time: 1.801s, Prediction Time: 0.906s label KACAgent, Reward 0: -77.953, Len(game): 73, Training Time: 0.009s, Prediction Time: 0.002s label KACAgent, Reward 1: -197.529, Len(game): 88, Training Time: 0.019s, Prediction Time: 0.018s label KACAgent, Reward 2: -144.463, Len(game): 89, Training Time: 0.033s, Prediction Time: 0.033s label KACAgent, Reward 3: -428.558, Len(game): 82, Training Time: 0.063s, Prediction Time: 0.054s label KACAgent, Reward 4: -49.328, Len(game): 76, Training Time: 0.099s, Prediction Time: 0.079s label KACAgent, Reward 5: -264.959, Len(game): 115, Training Time: 0.160s, Prediction Time: 0.120s label KACAgent, Reward 6: -182.674, Len(game): 74, Training Time: 0.234s, Prediction Time: 0.160s label KACAgent, Reward 7: -179.179, Len(game): 122, Training Time: 0.332s, Prediction Time: 0.228s label KACAgent, Reward 8: -58.507, Len(game): 88, Training Time: 0.465s, Prediction Time: 0.292s label KACAgent, Reward 9: -99.668, Len(game): 99, Training Time: 0.625s, Prediction Time: 0.380s label KACAgent, Reward 10: -170.758, Len(game): 103, Training Time: 0.814s, Prediction Time: 0.477s label KACAgent, Reward 
11: -252.071, Len(game): 153, Training Time: 1.046s, Prediction Time: 0.626s label KACAgent, Reward 12: -207.480, Len(game): 123, Training Time: 1.347s, Prediction Time: 0.776s label KACAgent, Reward 13: -75.008, Len(game): 160, Training Time: 1.699s, Prediction Time: 0.981s label KACAgent, Reward 14: -311.284, Len(game): 115, Training Time: 2.129s, Prediction Time: 1.177s label KACAgent, Reward 15: -48.366, Len(game): 117, Training Time: 2.639s, Prediction Time: 1.409s label KACAgent, Reward 16: 13.890, Len(game): 158, Training Time: 3.200s, Prediction Time: 1.706s label KACAgent, Reward 17: -35.270, Len(game): 104, Training Time: 3.875s, Prediction Time: 1.992s label KACAgent, Reward 18: -79.153, Len(game): 128, Training Time: 4.600s, Prediction Time: 2.354s label KACAgent, Reward 19: -99.495, Len(game): 131, Training Time: 5.426s, Prediction Time: 2.737s label KACAgent, Reward 20: -119.606, Len(game): 123, Training Time: 6.320s, Prediction Time: 3.160s label KACAgent, Reward 21: -227.100, Len(game): 110, Training Time: 7.323s, Prediction Time: 3.619s label KACAgent, Reward 22: -120.408, Len(game): 121, Training Time: 8.419s, Prediction Time: 4.148s label KACAgent, Reward 23: 7.015, Len(game): 194, Training Time: 9.632s, Prediction Time: 4.790s label KACAgent, Reward 24: -63.053, Len(game): 105, Training Time: 10.849s, Prediction Time: 5.331s label KACAgent, Reward 25: -109.040, Len(game): 108, Training Time: 12.138s, Prediction Time: 5.945s label KACAgent, Reward 26: -75.487, Len(game): 107, Training Time: 13.514s, Prediction Time: 6.604s label KACAgent, Reward 27: 1.007, Len(game): 150, Training Time: 14.948s, Prediction Time: 7.348s label KACAgent, Reward 28: -73.049, Len(game): 113, Training Time: 16.539s, Prediction Time: 8.109s label KACAgent, Reward 29: -45.320, Len(game): 113, Training Time: 18.261s, Prediction Time: 8.940s label KACAgent, Reward 30: -87.183, Len(game): 90, Training Time: 20.130s, Prediction Time: 9.777s label KACAgent, Reward 31: 
-260.053, Len(game): 145, Training Time: 22.035s, Prediction Time: 10.758s label KACAgent, Reward 32: -189.274, Len(game): 170, Training Time: 24.126s, Prediction Time: 11.804s label KACAgent, Reward 33: -147.145, Len(game): 322, Training Time: 26.415s, Prediction Time: 13.104s label KACAgent, Reward 34: 15.210, Len(game): 131, Training Time: 29.152s, Prediction Time: 14.302s label KACAgent, Reward 35: -42.190, Len(game): 156, Training Time: 32.058s, Prediction Time: 15.708s label KACAgent, Reward 36: -67.462, Len(game): 150, Training Time: 35.142s, Prediction Time: 17.279s label KACAgent, Reward 37: 14.520, Len(game): 143, Training Time: 38.484s, Prediction Time: 18.877s label KACAgent, Reward 38: -70.947, Len(game): 143, Training Time: 42.025s, Prediction Time: 20.651s label KACAgent, Reward 39: -56.439, Len(game): 120, Training Time: 45.796s, Prediction Time: 22.540s label KACAgent, Reward 40: -84.625, Len(game): 2000, Training Time: 49.789s, Prediction Time: 27.106s label KACAgent, Reward 41: 10.722, Len(game): 181, Training Time: 55.736s, Prediction Time: 29.239s label KACAgent, Reward 42: -45.300, Len(game): 199, Training Time: 55.736s, Prediction Time: 32.498s label KACAgent, Reward 43: 5.563, Len(game): 146, Training Time: 55.736s, Prediction Time: 32.736s label KACAgent, Reward 44: -203.386, Len(game): 140, Training Time: 55.736s, Prediction Time: 32.965s label KACAgent, Reward 45: 12.574, Len(game): 120, Training Time: 55.736s, Prediction Time: 33.162s label KACAgent, Reward 46: -212.257, Len(game): 168, Training Time: 55.736s, Prediction Time: 33.439s label KACAgent, Reward 47: -33.197, Len(game): 121, Training Time: 55.736s, Prediction Time: 33.637s label KACAgent, Reward 48: 15.646, Len(game): 113, Training Time: 55.736s, Prediction Time: 33.821s label KACAgent, Reward 49: 2.483, Len(game): 154, Training Time: 55.736s, Prediction Time: 34.073s label KACAgent, Reward 50: -25.754, Len(game): 143, Training Time: 55.736s, Prediction Time: 34.304s label 
KACAgent, Reward 51: -27.675, Len(game): 155, Training Time: 55.736s, Prediction Time: 34.558s label KACAgent, Reward 52: -44.397, Len(game): 113, Training Time: 55.736s, Prediction Time: 34.741s label KACAgent, Reward 53: -139.682, Len(game): 149, Training Time: 55.736s, Prediction Time: 34.984s label KACAgent, Reward 54: -38.973, Len(game): 181, Training Time: 55.736s, Prediction Time: 35.278s label KACAgent, Reward 55: -28.929, Len(game): 135, Training Time: 55.736s, Prediction Time: 35.499s label KACAgent, Reward 56: -52.824, Len(game): 106, Training Time: 55.736s, Prediction Time: 35.674s label KACAgent, Reward 57: -210.850, Len(game): 156, Training Time: 55.736s, Prediction Time: 35.929s label KACAgent, Reward 58: -169.413, Len(game): 172, Training Time: 55.736s, Prediction Time: 36.207s label KACAgent, Reward 59: -36.120, Len(game): 156, Training Time: 55.736s, Prediction Time: 36.462s label KACAgent, Reward 60: -64.864, Len(game): 93, Training Time: 55.736s, Prediction Time: 36.613s label KACAgent, Reward 61: -174.224, Len(game): 183, Training Time: 55.736s, Prediction Time: 36.915s label KACAgent, Reward 62: 30.066, Len(game): 117, Training Time: 55.736s, Prediction Time: 37.105s label KACAgent, Reward 63: 23.879, Len(game): 139, Training Time: 55.736s, Prediction Time: 37.331s label KACAgent, Reward 64: -327.643, Len(game): 126, Training Time: 55.736s, Prediction Time: 37.538s label KACAgent, Reward 65: -46.474, Len(game): 125, Training Time: 55.736s, Prediction Time: 37.742s label KACAgent, Reward 66: -238.404, Len(game): 594, Training Time: 55.736s, Prediction Time: 38.721s label KACAgent, Reward 67: -81.705, Len(game): 109, Training Time: 55.736s, Prediction Time: 38.899s label KACAgent, Reward 68: -13.623, Len(game): 167, Training Time: 55.736s, Prediction Time: 39.169s label KACAgent, Reward 69: -257.633, Len(game): 185, Training Time: 55.736s, Prediction Time: 39.470s label KACAgent, Reward 70: -45.296, Len(game): 128, Training Time: 55.736s, 
Prediction Time: 39.681s label KACAgent, Reward 71: -7.928, Len(game): 130, Training Time: 55.736s, Prediction Time: 39.893s label KACAgent, Reward 72: -67.325, Len(game): 135, Training Time: 55.736s, Prediction Time: 40.114s label KACAgent, Reward 73: -79.229, Len(game): 228, Training Time: 55.736s, Prediction Time: 40.486s label KACAgent, Reward 74: 39.268, Len(game): 164, Training Time: 55.736s, Prediction Time: 40.753s label KACAgent, Reward 75: 17.092, Len(game): 164, Training Time: 55.736s, Prediction Time: 41.020s label KACAgent, Reward 76: 8.964, Len(game): 120, Training Time: 55.736s, Prediction Time: 41.217s label KACAgent, Reward 77: -98.428, Len(game): 2000, Training Time: 55.736s, Prediction Time: 44.502s label KACAgent, Reward 78: -49.424, Len(game): 112, Training Time: 55.736s, Prediction Time: 44.686s label KACAgent, Reward 79: -145.271, Len(game): 108, Training Time: 55.736s, Prediction Time: 44.862s label KACAgent, Reward 80: -38.801, Len(game): 164, Training Time: 55.736s, Prediction Time: 45.132s label KACAgent, Reward 81: -262.900, Len(game): 168, Training Time: 55.736s, Prediction Time: 45.405s label KACAgent, Reward 82: -41.493, Len(game): 157, Training Time: 55.736s, Prediction Time: 45.661s label KACAgent, Reward 83: -285.713, Len(game): 177, Training Time: 55.736s, Prediction Time: 45.949s label KACAgent, Reward 84: -1.833, Len(game): 146, Training Time: 55.736s, Prediction Time: 46.187s label KACAgent, Reward 85: -159.719, Len(game): 154, Training Time: 55.736s, Prediction Time: 46.439s label KACAgent, Reward 86: 6.576, Len(game): 154, Training Time: 55.736s, Prediction Time: 46.688s label KACAgent, Reward 87: -29.381, Len(game): 129, Training Time: 55.736s, Prediction Time: 46.898s label KACAgent, Reward 88: -53.275, Len(game): 146, Training Time: 55.736s, Prediction Time: 47.134s label KACAgent, Reward 89: -215.667, Len(game): 176, Training Time: 55.736s, Prediction Time: 47.422s label KACAgent, Reward 90: -71.019, Len(game): 124, 
Training Time: 55.736s, Prediction Time: 47.622s label KACAgent, Reward 91: -63.618, Len(game): 142, Training Time: 55.736s, Prediction Time: 47.853s label KACAgent, Reward 92: -57.553, Len(game): 142, Training Time: 55.736s, Prediction Time: 48.084s label KACAgent, Reward 93: -10.609, Len(game): 151, Training Time: 55.736s, Prediction Time: 48.328s label KACAgent, Reward 94: 3.260, Len(game): 124, Training Time: 55.736s, Prediction Time: 48.529s label KACAgent, Reward 95: -81.402, Len(game): 169, Training Time: 55.736s, Prediction Time: 48.803s label KACAgent, Reward 96: -172.443, Len(game): 147, Training Time: 55.736s, Prediction Time: 49.042s label KACAgent, Reward 97: -241.177, Len(game): 161, Training Time: 55.736s, Prediction Time: 49.304s label KACAgent, Reward 98: 6.522, Len(game): 126, Training Time: 55.736s, Prediction Time: 49.510s label KACAgent, Reward 99: 0.284, Len(game): 132, Training Time: 55.736s, Prediction Time: 49.725s label PolicyGradient, Reward 0: -376.577, Len(game): 81, Training Time: 0.008s, Prediction Time: 0.002s label PolicyGradient, Reward 1: -317.620, Len(game): 116, Training Time: 0.017s, Prediction Time: 0.022s label PolicyGradient, Reward 2: -110.302, Len(game): 74, Training Time: 0.123s, Prediction Time: 0.036s label PolicyGradient, Reward 3: -104.416, Len(game): 60, Training Time: 0.148s, Prediction Time: 0.053s label PolicyGradient, Reward 4: -436.721, Len(game): 93, Training Time: 0.184s, Prediction Time: 0.084s label PolicyGradient, Reward 5: -176.985, Len(game): 128, Training Time: 0.246s, Prediction Time: 0.130s label PolicyGradient, Reward 6: -93.553, Len(game): 70, Training Time: 0.335s, Prediction Time: 0.169s label PolicyGradient, Reward 7: -200.846, Len(game): 92, Training Time: 0.437s, Prediction Time: 0.225s label PolicyGradient, Reward 8: -196.270, Len(game): 89, Training Time: 0.562s, Prediction Time: 0.291s label PolicyGradient, Reward 9: -257.503, Len(game): 94, Training Time: 0.712s, Prediction Time: 0.373s 
label PolicyGradient, Reward 10: -155.851, Len(game): 97, Training Time: 0.886s, Prediction Time: 0.468s label PolicyGradient, Reward 11: -209.418, Len(game): 94, Training Time: 1.112s, Prediction Time: 0.575s label PolicyGradient, Reward 12: -112.545, Len(game): 66, Training Time: 1.364s, Prediction Time: 0.672s label PolicyGradient, Reward 13: -50.909, Len(game): 91, Training Time: 1.657s, Prediction Time: 0.805s label PolicyGradient, Reward 14: -87.790, Len(game): 119, Training Time: 1.988s, Prediction Time: 0.965s label PolicyGradient, Reward 15: -237.999, Len(game): 100, Training Time: 2.370s, Prediction Time: 1.145s label PolicyGradient, Reward 16: -120.887, Len(game): 154, Training Time: 2.798s, Prediction Time: 1.361s label PolicyGradient, Reward 17: -73.387, Len(game): 120, Training Time: 3.305s, Prediction Time: 1.579s label PolicyGradient, Reward 18: -124.230, Len(game): 172, Training Time: 3.880s, Prediction Time: 1.876s label PolicyGradient, Reward 19: -105.460, Len(game): 121, Training Time: 4.586s, Prediction Time: 2.163s label PolicyGradient, Reward 20: -54.722, Len(game): 123, Training Time: 5.342s, Prediction Time: 2.499s label PolicyGradient, Reward 21: -104.015, Len(game): 192, Training Time: 6.190s, Prediction Time: 2.925s label PolicyGradient, Reward 22: -126.083, Len(game): 196, Training Time: 7.165s, Prediction Time: 3.418s label PolicyGradient, Reward 23: -125.526, Len(game): 204, Training Time: 8.319s, Prediction Time: 3.970s label PolicyGradient, Reward 24: -172.179, Len(game): 170, Training Time: 9.679s, Prediction Time: 4.545s label PolicyGradient, Reward 25: -90.296, Len(game): 198, Training Time: 11.146s, Prediction Time: 5.236s label PolicyGradient, Reward 26: -189.260, Len(game): 170, Training Time: 12.845s, Prediction Time: 5.961s label PolicyGradient, Reward 27: 2.232, Len(game): 163, Training Time: 14.711s, Prediction Time: 6.824s label PolicyGradient, Reward 28: -17.712, Len(game): 156, Training Time: 16.873s, Prediction Time: 
7.755s label PolicyGradient, Reward 29: -132.171, Len(game): 201, Training Time: 19.165s, Prediction Time: 8.836s label PolicyGradient, Reward 30: -80.358, Len(game): 280, Training Time: 21.705s, Prediction Time: 10.031s label PolicyGradient, Reward 31: 35.367, Len(game): 157, Training Time: 24.720s, Prediction Time: 11.337s label PolicyGradient, Reward 32: -172.774, Len(game): 280, Training Time: 27.898s, Prediction Time: 13.042s label PolicyGradient, Reward 33: -50.473, Len(game): 243, Training Time: 31.489s, Prediction Time: 14.744s label PolicyGradient, Reward 34: -270.038, Len(game): 296, Training Time: 35.514s, Prediction Time: 16.694s label PolicyGradient, Reward 35: -79.860, Len(game): 176, Training Time: 40.046s, Prediction Time: 18.645s label PolicyGradient, Reward 36: -59.044, Len(game): 363, Training Time: 45.408s, Prediction Time: 21.142s label PolicyGradient, Reward 37: -89.982, Len(game): 274, Training Time: 52.149s, Prediction Time: 24.229s label PolicyGradient, Reward 38: -178.406, Len(game): 174, Training Time: 52.149s, Prediction Time: 27.746s label PolicyGradient, Reward 39: -128.704, Len(game): 314, Training Time: 52.149s, Prediction Time: 28.246s label PolicyGradient, Reward 40: -102.935, Len(game): 275, Training Time: 52.149s, Prediction Time: 28.694s label PolicyGradient, Reward 41: -102.138, Len(game): 315, Training Time: 52.149s, Prediction Time: 29.203s label PolicyGradient, Reward 42: -122.916, Len(game): 225, Training Time: 52.149s, Prediction Time: 29.567s label PolicyGradient, Reward 43: -124.166, Len(game): 340, Training Time: 52.149s, Prediction Time: 30.116s label PolicyGradient, Reward 44: -94.108, Len(game): 219, Training Time: 52.149s, Prediction Time: 30.469s label PolicyGradient, Reward 45: -88.906, Len(game): 212, Training Time: 52.149s, Prediction Time: 30.815s label PolicyGradient, Reward 46: -165.790, Len(game): 239, Training Time: 52.149s, Prediction Time: 31.204s label PolicyGradient, Reward 47: -144.381, Len(game): 634, 
Training Time: 52.149s, Prediction Time: 32.231s label PolicyGradient, Reward 48: -155.797, Len(game): 167, Training Time: 52.149s, Prediction Time: 32.502s label PolicyGradient, Reward 49: -209.558, Len(game): 346, Training Time: 52.149s, Prediction Time: 33.057s label PolicyGradient, Reward 50: -150.038, Len(game): 309, Training Time: 52.149s, Prediction Time: 33.563s label PolicyGradient, Reward 51: -69.295, Len(game): 262, Training Time: 52.149s, Prediction Time: 33.979s label PolicyGradient, Reward 52: -15.160, Len(game): 152, Training Time: 52.149s, Prediction Time: 34.228s label PolicyGradient, Reward 53: -58.064, Len(game): 262, Training Time: 52.149s, Prediction Time: 34.651s label PolicyGradient, Reward 54: -116.293, Len(game): 286, Training Time: 52.149s, Prediction Time: 35.109s label PolicyGradient, Reward 55: -49.577, Len(game): 440, Training Time: 52.149s, Prediction Time: 35.819s label PolicyGradient, Reward 56: -201.275, Len(game): 354, Training Time: 52.149s, Prediction Time: 36.393s label PolicyGradient, Reward 57: -45.327, Len(game): 238, Training Time: 52.149s, Prediction Time: 36.780s label PolicyGradient, Reward 58: -293.379, Len(game): 560, Training Time: 52.149s, Prediction Time: 37.685s label PolicyGradient, Reward 59: -235.921, Len(game): 385, Training Time: 52.149s, Prediction Time: 38.309s label PolicyGradient, Reward 60: -297.602, Len(game): 277, Training Time: 52.149s, Prediction Time: 38.752s label PolicyGradient, Reward 61: -89.178, Len(game): 218, Training Time: 52.149s, Prediction Time: 39.106s label PolicyGradient, Reward 62: -182.739, Len(game): 331, Training Time: 52.149s, Prediction Time: 39.640s label PolicyGradient, Reward 63: -215.044, Len(game): 467, Training Time: 52.149s, Prediction Time: 40.396s label PolicyGradient, Reward 64: -43.587, Len(game): 275, Training Time: 52.149s, Prediction Time: 40.845s label PolicyGradient, Reward 65: -92.450, Len(game): 302, Training Time: 52.149s, Prediction Time: 41.336s label 
PolicyGradient, Reward 66: -137.102, Len(game): 348, Training Time: 52.149s, Prediction Time: 41.902s label PolicyGradient, Reward 67: -220.336, Len(game): 657, Training Time: 52.149s, Prediction Time: 42.971s label PolicyGradient, Reward 68: -90.526, Len(game): 205, Training Time: 52.149s, Prediction Time: 43.303s label PolicyGradient, Reward 69: -64.013, Len(game): 253, Training Time: 52.149s, Prediction Time: 43.713s label PolicyGradient, Reward 70: -302.684, Len(game): 304, Training Time: 52.149s, Prediction Time: 44.202s label PolicyGradient, Reward 71: -233.605, Len(game): 218, Training Time: 52.149s, Prediction Time: 44.554s label PolicyGradient, Reward 72: -143.846, Len(game): 161, Training Time: 52.149s, Prediction Time: 44.816s label PolicyGradient, Reward 73: -94.678, Len(game): 197, Training Time: 52.149s, Prediction Time: 45.133s label PolicyGradient, Reward 74: -94.786, Len(game): 201, Training Time: 52.149s, Prediction Time: 45.458s label PolicyGradient, Reward 75: -49.233, Len(game): 200, Training Time: 52.149s, Prediction Time: 45.782s label PolicyGradient, Reward 76: -269.528, Len(game): 441, Training Time: 52.149s, Prediction Time: 46.498s label PolicyGradient, Reward 77: -163.477, Len(game): 305, Training Time: 52.149s, Prediction Time: 46.989s label PolicyGradient, Reward 78: -136.235, Len(game): 229, Training Time: 52.149s, Prediction Time: 47.361s label PolicyGradient, Reward 79: -186.023, Len(game): 245, Training Time: 52.149s, Prediction Time: 47.758s label PolicyGradient, Reward 80: -139.848, Len(game): 255, Training Time: 52.149s, Prediction Time: 48.175s label PolicyGradient, Reward 81: -120.263, Len(game): 349, Training Time: 52.149s, Prediction Time: 48.742s label PolicyGradient, Reward 82: -199.564, Len(game): 190, Training Time: 52.149s, Prediction Time: 49.054s label PolicyGradient, Reward 83: -181.473, Len(game): 398, Training Time: 52.149s, Prediction Time: 49.699s label PolicyGradient, Reward 84: -31.888, Len(game): 333, Training 
Time: 52.149s, Prediction Time: 50.233s label PolicyGradient, Reward 85: -175.663, Len(game): 328, Training Time: 52.149s, Prediction Time: 50.759s label PolicyGradient, Reward 86: -97.013, Len(game): 292, Training Time: 52.149s, Prediction Time: 51.235s label PolicyGradient, Reward 87: -36.709, Len(game): 156, Training Time: 52.149s, Prediction Time: 51.488s label PolicyGradient, Reward 88: -77.101, Len(game): 173, Training Time: 52.149s, Prediction Time: 51.767s label PolicyGradient, Reward 89: -89.409, Len(game): 196, Training Time: 52.149s, Prediction Time: 52.081s label PolicyGradient, Reward 90: -104.524, Len(game): 252, Training Time: 52.149s, Prediction Time: 52.490s label PolicyGradient, Reward 91: -81.670, Len(game): 180, Training Time: 52.149s, Prediction Time: 52.780s label PolicyGradient, Reward 92: -103.460, Len(game): 364, Training Time: 52.149s, Prediction Time: 53.368s label PolicyGradient, Reward 93: -50.038, Len(game): 247, Training Time: 52.149s, Prediction Time: 53.768s label PolicyGradient, Reward 94: -164.729, Len(game): 391, Training Time: 52.149s, Prediction Time: 54.400s label PolicyGradient, Reward 95: -63.206, Len(game): 158, Training Time: 52.149s, Prediction Time: 54.655s label PolicyGradient, Reward 96: -70.394, Len(game): 321, Training Time: 52.149s, Prediction Time: 55.177s label PolicyGradient, Reward 97: 39.600, Len(game): 151, Training Time: 52.149s, Prediction Time: 55.425s label PolicyGradient, Reward 98: -199.498, Len(game): 300, Training Time: 52.149s, Prediction Time: 55.908s label PolicyGradient, Reward 99: -105.230, Len(game): 186, Training Time: 52.149s, Prediction Time: 56.213s label DQNAgent, Reward 0: -100.353, Len(game): 113, Training Time: 0.127s, Prediction Time: 0.004s label DQNAgent, Reward 1: -292.729, Len(game): 104, Training Time: 0.257s, Prediction Time: 0.009s label DQNAgent, Reward 2: -251.125, Len(game): 113, Training Time: 0.391s, Prediction Time: 0.015s label DQNAgent, Reward 3: -60.100, Len(game): 85, 
Training Time: 0.493s, Prediction Time: 0.019s label DQNAgent, Reward 4: -283.162, Len(game): 338, Training Time: 0.912s, Prediction Time: 0.039s label DQNAgent, Reward 5: -308.415, Len(game): 242, Training Time: 1.216s, Prediction Time: 0.056s label DQNAgent, Reward 6: 121.699, Len(game): 1173, Training Time: 2.656s, Prediction Time: 0.160s label DQNAgent, Reward 7: -104.280, Len(game): 585, Training Time: 3.388s, Prediction Time: 0.209s label DQNAgent, Reward 8: -248.759, Len(game): 1246, Training Time: 4.995s, Prediction Time: 0.315s label DQNAgent, Reward 9: -220.946, Len(game): 2000, Training Time: 7.546s, Prediction Time: 0.481s label DQNAgent, Reward 10: 208.528, Len(game): 644, Training Time: 8.384s, Prediction Time: 0.542s label DQNAgent, Reward 11: 209.618, Len(game): 363, Training Time: 8.854s, Prediction Time: 0.576s label DQNAgent, Reward 12: -98.795, Len(game): 137, Training Time: 9.031s, Prediction Time: 0.589s label DQNAgent, Reward 13: -21.690, Len(game): 146, Training Time: 9.219s, Prediction Time: 0.603s label DQNAgent, Reward 14: 209.860, Len(game): 365, Training Time: 9.696s, Prediction Time: 0.638s label DQNAgent, Reward 15: 150.999, Len(game): 536, Training Time: 10.412s, Prediction Time: 0.688s label DQNAgent, Reward 16: -114.041, Len(game): 210, Training Time: 10.682s, Prediction Time: 0.706s label DQNAgent, Reward 17: -60.068, Len(game): 110, Training Time: 10.827s, Prediction Time: 0.714s label DQNAgent, Reward 18: 177.521, Len(game): 239, Training Time: 11.141s, Prediction Time: 0.738s label DQNAgent, Reward 19: -150.340, Len(game): 2000, Training Time: 13.976s, Prediction Time: 0.939s label DQNAgent, Reward 20: -167.060, Len(game): 121, Training Time: 14.131s, Prediction Time: 0.949s label DQNAgent, Reward 21: 27.313, Len(game): 245, Training Time: 14.455s, Prediction Time: 0.971s label DQNAgent, Reward 22: -108.195, Len(game): 717, Training Time: 15.402s, Prediction Time: 1.036s label DQNAgent, Reward 23: -233.710, Len(game): 2000, 
Training Time: 18.018s, Prediction Time: 1.218s label DQNAgent, Reward 24: 119.225, Len(game): 852, Training Time: 19.131s, Prediction Time: 1.294s label DQNAgent, Reward 25: -83.181, Len(game): 395, Training Time: 19.649s, Prediction Time: 1.329s label DQNAgent, Reward 26: -157.777, Len(game): 1281, Training Time: 21.340s, Prediction Time: 1.445s label DQNAgent, Reward 27: -106.503, Len(game): 2000, Training Time: 23.965s, Prediction Time: 1.636s label DQNAgent, Reward 28: 94.857, Len(game): 950, Training Time: 25.228s, Prediction Time: 1.724s label DQNAgent, Reward 29: -262.092, Len(game): 2000, Training Time: 27.876s, Prediction Time: 1.899s label DQNAgent, Reward 30: -184.724, Len(game): 2000, Training Time: 30.531s, Prediction Time: 2.073s label DQNAgent, Reward 31: -179.814, Len(game): 2000, Training Time: 33.188s, Prediction Time: 2.250s label DQNAgent, Reward 32: -199.041, Len(game): 2000, Training Time: 35.850s, Prediction Time: 2.425s label DQNAgent, Reward 33: -188.248, Len(game): 2000, Training Time: 38.510s, Prediction Time: 2.602s label DQNAgent, Reward 34: 81.636, Len(game): 1669, Training Time: 40.770s, Prediction Time: 2.758s label DQNAgent, Reward 35: 234.293, Len(game): 420, Training Time: 41.335s, Prediction Time: 2.799s label DQNAgent, Reward 36: -208.190, Len(game): 2000, Training Time: 44.027s, Prediction Time: 2.976s label DQNAgent, Reward 37: -189.187, Len(game): 2000, Training Time: 46.709s, Prediction Time: 3.150s label DQNAgent, Reward 38: 195.834, Len(game): 748, Training Time: 47.724s, Prediction Time: 3.224s label DQNAgent, Reward 39: 209.305, Len(game): 479, Training Time: 48.367s, Prediction Time: 3.270s label DQNAgent, Reward 40: 231.972, Len(game): 450, Training Time: 48.979s, Prediction Time: 3.310s label DQNAgent, Reward 41: 239.903, Len(game): 344, Training Time: 49.447s, Prediction Time: 3.339s label DQNAgent, Reward 42: 105.298, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.541s label DQNAgent, Reward 43: 
267.084, Len(game): 394, Training Time: 52.166s, Prediction Time: 3.578s label DQNAgent, Reward 44: 228.900, Len(game): 394, Training Time: 52.166s, Prediction Time: 3.615s label DQNAgent, Reward 45: 289.386, Len(game): 259, Training Time: 52.166s, Prediction Time: 3.639s label DQNAgent, Reward 46: 253.069, Len(game): 359, Training Time: 52.166s, Prediction Time: 3.672s label DQNAgent, Reward 47: -206.768, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.827s label DQNAgent, Reward 48: -207.327, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.973s label DQNAgent, Reward 49: 276.521, Len(game): 453, Training Time: 52.166s, Prediction Time: 4.007s label DQNAgent, Reward 50: -214.902, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.150s label DQNAgent, Reward 51: 208.379, Len(game): 427, Training Time: 52.166s, Prediction Time: 4.180s label DQNAgent, Reward 52: -195.631, Len(game): 765, Training Time: 52.166s, Prediction Time: 4.234s label DQNAgent, Reward 53: 215.616, Len(game): 514, Training Time: 52.166s, Prediction Time: 4.271s label DQNAgent, Reward 54: -197.951, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.414s label DQNAgent, Reward 55: -201.905, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.557s label DQNAgent, Reward 56: 291.084, Len(game): 260, Training Time: 52.166s, Prediction Time: 4.577s label DQNAgent, Reward 57: 223.495, Len(game): 585, Training Time: 52.166s, Prediction Time: 4.619s label DQNAgent, Reward 58: -190.981, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.763s label DQNAgent, Reward 59: -76.292, Len(game): 192, Training Time: 52.166s, Prediction Time: 4.777s label DQNAgent, Reward 60: 221.002, Len(game): 401, Training Time: 52.166s, Prediction Time: 4.808s label DQNAgent, Reward 61: 232.984, Len(game): 419, Training Time: 52.166s, Prediction Time: 4.838s label DQNAgent, Reward 62: -206.538, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.980s label 
DQNAgent, Reward 63: 240.155, Len(game): 438, Training Time: 52.166s, Prediction Time: 5.013s label DQNAgent, Reward 64: -226.895, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.157s label DQNAgent, Reward 65: 259.743, Len(game): 419, Training Time: 52.166s, Prediction Time: 5.188s label DQNAgent, Reward 66: 245.651, Len(game): 374, Training Time: 52.166s, Prediction Time: 5.215s label DQNAgent, Reward 67: 265.587, Len(game): 431, Training Time: 52.166s, Prediction Time: 5.247s label DQNAgent, Reward 68: -180.004, Len(game): 842, Training Time: 52.166s, Prediction Time: 5.306s label DQNAgent, Reward 69: -25.527, Len(game): 164, Training Time: 52.166s, Prediction Time: 5.318s label DQNAgent, Reward 70: -100.565, Len(game): 201, Training Time: 52.166s, Prediction Time: 5.331s label DQNAgent, Reward 71: -216.997, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.476s label DQNAgent, Reward 72: 246.259, Len(game): 388, Training Time: 52.166s, Prediction Time: 5.506s label DQNAgent, Reward 73: -119.716, Len(game): 584, Training Time: 52.166s, Prediction Time: 5.547s label DQNAgent, Reward 74: 258.951, Len(game): 357, Training Time: 52.166s, Prediction Time: 5.572s label DQNAgent, Reward 75: 213.073, Len(game): 399, Training Time: 52.166s, Prediction Time: 5.603s label DQNAgent, Reward 76: 189.632, Len(game): 838, Training Time: 52.166s, Prediction Time: 5.661s label DQNAgent, Reward 77: -241.961, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.805s label DQNAgent, Reward 78: 75.758, Len(game): 1708, Training Time: 52.166s, Prediction Time: 5.943s label DQNAgent, Reward 79: -227.349, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.086s label DQNAgent, Reward 80: -245.627, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.234s label DQNAgent, Reward 81: 207.837, Len(game): 460, Training Time: 52.166s, Prediction Time: 6.267s label DQNAgent, Reward 82: 272.286, Len(game): 478, Training Time: 52.166s, Prediction Time: 
6.303s label DQNAgent, Reward 83: 169.565, Len(game): 642, Training Time: 52.166s, Prediction Time: 6.351s label DQNAgent, Reward 84: -222.107, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.494s label DQNAgent, Reward 85: 200.884, Len(game): 511, Training Time: 52.166s, Prediction Time: 6.533s label DQNAgent, Reward 86: 265.252, Len(game): 400, Training Time: 52.166s, Prediction Time: 6.562s label DQNAgent, Reward 87: -214.872, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.705s label DQNAgent, Reward 88: -190.053, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.844s label DQNAgent, Reward 89: 240.586, Len(game): 479, Training Time: 52.166s, Prediction Time: 6.878s label DQNAgent, Reward 90: -192.312, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.020s label DQNAgent, Reward 91: 219.191, Len(game): 448, Training Time: 52.166s, Prediction Time: 7.052s label DQNAgent, Reward 92: 3.452, Len(game): 1716, Training Time: 52.166s, Prediction Time: 7.176s label DQNAgent, Reward 93: -213.273, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.320s label DQNAgent, Reward 94: -243.764, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.461s label DQNAgent, Reward 95: -194.640, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.602s label DQNAgent, Reward 96: 201.674, Len(game): 444, Training Time: 52.166s, Prediction Time: 7.634s label DQNAgent, Reward 97: 279.104, Len(game): 193, Training Time: 52.166s, Prediction Time: 7.649s label DQNAgent, Reward 98: 254.010, Len(game): 355, Training Time: 52.166s, Prediction Time: 7.675s label DQNAgent, Reward 99: -217.626, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.818s Computed global error Bellman mean: 0.4570717447881489 iter: 5 Computed global error Bellman mean: 0.06250832986135778 iter: 4 label KQLearning, Reward 0: -94.652, Len(game): 117, Training Time: 0.138s, Prediction Time: 0.003s Computed global error Bellman mean: 
0.3264326785246641 iter: 5 Computed global error Bellman mean: 0.06250832986135778 iter: 0 Computed global error Bellman mean: 0.006574753577857203 iter: 5 label KQLearning, Reward 1: -85.618, Len(game): 90, Training Time: 0.278s, Prediction Time: 0.028s Computed global error Bellman mean: 2.9554992113685035e-07 iter: 4 label KQLearning, Reward 2: -651.926, Len(game): 77, Training Time: 0.300s, Prediction Time: 0.103s Computed global error Bellman mean: 8.383290537888723e-08 iter: 3 label KQLearning, Reward 3: -220.218, Len(game): 101, Training Time: 0.326s, Prediction Time: 0.202s Computed global error Bellman mean: 5.367322145548921e-07 iter: 5 label KQLearning, Reward 4: -289.222, Len(game): 137, Training Time: 0.398s, Prediction Time: 0.378s Computed global error Bellman mean: 2.105239215464313 iter: 5 Computed global error Bellman mean: 0.08960391448271526 iter: 5 label KQLearning, Reward 5: -403.410, Len(game): 129, Training Time: 0.572s, Prediction Time: 0.606s Computed global error Bellman mean: 0.0012465311615857508 iter: 5 Computed global error Bellman mean: 0.018157787194138943 iter: 5 label KQLearning, Reward 6: -706.512, Len(game): 331, Training Time: 1.074s, Prediction Time: 1.260s Computed global error Bellman mean: 7.564830843865642e-07 iter: 5 Computed global error Bellman mean: 0.018157787194138943 iter: 0 label KQLearning, Reward 7: -115.241, Len(game): 157, Training Time: 1.193s, Prediction Time: 1.671s Computed global error Bellman mean: 9.80103593083591e-07 iter: 4 label KQLearning, Reward 8: -11.041, Len(game): 94, Training Time: 1.225s, Prediction Time: 1.953s Computed global error Bellman mean: 4.197550024370722e-07 iter: 5 label KQLearning, Reward 9: -19.129, Len(game): 128, Training Time: 1.290s, Prediction Time: 2.350s Computed global error Bellman mean: 2.0669793039081446e-07 iter: 5 label KQLearning, Reward 10: -301.420, Len(game): 117, Training Time: 1.354s, Prediction Time: 2.766s Computed global error Bellman mean: 
0.08373126374953485 iter: 5 Computed global error Bellman mean: 0.05462393912900874 iter: 5 label KQLearning, Reward 11: -42.674, Len(game): 78, Training Time: 1.421s, Prediction Time: 3.079s Computed global error Bellman mean: 0.5468703081806887 iter: 5 Computed global error Bellman mean: 0.028781752528066643 iter: 5 Computed global error Bellman mean: 0.03715632726194866 iter: 5 label KQLearning, Reward 12: -109.428, Len(game): 82, Training Time: 1.549s, Prediction Time: 3.397s Computed global error Bellman mean: 8.808398602292144e-08 iter: 3 Computed global error Bellman mean: 0.012347614152382254 iter: 5 Computed global error Bellman mean: 0.0016687136170853616 iter: 5 label KQLearning, Reward 13: -86.860, Len(game): 73, Training Time: 1.654s, Prediction Time: 3.718s Computed global error Bellman mean: 1.4648319706077046e-07 iter: 3 Computed global error Bellman mean: 0.0062496450771168345 iter: 3 label KQLearning, Reward 14: -111.675, Len(game): 93, Training Time: 1.717s, Prediction Time: 4.127s Computed global error Bellman mean: 5.1603041547531387e-05 iter: 5 label KQLearning, Reward 15: -292.237, Len(game): 139, Training Time: 1.791s, Prediction Time: 4.771s Computed global error Bellman mean: 0.01138081093322849 iter: 5 Computed global error Bellman mean: 0.030162712067387184 iter: 0 label KQLearning, Reward 16: -175.876, Len(game): 123, Training Time: 1.877s, Prediction Time: 5.403s Computed global error Bellman mean: 0.03295929019194882 iter: 5 Computed global error Bellman mean: 0.02128038177079849 iter: 5 label KQLearning, Reward 17: -90.069, Len(game): 53, Training Time: 1.919s, Prediction Time: 5.697s Computed global error Bellman mean: 1.4094325009420564e-08 iter: 4 Computed global error Bellman mean: 0.017127490517891347 iter: 5 label KQLearning, Reward 18: -116.886, Len(game): 86, Training Time: 1.975s, Prediction Time: 6.162s Computed global error Bellman mean: 5.740827606455394e-08 iter: 5 label KQLearning, Reward 19: -144.746, Len(game): 78, 
Training Time: 2.009s, Prediction Time: 6.624s Computed global error Bellman mean: 0.022450273448984336 iter: 5 Computed global error Bellman mean: 0.043383487625418306 iter: 1 label KQLearning, Reward 20: -201.620, Len(game): 128, Training Time: 2.131s, Prediction Time: 7.407s Computed global error Bellman mean: 1.9583929515387393e-07 iter: 5 label KQLearning, Reward 21: -125.644, Len(game): 117, Training Time: 2.186s, Prediction Time: 8.182s Computed global error Bellman mean: 0.05815241736539713 iter: 5 Computed global error Bellman mean: 0.02677875059985211 iter: 5 label KQLearning, Reward 22: -217.014, Len(game): 118, Training Time: 2.323s, Prediction Time: 8.978s Computed global error Bellman mean: 2.530994727251449e-08 iter: 3 Computed global error Bellman mean: 0.02677600244381529 iter: 2 label KQLearning, Reward 23: -123.391, Len(game): 57, Training Time: 2.398s, Prediction Time: 9.386s Computed global error Bellman mean: 3.548205378400837e-07 iter: 5 label KQLearning, Reward 24: -2.588, Len(game): 92, Training Time: 2.436s, Prediction Time: 10.063s Computed global error Bellman mean: 4.144973016135494e-07 iter: 5 label KQLearning, Reward 25: -97.150, Len(game): 70, Training Time: 2.464s, Prediction Time: 10.591s Computed global error Bellman mean: 0.6790841272588155 iter: 5 Computed global error Bellman mean: 0.01880443414888047 iter: 5 label KQLearning, Reward 26: -129.596, Len(game): 118, Training Time: 2.606s, Prediction Time: 11.489s Computed global error Bellman mean: 6.261852524217356e-07 iter: 5 Computed global error Bellman mean: 0.01880443414888047 iter: 0 label KQLearning, Reward 27: -157.306, Len(game): 140, Training Time: 2.706s, Prediction Time: 12.604s Computed global error Bellman mean: 3.1689305608928463e-07 iter: 4 label KQLearning, Reward 28: -119.922, Len(game): 75, Training Time: 2.736s, Prediction Time: 13.218s Computed global error Bellman mean: 0.009533756980243 iter: 5 label KQLearning, Reward 29: 39.082, Len(game): 122, Training 
Time: 2.798s, Prediction Time: 14.270s Computed global error Bellman mean: 0.003404561565090435 iter: 5 label KQLearning, Reward 30: -234.000, Len(game): 158, Training Time: 2.890s, Prediction Time: 15.667s Computed global error Bellman mean: 0.03960780091341623 iter: 5 Computed global error Bellman mean: 0.009449045477446339 iter: 5 label KQLearning, Reward 31: -117.435, Len(game): 144, Training Time: 3.122s, Prediction Time: 17.007s Computed global error Bellman mean: 0.18783579814596527 iter: 5 Computed global error Bellman mean: 0.02035434571222894 iter: 5 label KQLearning, Reward 32: -319.152, Len(game): 71, Training Time: 3.187s, Prediction Time: 17.696s Computed global error Bellman mean: 0.0154158768209216 iter: 5 Computed global error Bellman mean: 0.020309987993327658 iter: 1 Computed global error Bellman mean: 0.0073802352033646304 iter: 5 label KQLearning, Reward 33: -119.783, Len(game): 74, Training Time: 3.281s, Prediction Time: 18.423s Computed global error Bellman mean: 3.162234209349161e-07 iter: 5 label KQLearning, Reward 34: -53.407, Len(game): 77, Training Time: 3.315s, Prediction Time: 19.196s Computed global error Bellman mean: 6.136186387437214e-07 iter: 5 label KQLearning, Reward 35: -60.238, Len(game): 97, Training Time: 3.359s, Prediction Time: 20.199s Computed global error Bellman mean: 5.47988723438321e-05 iter: 5 label KQLearning, Reward 36: -116.926, Len(game): 89, Training Time: 3.405s, Prediction Time: 21.140s Computed global error Bellman mean: 1.208702274299961e-07 iter: 5 label KQLearning, Reward 37: -84.143, Len(game): 97, Training Time: 3.451s, Prediction Time: 22.185s Computed global error Bellman mean: 1.0152060020348307e-07 iter: 5 label KQLearning, Reward 38: -146.181, Len(game): 79, Training Time: 3.485s, Prediction Time: 23.051s Computed global error Bellman mean: 7.532708188289975e-08 iter: 3 label KQLearning, Reward 39: -121.399, Len(game): 79, Training Time: 3.512s, Prediction Time: 23.937s Computed global error Bellman 
mean: 3.2772637445152575e-07 iter: 5 label KQLearning, Reward 40: -29.127, Len(game): 128, Training Time: 3.578s, Prediction Time: 25.412s Computed global error Bellman mean: 0.0002285378999159503 iter: 5 label KQLearning, Reward 41: -164.180, Len(game): 157, Training Time: 3.666s, Prediction Time: 27.241s Computed global error Bellman mean: 0.0021274911285027523 iter: 5 label KQLearning, Reward 42: -128.157, Len(game): 66, Training Time: 3.693s, Prediction Time: 28.052s Computed global error Bellman mean: 0.15732504860907268 iter: 5 Computed global error Bellman mean: 0.007424960495000498 iter: 5 label KQLearning, Reward 43: -309.978, Len(game): 103, Training Time: 3.808s, Prediction Time: 29.293s Computed global error Bellman mean: 2.840977078238798e-07 iter: 5 label KQLearning, Reward 44: -42.808, Len(game): 119, Training Time: 3.858s, Prediction Time: 30.801s Computed global error Bellman mean: 0.00030923682043483086 iter: 5 label KQLearning, Reward 45: -205.911, Len(game): 218, Training Time: 4.032s, Prediction Time: 33.577s Computed global error Bellman mean: 2.1282839249811437e-07 iter: 5 label KQLearning, Reward 46: -203.133, Len(game): 115, Training Time: 4.089s, Prediction Time: 35.100s Computed global error Bellman mean: 5.204880500451022e-07 iter: 5 label KQLearning, Reward 47: -84.245, Len(game): 171, Training Time: 4.192s, Prediction Time: 37.424s Computed global error Bellman mean: 2.503797588159602e-07 iter: 4 label KQLearning, Reward 48: -18.125, Len(game): 107, Training Time: 4.237s, Prediction Time: 38.926s Computed global error Bellman mean: 0.011789465241330416 iter: 5 Computed global error Bellman mean: 0.05543863172482571 iter: 0 label KQLearning, Reward 49: -110.104, Len(game): 123, Training Time: 4.322s, Prediction Time: 40.700s Computed global error Bellman mean: 0.1959760255735043 iter: 5 Computed global error Bellman mean: 0.008643040231840576 iter: 5 label KQLearning, Reward 50: -98.471, Len(game): 52, Training Time: 4.363s, Prediction 
Time: 41.465s Computed global error Bellman mean: 2.7521039868641024e-08 iter: 4 label KQLearning, Reward 51: -178.284, Len(game): 73, Training Time: 4.388s, Prediction Time: 42.550s Computed global error Bellman mean: 0.0004555004023591073 iter: 5 label KQLearning, Reward 52: -146.419, Len(game): 179, Training Time: 4.499s, Prediction Time: 45.209s Computed global error Bellman mean: 1.0163691674797325e-07 iter: 4 label KQLearning, Reward 53: -121.030, Len(game): 89, Training Time: 4.536s, Prediction Time: 46.580s Computed global error Bellman mean: 0.012641620007352687 iter: 5 Computed global error Bellman mean: 0.046800591951224296 iter: 0 label KQLearning, Reward 54: -97.419, Len(game): 140, Training Time: 4.646s, Prediction Time: 48.763s Computed global error Bellman mean: 3.572373777531067e-05 iter: 5 label KQLearning, Reward 55: -501.496, Len(game): 176, Training Time: 4.744s, Prediction Time: 51.561s Computed global error Bellman mean: 5.092050690066715e-07 iter: 4 label KQLearning, Reward 56: -156.934, Len(game): 119, Training Time: 4.792s, Prediction Time: 53.510s Computed global error Bellman mean: 1.3249586081752544e-07 iter: 4 label KQLearning, Reward 57: -59.142, Len(game): 101, Training Time: 4.832s, Prediction Time: 55.197s Computed global error Bellman mean: 0.0008537668046471069 iter: 5 label KQLearning, Reward 58: -59.348, Len(game): 104, Training Time: 4.881s, Prediction Time: 56.964s Computed global error Bellman mean: 0.0003649535757385929 iter: 5 label KQLearning, Reward 59: -90.430, Len(game): 105, Training Time: 4.924s, Prediction Time: 58.753s Computed global error Bellman mean: 9.117884074816971e-06 iter: 5 label KQLearning, Reward 60: -155.125, Len(game): 162, Training Time: 5.018s, Prediction Time: 61.601s Computed global error Bellman mean: 1.0825032417671743e-07 iter: 5 label KQLearning, Reward 61: -82.988, Len(game): 96, Training Time: 5.060s, Prediction Time: 63.307s Computed global error Bellman mean: 0.0013346809601749632 iter: 5 
label KQLearning, Reward 62: -222.612, Len(game): 139, Training Time: 5.138s, Prediction Time: 65.804s Computed global error Bellman mean: 5.090026332355291e-07 iter: 5 label KQLearning, Reward 63: -152.451, Len(game): 154, Training Time: 5.220s, Prediction Time: 68.643s Computed global error Bellman mean: 0.004513168539569411 iter: 5 label KQLearning, Reward 64: -294.110, Len(game): 143, Training Time: 5.300s, Prediction Time: 71.337s Computed global error Bellman mean: 1.9014556235134658e-07 iter: 4 label KQLearning, Reward 65: -123.407, Len(game): 149, Training Time: 5.373s, Prediction Time: 74.203s Computed global error Bellman mean: 2.484734640619861e-07 iter: 5 label KQLearning, Reward 66: -105.602, Len(game): 118, Training Time: 5.434s, Prediction Time: 76.510s Computed global error Bellman mean: 4.619221917740615e-07 iter: 5 label KQLearning, Reward 67: -116.142, Len(game): 115, Training Time: 5.491s, Prediction Time: 78.795s Computed global error Bellman mean: 0.001422132753413292 iter: 5 label KQLearning, Reward 68: -93.970, Len(game): 98, Training Time: 5.537s, Prediction Time: 80.762s Computed global error Bellman mean: 2.61457428500898e-07 iter: 5 label KQLearning, Reward 69: -22.622, Len(game): 93, Training Time: 5.577s, Prediction Time: 82.677s Computed global error Bellman mean: 0.0025050210505242108 iter: 5 label KQLearning, Reward 70: -5.392, Len(game): 119, Training Time: 5.637s, Prediction Time: 85.150s Computed global error Bellman mean: 3.583561777771582e-07 iter: 5 label KQLearning, Reward 71: -129.371, Len(game): 128, Training Time: 5.701s, Prediction Time: 87.859s Computed global error Bellman mean: 0.004820714404071528 iter: 5 label KQLearning, Reward 72: -72.912, Len(game): 108, Training Time: 5.751s, Prediction Time: 90.181s Computed global error Bellman mean: 9.461471605248522e-05 iter: 5 label KQLearning, Reward 73: -105.822, Len(game): 122, Training Time: 5.811s, Prediction Time: 92.829s Computed global error Bellman mean: 
0.09486958154788548 iter: 5 Computed global error Bellman mean: 0.0016687594018044314 iter: 1 label KQLearning, Reward 74: -94.668, Len(game): 59, Training Time: 5.846s, Prediction Time: 94.156s Computed global error Bellman mean: 0.00031950302598816345 iter: 5 label KQLearning, Reward 75: -181.803, Len(game): 103, Training Time: 5.892s, Prediction Time: 96.432s Computed global error Bellman mean: 0.011202575501709945 iter: 5 Computed global error Bellman mean: 0.013921933623654334 iter: 5 label KQLearning, Reward 76: -150.242, Len(game): 155, Training Time: 6.137s, Prediction Time: 99.916s Computed global error Bellman mean: 0.021038602882958946 iter: 5 Computed global error Bellman mean: 0.09897872040415234 iter: 0 label KQLearning, Reward 77: -13.402, Len(game): 370, Training Time: 6.852s, Prediction Time: 108.332s Computed global error Bellman mean: 0.007980741227699435 iter: 5 label KQLearning, Reward 78: -163.447, Len(game): 100, Training Time: 6.899s, Prediction Time: 110.691s Computed global error Bellman mean: 1.8182842907339178e-07 iter: 5 label KQLearning, Reward 79: -133.992, Len(game): 138, Training Time: 6.973s, Prediction Time: 113.949s Computed global error Bellman mean: 0.05116569406321372 iter: 5 Computed global error Bellman mean: 0.0012370798462112086 iter: 5 label KQLearning, Reward 80: -196.670, Len(game): 200, Training Time: 7.398s, Prediction Time: 118.742s Computed global error Bellman mean: 0.00019546730587924641 iter: 5 label KQLearning, Reward 81: -35.824, Len(game): 108, Training Time: 7.452s, Prediction Time: 121.398s Computed global error Bellman mean: 3.0157625469636436e-07 iter: 5 label KQLearning, Reward 82: -111.068, Len(game): 113, Training Time: 7.509s, Prediction Time: 124.204s Computed global error Bellman mean: 0.012549751185760634 iter: 5 Computed global error Bellman mean: 0.02876021067227448 iter: 1 label KQLearning, Reward 83: -98.277, Len(game): 117, Training Time: 7.606s, Prediction Time: 127.154s Computed global error 
Bellman mean: 0.0021161812979306215 iter: 5 label KQLearning, Reward 84: -31.986, Len(game): 100, Training Time: 7.647s, Prediction Time: 129.722s Computed global error Bellman mean: 0.007686386470016417 iter: 5 label KQLearning, Reward 85: -129.342, Len(game): 199, Training Time: 7.796s, Prediction Time: 134.826s Computed global error Bellman mean: 0.006022229526358748 iter: 5 label KQLearning, Reward 86: -33.197, Len(game): 215, Training Time: 7.971s, Prediction Time: 140.428s Computed global error Bellman mean: 0.024062407919038683 iter: 5 Computed global error Bellman mean: 0.05594900899344003 iter: 0 label KQLearning, Reward 87: -72.977, Len(game): 93, Training Time: 8.021s, Prediction Time: 142.897s Computed global error Bellman mean: 0.004463153589297446 iter: 5 label KQLearning, Reward 88: -14.426, Len(game): 126, Training Time: 8.086s, Prediction Time: 146.300s Computed global error Bellman mean: 2.2844102611080386e-07 iter: 5 label KQLearning, Reward 89: -162.264, Len(game): 160, Training Time: 8.179s, Prediction Time: 150.642s Computed global error Bellman mean: 6.328115239436929e-07 iter: 5 label KQLearning, Reward 90: -121.643, Len(game): 119, Training Time: 8.238s, Prediction Time: 153.919s Computed global error Bellman mean: 0.0013526667514851798 iter: 5 label KQLearning, Reward 91: -170.828, Len(game): 130, Training Time: 8.303s, Prediction Time: 157.533s Computed global error Bellman mean: 3.914867129444644e-07 iter: 5 label KQLearning, Reward 92: -127.942, Len(game): 242, Training Time: 8.512s, Prediction Time: 164.247s Computed global error Bellman mean: 0.014867821455283158 iter: 5 Computed global error Bellman mean: 0.001951332574971139 iter: 5 label KQLearning, Reward 93: -23.825, Len(game): 88, Training Time: 8.600s, Prediction Time: 166.739s Computed global error Bellman mean: 0.0022563746800983893 iter: 5 label KQLearning, Reward 94: -79.366, Len(game): 113, Training Time: 8.654s, Prediction Time: 169.979s Computed global error Bellman 
mean: 0.007368897319150435 iter: 5 label KQLearning, Reward 95: -106.928, Len(game): 112, Training Time: 8.704s, Prediction Time: 173.217s Computed global error Bellman mean: 0.4083079100777788 iter: 5 Computed global error Bellman mean: 0.04476234941152717 iter: 2 label KQLearning, Reward 96: -128.432, Len(game): 88, Training Time: 8.774s, Prediction Time: 175.802s Computed global error Bellman mean: 0.022882453257802766 iter: 5 Computed global error Bellman mean: 0.04476234941152717 iter: 0 Computed global error Bellman mean: 0.01976457817603097 iter: 3 label KQLearning, Reward 97: -123.442, Len(game): 255, Training Time: 9.365s, Prediction Time: 183.280s Computed global error Bellman mean: 3.740304578463439e-07 iter: 5 label KQLearning, Reward 98: -221.741, Len(game): 189, Training Time: 9.506s, Prediction Time: 188.951s Computed global error Bellman mean: 0.032296455106274734 iter: 5 Computed global error Bellman mean: 0.0494154502474197 iter: 1 label KQLearning, Reward 99: -204.370, Len(game): 97, Training Time: 9.580s, Prediction Time: 191.913s 2 .. rst-class:: sphx-glr-timing **Total running time of the script:** (21 minutes 1.483 seconds) .. _sphx_glr_download_auto_ch8_ch8_lunarlander.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: ch8_lunarlander.ipynb ` .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: ch8_lunarlander.py ` .. container:: sphx-glr-download sphx-glr-download-zip :download:`Download zipped: ch8_lunarlander.zip ` .. only:: html .. rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery `_