.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_ch8\ch8_cartpole.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_ch8_ch8_cartpole.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_ch8_ch8_cartpole.py:


==========================
8.2 Experiments - Cartpole
==========================
We use the OpenAI Gym library to instantiate the gymnasium CartPole-v1 environment and reproduce the figure from chapter 8_XXX.
We train the following agents:

- PPO
- DQN
- Controller-based
- Kernel Actor-Critic
- Kernel Q-Learning
- Kernel Q-Learning HJB
- Kernel Policy-Gradient

We show how you can tweak some methods in each algorithm to tune them to the environment.
For detailed documentation on KAgents, see **codpy documentation**.

.. GENERATED FROM PYTHON SOURCE LINES 19-31

.. code-block:: Python


    # Importing necessary modules
    import sys

    from matplotlib import pyplot as plt
    import numpy as np

    import codpy.core as core
    import codpy.KQLearning as KQLearning
    from ignore_utils import *


.. GENERATED FROM PYTHON SOURCE LINES 32-34

KQLearning
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 34-83

.. code-block:: Python


    class KQLearningCP(KQLearning.KQLearning):
        """
        Kernel Q-Learning agent with ``format`` and ``train`` overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            In CartPole, we only want to keep a certain amount of timesteps for each episode.
            The original format approach keeps all the data.

            Parameters:
            - sarsd: collection of game data (states, actions, next_states, rewards, dones).
            - max_training_game_size: maximum number of timesteps to keep; None keeps everything.
            - kwargs: forwarded to ``compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones: formatted game data.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed on the whole episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)
            batch = (states, actions, next_states, rewards, returns, dones)
            if max_training_game_size is not None:
                # Keep only the first max_training_game_size timesteps of every array.
                batch = tuple(a[:max_training_game_size] for a in batch)
            return batch

        def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
            """
            In CartPole we don't want clustering so we override the train method.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - max_training_game_size: maximum number of timesteps kept from the episode.
            - tol: accepted in the signature but not used by this override.
            - kwargs: "max_game" (episode length from which training is skipped,
              default 1e12) is read here; all kwargs are forwarded to ``format`` and
              to ``optimal_states_values_function``.

            Returns:
            - None. The fitted critic is stored in ``self.critic``.
            """
            states, actions, next_states, rewards, dones = game
            # In CartPole we skip training if we already solved the environment.
            if len(states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return
            states, actions, next_states, rewards, returns, dones = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            if self.critic.is_valid():
                # Once the critic is valid, its predictions on the new episode
                # replace the returns produced by format().
                returns = self.critic(np.concatenate([states, actions], axis=1))
            self.replay_buffer.push(states, actions, next_states, rewards, returns, dones)
            games = self.replay_buffer.memory
            # self.critic here is a kernel, and it is fitted on the entire replay buffer to solve the Bellman equations.
            # "verbose" is only a default: writing verbose=True next to **kwargs
            # would raise TypeError as soon as a caller passes its own "verbose".
            fit_kwargs = {"verbose": True, **kwargs}
            self.critic = self.optimal_states_values_function(games, **fit_kwargs)


.. GENERATED FROM PYTHON SOURCE LINES 84-86

PolicyGradient
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 86-120

.. code-block:: Python


    class PolicyGradientCP(KQLearning.PolicyGradient):
        """
        Kernel policy-gradient agent with ``format`` and ``train`` overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Format the game data by keeping only up to max_training_game_size timesteps.

            Parameters:
            - sarsd: collection of game data (states, actions, next_states, rewards, dones).
            - max_training_game_size: maximum number of timesteps to keep; None keeps everything.
            - kwargs: forwarded to ``compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones: formatted game data.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed on the whole episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)
            batch = (states, actions, next_states, rewards, returns, dones)
            if max_training_game_size is not None:
                # Keep only the first max_training_game_size timesteps of every array.
                batch = tuple(a[:max_training_game_size] for a in batch)
            return batch

        def train(self, game, **kwargs):
            """
            Skips training if the game was too long, otherwise delegates to the parent.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - kwargs: "max_game" (episode length from which training is skipped,
              default 1e12) is read here; all kwargs are forwarded to the parent's
              ``train`` together with a default ``clip`` of 1.0.
            """
            states, actions, next_states, rewards, dones = game
            if len(states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return
            # "clip" is only a default: writing clip=1. next to **kwargs would
            # raise TypeError as soon as a caller passes its own "clip".
            super().train(game, **{"clip": 1.0, **kwargs})


.. GENERATED FROM PYTHON SOURCE LINES 121-123

KActorCritic
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 123-167

.. code-block:: Python


    class KActorCriticCP(KQLearning.KActorCritic):
        """
        Kernel actor-critic agent with ``format`` and ``train`` overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Format the game data by keeping only up to max_training_game_size timesteps.

            Parameters:
            - sarsd: tuple collection of game data (states, actions, next_states, rewards, dones).
            - max_training_game_size: maximum number of timesteps to keep for training.
            - kwargs: forwarded to ``compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones: formatted game data.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed on the whole episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)
            batch = (states, actions, next_states, rewards, returns, dones)
            if max_training_game_size is not None:
                # Keep only the first max_training_game_size timesteps of every array.
                batch = tuple(a[:max_training_game_size] for a in batch)
            return batch

        def train(self, game, **kwargs):
            """
            Skips training if the game was too long.
            (for CartPole, this means we already solved the environment.)

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - kwargs: "max_game" (episode length from which training is skipped,
              default 1e12) is read here; all kwargs are forwarded to the parent's
              ``train`` together with a default ``clip`` of 1.0.
            """
            states, actions, next_states, rewards, dones = game
            if len(states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return
            # "clip" is only a default: writing clip=1. next to **kwargs would
            # raise TypeError as soon as a caller passes its own "clip".
            super().train(game, **{"clip": 1.0, **kwargs})


.. GENERATED FROM PYTHON SOURCE LINES 168-170

HJB
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 170-215

.. code-block:: Python


    class KQLearningHJBCP(KQLearning.KQLearningHJB):
        """
        Kernel Q-Learning HJB agent with ``format`` and ``train`` overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Format the game data by keeping only up to max_training_game_size timesteps.

            Parameters:
            - sarsd: collection of game data (states, actions, next_states, rewards, dones).
            - max_training_game_size: maximum number of timesteps to keep; None keeps everything.
            - kwargs: forwarded to ``compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones: formatted game data.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(e) for e in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed on the whole episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)
            batch = (states, actions, next_states, rewards, returns, dones)
            if max_training_game_size is not None:
                # Keep only the first max_training_game_size timesteps of every array.
                batch = tuple(a[:max_training_game_size] for a in batch)
            return batch

        def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
            """
            Push the formatted episode to the replay buffer and refit the critic on it.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - max_training_game_size: maximum number of timesteps kept from the episode.
            - tol: accepted in the signature but not used by this override.
            - kwargs: "max_game" (episode length from which training is skipped,
              default 1e12) is read here; all kwargs are forwarded to ``format`` and
              to ``optimal_states_values_function``.

            Returns:
            - None. The fitted critic is stored in ``self.critic``.
            """
            states, actions, next_states, rewards, dones = game
            if len(states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return
            states, actions, next_states, rewards, returns, dones = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            self.replay_buffer.push(states, actions, next_states, rewards, returns, dones)
            games = self.replay_buffer.memory
            states, actions, next_states, rewards, returns, dones = games
            # is_valid() returns False if the kernel hasn't been properly initialized, i.e. x and fx haven't been set.
            if self.critic.is_valid():
                # We compute returns using the critic instead of MC returns,
                # here over the whole replay buffer.
                returns = self.critic(np.concatenate([states, actions], axis=1))
                games = states, actions, next_states, rewards, returns, dones
            # "verbose" is only a default: writing verbose=True next to **kwargs
            # would raise TypeError as soon as a caller passes its own "verbose".
            fit_kwargs = {"verbose": True, **kwargs}
            self.critic = self.optimal_states_values_function(games, **fit_kwargs)


.. GENERATED FROM PYTHON SOURCE LINES 216-218

KController
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 218-387

.. code-block:: Python


    class heuristic_ControllerCP:
        """
        This class defines an expert-based heuristic controller for the CartPole environment.
        """

        # This is the number of parameters to be optimized
        dim = 4

        def __init__(self, w=None, **kwargs):
            """
            Parameters:
            - w: initial parameters; when None, ``dim`` parameters all equal to 0.5 are used.
            - kwargs: accepted and ignored.
            """
            if w is None:
                self.w = np.ones([self.dim]) * 0.5
            else:
                self.w = w

        def get_distribution(self):
            """
            This will be called by the optimizer.
            You need to define a way to sample from the parameters distribution, and get the support.

            Returns:
            - an object whose call ``(n)`` draws an array of shape (n, number of parameters)
              uniformly in [-1, 1), and whose ``support(v)`` returns ``v`` unchanged.
            """

            class uniform:
                def __init__(self, shape1):
                    self.shape1 = shape1

                def __call__(self, n):
                    # np.random.uniform samples [0, 1); rescale it to [-1, 1).
                    return 2 * np.random.uniform(size=[n, self.shape1]) - 1

                def support(self, v):
                    return v

            return uniform(self.w.shape[0])

        def get_thetas(self):
            """Return the current parameters."""
            return self.w

        def set_thetas(self, w):
            """Replace the parameters with the flattened ``w``."""
            self.w = w.flatten()

        def __call__(self, s, **kwargs):
            """
            Will be used to make inference. This is where you define the action to be taken.

            Parameters:
            - s : state of the environment, a numpy array that broadcasts against the
              parameters (the original note gives shape (n, state_dim); every entry of
              the product is summed into a single score).

            Returns:
            - int, action to be taken: 1 if the weighted sum is positive, 0 otherwise
              (a sum of exactly zero gives int(0.5) == 0).
            """
            score = (self.w * s).sum()
            return int((np.sign(score) + 1) / 2)


    class KControllerCP(KQLearning.KController):
        """
        This is the main class which will optimize the heuristic controller.
        """

        def __init__(self, state_dim, actions_dim, **kwargs):
            # This is where you would pass any other custom controller
            controller = heuristic_ControllerCP(state_dim=state_dim, **kwargs)
            super().__init__(state_dim, actions_dim, controller, **kwargs)

        def get_function(self, **kwargs):
            """
            The optimizer will find the best parameters which maximize this function.
            This is where you would tweak the function to be maximized.

            Returns:
            - function(x): the expectation estimate at x multiplied by the
              estimator's ``distance(x)``.
            """
            self.expectation_estimator = self.get_expectation_estimator(
                self.x, self.y, **kwargs
            )

            def function(x):
                expectation = self.expectation_estimator(x)
                distance = self.expectation_estimator.distance(x)
                return expectation * distance

            return function

        def format(self, sarsd, **kwargs):
            """
            In the case of the controller, the agent only sees the sum of the rewards for an entire episode.
            All other game data won't be used for training. The format function still needs to output a tuple.

            Returns:
            - a 5-tuple: mean state, mean of the controller parameters (in place of
              the actions), mean next state, summed reward, mean of the done flags.
            """
            state, action, next_state, reward, done = [
                core.get_matrix(e) for e in sarsd
            ]
            # Rewards of steps flagged as done are not counted in the episode sum.
            reward[done.astype(bool)] = 0
            # NOTE(review): the encoded actions are overwritten on the next line by
            # the controller parameters, so this call has no visible effect here.
            # Confirm rl_hot_encoder has no side effects before removing it.
            action = KQLearning.rl_hot_encoder(action, self.actions_dim)
            action = core.get_matrix(self.controller.get_thetas()).T
            done = core.get_matrix(done, dtype=bool)
            return (
                core.get_matrix(state.mean(axis=0)).T,
                core.get_matrix(action.mean(axis=0)).T,
                core.get_matrix(next_state.mean(axis=0)).T,
                core.get_matrix(reward.sum(axis=0)).T,
                core.get_matrix(done.mean(axis=0)).T,
            )

        def train(self, game, **kwargs):
            # Similarly, you can skip training if the game is too long to save training time.
            states, actions, next_states, rewards, dones = game
            if len(states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return
            super().train(game, **kwargs)


    if __name__ == "__main__":
        # Define agents here, which will be trained in the benchmark. If game_dictionary is empty, the benchmark will try to load data from the .pkl file
        # PPOAgent, DQNAgent and Benchmark are not defined in this file; presumably
        # they come from `from ignore_utils import *`.
        game_dictionary = {
            "PPOAgent": PPOAgent,
            "PolicyGradient": PolicyGradientCP,
            "Controller-based": KControllerCP,
            "KACAgent": KActorCriticCP,
            "DQNAgent": DQNAgent,
            "KQLearningHJBCP": KQLearningHJBCP,
            "KQLearning": KQLearningCP,
        }

        # Define your agent's parameters here. This dict will be passed in each agent's __init__() method.
        extras = {
            # "D":4,
            "KActor": {"n_batch": 1000000, "max_nystrom": 1000, "reg": 1e-9, "order": None},
            "KCritic": {
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "Rewards": {
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "DQNAgent": {
                # 'reward_function': mc_reward_function,
                "episodes": 500,
                "policy_param": 64,
                "target_param": 64,
            },
            "KController": {
                "reg": 1e-3,
                "order": None,
            },
            "HJBModel": {
                # "latent_shape":[100,50],
                "max_size": 100000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
                "state_dim": 4,
            },
            "max_game": 1000,
            "max_training_game_size": 1000,
            "gamma": 0.99,
            "capacity": 200000000,
            # "seed": 42,
        }
        # With "seed" commented out above, seed is None and NumPy seeds itself
        # from fresh entropy, so runs are not reproducible.
        seed = extras.get("seed", None)
        np.random.seed(seed)
        Benchmark()(
            game_dictionary,
            "CartPole-v1",
            num_games=100,
            num_repeats=3,
            max_time=3,
            axis="episode",
            # file_name="results_CP_final.pkl",
            **extras,
        )
        plt.show()



.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_cartpole_001.png
          :alt: Cumulative Reward over 100 Games
          :srcset: /auto_ch8/images/sphx_glr_ch8_cartpole_001.png
          :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_cartpole_002.png
          :alt: Training Time per Game over 100 Games
          :srcset: /auto_ch8/images/sphx_glr_ch8_cartpole_002.png
          :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

..
code-block:: none label PPOAgent, Reward 0: 27.000, Len(game): 27, Training Time: 0.006s, Prediction Time: 0.006s label PPOAgent, Reward 1: 25.000, Len(game): 25, Training Time: 0.010s, Prediction Time: 0.010s label PPOAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.013s, Prediction Time: 0.013s label PPOAgent, Reward 3: 13.000, Len(game): 13, Training Time: 0.016s, Prediction Time: 0.016s label PPOAgent, Reward 4: 15.000, Len(game): 15, Training Time: 0.019s, Prediction Time: 0.019s label PPOAgent, Reward 5: 12.000, Len(game): 12, Training Time: 0.021s, Prediction Time: 0.021s label PPOAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.024s, Prediction Time: 0.024s label PPOAgent, Reward 7: 33.000, Len(game): 33, Training Time: 0.030s, Prediction Time: 0.030s label PPOAgent, Reward 8: 16.000, Len(game): 16, Training Time: 0.032s, Prediction Time: 0.032s label PPOAgent, Reward 9: 27.000, Len(game): 27, Training Time: 0.037s, Prediction Time: 0.037s label PPOAgent, Reward 10: 12.000, Len(game): 12, Training Time: 0.040s, Prediction Time: 0.040s label PPOAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.042s, Prediction Time: 0.042s label PPOAgent, Reward 12: 12.000, Len(game): 12, Training Time: 0.045s, Prediction Time: 0.045s label PPOAgent, Reward 13: 20.000, Len(game): 20, Training Time: 0.048s, Prediction Time: 0.048s label PPOAgent, Reward 14: 16.000, Len(game): 16, Training Time: 0.052s, Prediction Time: 0.052s label PPOAgent, Reward 15: 37.000, Len(game): 37, Training Time: 0.058s, Prediction Time: 0.058s label PPOAgent, Reward 16: 25.000, Len(game): 25, Training Time: 0.063s, Prediction Time: 0.063s label PPOAgent, Reward 17: 10.000, Len(game): 10, Training Time: 0.065s, Prediction Time: 0.065s label PPOAgent, Reward 18: 17.000, Len(game): 17, Training Time: 0.068s, Prediction Time: 0.068s label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.072s, Prediction Time: 0.072s label PPOAgent, Reward 20: 18.000, Len(game): 18, 
Training Time: 0.075s, Prediction Time: 0.075s label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.078s, Prediction Time: 0.078s label PPOAgent, Reward 22: 15.000, Len(game): 15, Training Time: 0.081s, Prediction Time: 0.081s label PPOAgent, Reward 23: 14.000, Len(game): 14, Training Time: 0.083s, Prediction Time: 0.083s label PPOAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.087s, Prediction Time: 0.087s label PPOAgent, Reward 25: 21.000, Len(game): 21, Training Time: 0.091s, Prediction Time: 0.091s label PPOAgent, Reward 26: 20.000, Len(game): 20, Training Time: 0.094s, Prediction Time: 0.094s label PPOAgent, Reward 27: 11.000, Len(game): 11, Training Time: 0.096s, Prediction Time: 0.096s label PPOAgent, Reward 28: 12.000, Len(game): 12, Training Time: 0.099s, Prediction Time: 0.099s label PPOAgent, Reward 29: 19.000, Len(game): 19, Training Time: 0.102s, Prediction Time: 0.102s label PPOAgent, Reward 30: 10.000, Len(game): 10, Training Time: 0.104s, Prediction Time: 0.104s label PPOAgent, Reward 31: 17.000, Len(game): 17, Training Time: 0.108s, Prediction Time: 0.108s label PPOAgent, Reward 32: 29.000, Len(game): 29, Training Time: 0.113s, Prediction Time: 0.113s label PPOAgent, Reward 33: 15.000, Len(game): 15, Training Time: 0.117s, Prediction Time: 0.117s label PPOAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.119s, Prediction Time: 0.119s label PPOAgent, Reward 35: 15.000, Len(game): 15, Training Time: 0.122s, Prediction Time: 0.122s label PPOAgent, Reward 36: 32.000, Len(game): 32, Training Time: 0.128s, Prediction Time: 0.128s label PPOAgent, Reward 37: 22.000, Len(game): 22, Training Time: 0.133s, Prediction Time: 0.133s label PPOAgent, Reward 38: 19.000, Len(game): 19, Training Time: 0.137s, Prediction Time: 0.137s label PPOAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.138s, Prediction Time: 0.138s label PPOAgent, Reward 40: 16.000, Len(game): 16, Training Time: 0.141s, Prediction Time: 0.141s label 
PPOAgent, Reward 41: 12.000, Len(game): 12, Training Time: 0.143s, Prediction Time: 0.143s label PPOAgent, Reward 42: 31.000, Len(game): 31, Training Time: 0.150s, Prediction Time: 0.150s label PPOAgent, Reward 43: 12.000, Len(game): 12, Training Time: 0.151s, Prediction Time: 0.151s label PPOAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.153s, Prediction Time: 0.153s label PPOAgent, Reward 45: 50.000, Len(game): 50, Training Time: 0.162s, Prediction Time: 0.162s label PPOAgent, Reward 46: 17.000, Len(game): 17, Training Time: 0.165s, Prediction Time: 0.165s label PPOAgent, Reward 47: 28.000, Len(game): 28, Training Time: 0.172s, Prediction Time: 0.172s label PPOAgent, Reward 48: 19.000, Len(game): 19, Training Time: 0.175s, Prediction Time: 0.175s label PPOAgent, Reward 49: 13.000, Len(game): 13, Training Time: 0.178s, Prediction Time: 0.178s label PPOAgent, Reward 50: 20.000, Len(game): 20, Training Time: 0.182s, Prediction Time: 0.182s label PPOAgent, Reward 51: 9.000, Len(game): 9, Training Time: 0.184s, Prediction Time: 0.184s label PPOAgent, Reward 52: 16.000, Len(game): 16, Training Time: 0.188s, Prediction Time: 0.188s label PPOAgent, Reward 53: 42.000, Len(game): 42, Training Time: 0.195s, Prediction Time: 0.195s label PPOAgent, Reward 54: 23.000, Len(game): 23, Training Time: 0.200s, Prediction Time: 0.200s label PPOAgent, Reward 55: 9.000, Len(game): 9, Training Time: 0.202s, Prediction Time: 0.202s label PPOAgent, Reward 56: 19.000, Len(game): 19, Training Time: 0.206s, Prediction Time: 0.206s label PPOAgent, Reward 57: 12.000, Len(game): 12, Training Time: 0.208s, Prediction Time: 0.208s label PPOAgent, Reward 58: 35.000, Len(game): 35, Training Time: 0.215s, Prediction Time: 0.215s label PPOAgent, Reward 59: 18.000, Len(game): 18, Training Time: 0.218s, Prediction Time: 0.218s label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.221s, Prediction Time: 0.221s label PPOAgent, Reward 61: 18.000, Len(game): 18, Training Time: 
0.225s, Prediction Time: 0.225s label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.228s, Prediction Time: 0.228s label PPOAgent, Reward 63: 24.000, Len(game): 24, Training Time: 0.273s, Prediction Time: 0.273s label PPOAgent, Reward 64: 79.000, Len(game): 79, Training Time: 0.288s, Prediction Time: 0.288s label PPOAgent, Reward 65: 46.000, Len(game): 46, Training Time: 0.296s, Prediction Time: 0.296s label PPOAgent, Reward 66: 9.000, Len(game): 9, Training Time: 0.298s, Prediction Time: 0.298s label PPOAgent, Reward 67: 37.000, Len(game): 37, Training Time: 0.305s, Prediction Time: 0.305s label PPOAgent, Reward 68: 45.000, Len(game): 45, Training Time: 0.314s, Prediction Time: 0.314s label PPOAgent, Reward 69: 13.000, Len(game): 13, Training Time: 0.316s, Prediction Time: 0.316s label PPOAgent, Reward 70: 30.000, Len(game): 30, Training Time: 0.322s, Prediction Time: 0.322s label PPOAgent, Reward 71: 9.000, Len(game): 9, Training Time: 0.324s, Prediction Time: 0.324s label PPOAgent, Reward 72: 27.000, Len(game): 27, Training Time: 0.330s, Prediction Time: 0.330s label PPOAgent, Reward 73: 10.000, Len(game): 10, Training Time: 0.332s, Prediction Time: 0.332s label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.339s, Prediction Time: 0.339s label PPOAgent, Reward 75: 22.000, Len(game): 22, Training Time: 0.343s, Prediction Time: 0.343s label PPOAgent, Reward 76: 13.000, Len(game): 13, Training Time: 0.345s, Prediction Time: 0.345s label PPOAgent, Reward 77: 28.000, Len(game): 28, Training Time: 0.351s, Prediction Time: 0.351s label PPOAgent, Reward 78: 11.000, Len(game): 11, Training Time: 0.353s, Prediction Time: 0.353s label PPOAgent, Reward 79: 20.000, Len(game): 20, Training Time: 0.357s, Prediction Time: 0.357s label PPOAgent, Reward 80: 33.000, Len(game): 33, Training Time: 0.362s, Prediction Time: 0.362s label PPOAgent, Reward 81: 13.000, Len(game): 13, Training Time: 0.366s, Prediction Time: 0.366s label PPOAgent, Reward 82: 
16.000, Len(game): 16, Training Time: 0.369s, Prediction Time: 0.369s label PPOAgent, Reward 83: 19.000, Len(game): 19, Training Time: 0.373s, Prediction Time: 0.373s label PPOAgent, Reward 84: 15.000, Len(game): 15, Training Time: 0.376s, Prediction Time: 0.376s label PPOAgent, Reward 85: 33.000, Len(game): 33, Training Time: 0.383s, Prediction Time: 0.383s label PPOAgent, Reward 86: 25.000, Len(game): 25, Training Time: 0.388s, Prediction Time: 0.388s label PPOAgent, Reward 87: 27.000, Len(game): 27, Training Time: 0.392s, Prediction Time: 0.392s label PPOAgent, Reward 88: 26.000, Len(game): 26, Training Time: 0.398s, Prediction Time: 0.398s label PPOAgent, Reward 89: 40.000, Len(game): 40, Training Time: 0.405s, Prediction Time: 0.405s label PPOAgent, Reward 90: 41.000, Len(game): 41, Training Time: 0.413s, Prediction Time: 0.413s label PPOAgent, Reward 91: 14.000, Len(game): 14, Training Time: 0.415s, Prediction Time: 0.415s label PPOAgent, Reward 92: 69.000, Len(game): 69, Training Time: 0.428s, Prediction Time: 0.428s label PPOAgent, Reward 93: 99.000, Len(game): 99, Training Time: 0.447s, Prediction Time: 0.447s label PPOAgent, Reward 94: 18.000, Len(game): 18, Training Time: 0.450s, Prediction Time: 0.450s label PPOAgent, Reward 95: 20.000, Len(game): 20, Training Time: 0.454s, Prediction Time: 0.454s label PPOAgent, Reward 96: 63.000, Len(game): 63, Training Time: 0.464s, Prediction Time: 0.464s label PPOAgent, Reward 97: 16.000, Len(game): 16, Training Time: 0.468s, Prediction Time: 0.468s label PPOAgent, Reward 98: 20.000, Len(game): 20, Training Time: 0.472s, Prediction Time: 0.472s label PPOAgent, Reward 99: 14.000, Len(game): 14, Training Time: 0.474s, Prediction Time: 0.474s label PolicyGradient, Reward 0: 25.000, Len(game): 25, Training Time: 0.005s, Prediction Time: 0.000s label PolicyGradient, Reward 1: 30.000, Len(game): 30, Training Time: 0.010s, Prediction Time: 0.005s label PolicyGradient, Reward 2: 12.000, Len(game): 12, Training Time: 
0.016s, Prediction Time: 0.008s label PolicyGradient, Reward 3: 62.000, Len(game): 62, Training Time: 0.027s, Prediction Time: 0.019s label PolicyGradient, Reward 4: 17.000, Len(game): 17, Training Time: 0.036s, Prediction Time: 0.025s label PolicyGradient, Reward 5: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.029s label PolicyGradient, Reward 6: 17.000, Len(game): 17, Training Time: 0.059s, Prediction Time: 0.035s label PolicyGradient, Reward 7: 32.000, Len(game): 32, Training Time: 0.072s, Prediction Time: 0.044s label PolicyGradient, Reward 8: 12.000, Len(game): 12, Training Time: 0.086s, Prediction Time: 0.049s label PolicyGradient, Reward 9: 39.000, Len(game): 39, Training Time: 0.104s, Prediction Time: 0.061s label PolicyGradient, Reward 10: 18.000, Len(game): 18, Training Time: 0.126s, Prediction Time: 0.070s label PolicyGradient, Reward 11: 14.000, Len(game): 14, Training Time: 0.147s, Prediction Time: 0.079s label PolicyGradient, Reward 12: 41.000, Len(game): 41, Training Time: 0.175s, Prediction Time: 0.096s label PolicyGradient, Reward 13: 55.000, Len(game): 55, Training Time: 0.216s, Prediction Time: 0.117s label PolicyGradient, Reward 14: 10.000, Len(game): 10, Training Time: 0.254s, Prediction Time: 0.132s label PolicyGradient, Reward 15: 18.000, Len(game): 18, Training Time: 0.298s, Prediction Time: 0.150s label PolicyGradient, Reward 16: 12.000, Len(game): 12, Training Time: 0.343s, Prediction Time: 0.166s label PolicyGradient, Reward 17: 89.000, Len(game): 89, Training Time: 0.405s, Prediction Time: 0.206s label PolicyGradient, Reward 18: 11.000, Len(game): 11, Training Time: 0.464s, Prediction Time: 0.229s label PolicyGradient, Reward 19: 94.000, Len(game): 94, Training Time: 0.546s, Prediction Time: 0.276s label PolicyGradient, Reward 20: 26.000, Len(game): 26, Training Time: 0.636s, Prediction Time: 0.317s label PolicyGradient, Reward 21: 62.000, Len(game): 62, Training Time: 0.737s, Prediction Time: 0.367s label 
PolicyGradient, Reward 22: 77.000, Len(game): 77, Training Time: 0.852s, Prediction Time: 0.428s label PolicyGradient, Reward 23: 13.000, Len(game): 13, Training Time: 0.972s, Prediction Time: 0.478s label PolicyGradient, Reward 24: 97.000, Len(game): 97, Training Time: 1.125s, Prediction Time: 0.563s label PolicyGradient, Reward 25: 108.000, Len(game): 108, Training Time: 1.319s, Prediction Time: 0.665s label PolicyGradient, Reward 26: 22.000, Len(game): 22, Training Time: 1.512s, Prediction Time: 0.742s label PolicyGradient, Reward 27: 102.000, Len(game): 102, Training Time: 1.718s, Prediction Time: 0.863s label PolicyGradient, Reward 28: 109.000, Len(game): 109, Training Time: 1.996s, Prediction Time: 0.999s label PolicyGradient, Reward 29: 117.000, Len(game): 117, Training Time: 2.310s, Prediction Time: 1.162s label PolicyGradient, Reward 30: 16.000, Len(game): 16, Training Time: 2.625s, Prediction Time: 1.292s label PolicyGradient, Reward 31: 149.000, Len(game): 149, Training Time: 2.999s, Prediction Time: 1.518s label PolicyGradient, Reward 32: 105.000, Len(game): 105, Training Time: 3.434s, Prediction Time: 1.740s label PolicyGradient, Reward 33: 153.000, Len(game): 153, Training Time: 3.434s, Prediction Time: 2.021s label PolicyGradient, Reward 34: 117.000, Len(game): 117, Training Time: 3.434s, Prediction Time: 2.108s label PolicyGradient, Reward 35: 12.000, Len(game): 12, Training Time: 3.434s, Prediction Time: 2.116s label PolicyGradient, Reward 36: 265.000, Len(game): 265, Training Time: 3.434s, Prediction Time: 2.288s label PolicyGradient, Reward 37: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.299s label PolicyGradient, Reward 38: 119.000, Len(game): 119, Training Time: 3.434s, Prediction Time: 2.381s label PolicyGradient, Reward 39: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 2.393s label PolicyGradient, Reward 40: 176.000, Len(game): 176, Training Time: 3.434s, Prediction Time: 2.522s label PolicyGradient, 
Reward 41: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 2.603s label PolicyGradient, Reward 42: 103.000, Len(game): 103, Training Time: 3.434s, Prediction Time: 2.672s label PolicyGradient, Reward 43: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.683s label PolicyGradient, Reward 44: 34.000, Len(game): 34, Training Time: 3.434s, Prediction Time: 2.706s label PolicyGradient, Reward 45: 210.000, Len(game): 210, Training Time: 3.434s, Prediction Time: 2.847s label PolicyGradient, Reward 46: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 2.930s label PolicyGradient, Reward 47: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.011s label PolicyGradient, Reward 48: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 3.157s label PolicyGradient, Reward 49: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 3.182s label PolicyGradient, Reward 50: 39.000, Len(game): 39, Training Time: 3.434s, Prediction Time: 3.209s label PolicyGradient, Reward 51: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.295s label PolicyGradient, Reward 52: 104.000, Len(game): 104, Training Time: 3.434s, Prediction Time: 3.364s label PolicyGradient, Reward 53: 46.000, Len(game): 46, Training Time: 3.434s, Prediction Time: 3.397s label PolicyGradient, Reward 54: 18.000, Len(game): 18, Training Time: 3.434s, Prediction Time: 3.409s label PolicyGradient, Reward 55: 13.000, Len(game): 13, Training Time: 3.434s, Prediction Time: 3.417s label PolicyGradient, Reward 56: 111.000, Len(game): 111, Training Time: 3.434s, Prediction Time: 3.495s label PolicyGradient, Reward 57: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.582s label PolicyGradient, Reward 58: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 3.658s label PolicyGradient, Reward 59: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 3.745s label PolicyGradient, Reward 60: 
122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.825s label PolicyGradient, Reward 61: 10.000, Len(game): 10, Training Time: 3.434s, Prediction Time: 3.832s label PolicyGradient, Reward 62: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 3.842s label PolicyGradient, Reward 63: 20.000, Len(game): 20, Training Time: 3.434s, Prediction Time: 3.855s label PolicyGradient, Reward 64: 115.000, Len(game): 115, Training Time: 3.434s, Prediction Time: 3.932s label PolicyGradient, Reward 65: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 3.960s label PolicyGradient, Reward 66: 148.000, Len(game): 148, Training Time: 3.434s, Prediction Time: 4.060s label PolicyGradient, Reward 67: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 4.089s label PolicyGradient, Reward 68: 124.000, Len(game): 124, Training Time: 3.434s, Prediction Time: 4.173s label PolicyGradient, Reward 69: 63.000, Len(game): 63, Training Time: 3.434s, Prediction Time: 4.214s label PolicyGradient, Reward 70: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 4.290s label PolicyGradient, Reward 71: 114.000, Len(game): 114, Training Time: 3.434s, Prediction Time: 4.366s label PolicyGradient, Reward 72: 17.000, Len(game): 17, Training Time: 3.434s, Prediction Time: 4.378s label PolicyGradient, Reward 73: 142.000, Len(game): 142, Training Time: 3.434s, Prediction Time: 4.477s label PolicyGradient, Reward 74: 152.000, Len(game): 152, Training Time: 3.434s, Prediction Time: 4.578s label PolicyGradient, Reward 75: 224.000, Len(game): 224, Training Time: 3.434s, Prediction Time: 4.730s label PolicyGradient, Reward 76: 106.000, Len(game): 106, Training Time: 3.434s, Prediction Time: 4.800s label PolicyGradient, Reward 77: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 4.810s label PolicyGradient, Reward 78: 149.000, Len(game): 149, Training Time: 3.434s, Prediction Time: 4.908s label PolicyGradient, Reward 79: 81.000, Len(game): 
81, Training Time: 3.434s, Prediction Time: 4.962s label PolicyGradient, Reward 80: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.035s label PolicyGradient, Reward 81: 116.000, Len(game): 116, Training Time: 3.434s, Prediction Time: 5.114s label PolicyGradient, Reward 82: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 5.139s label PolicyGradient, Reward 83: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 5.286s label PolicyGradient, Reward 84: 33.000, Len(game): 33, Training Time: 3.434s, Prediction Time: 5.307s label PolicyGradient, Reward 85: 61.000, Len(game): 61, Training Time: 3.434s, Prediction Time: 5.348s label PolicyGradient, Reward 86: 21.000, Len(game): 21, Training Time: 3.434s, Prediction Time: 5.362s label PolicyGradient, Reward 87: 138.000, Len(game): 138, Training Time: 3.434s, Prediction Time: 5.455s label PolicyGradient, Reward 88: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 5.482s label PolicyGradient, Reward 89: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.555s label PolicyGradient, Reward 90: 113.000, Len(game): 113, Training Time: 3.434s, Prediction Time: 5.630s label PolicyGradient, Reward 91: 67.000, Len(game): 67, Training Time: 3.434s, Prediction Time: 5.674s label PolicyGradient, Reward 92: 51.000, Len(game): 51, Training Time: 3.434s, Prediction Time: 5.708s label PolicyGradient, Reward 93: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 5.794s label PolicyGradient, Reward 94: 77.000, Len(game): 77, Training Time: 3.434s, Prediction Time: 5.844s label PolicyGradient, Reward 95: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 5.873s label PolicyGradient, Reward 96: 40.000, Len(game): 40, Training Time: 3.434s, Prediction Time: 5.899s label PolicyGradient, Reward 97: 45.000, Len(game): 45, Training Time: 3.434s, Prediction Time: 5.930s label PolicyGradient, Reward 98: 15.000, Len(game): 15, Training Time: 3.434s, 
Prediction Time: 5.941s label PolicyGradient, Reward 99: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 5.950s label Controller-based, Reward 0: 112.000, Len(game): 112, Training Time: 0.002s, Prediction Time: 0.001s label Controller-based, Reward 1: 79.000, Len(game): 79, Training Time: 0.003s, Prediction Time: 0.001s label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.001s label Controller-based, Reward 3: 69.000, Len(game): 69, Training Time: 0.021s, Prediction Time: 0.003s no training label Controller-based, Reward 4: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.011s no training label Controller-based, Reward 5: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.020s no training label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.029s no training label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.036s no training label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.046s no training label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.054s no training label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.063s no training label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.071s no training label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.079s no training label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.085s no training label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.095s no training label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.103s no 
training label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.112s no training label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.120s no training label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.129s no training label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.137s no training label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.146s no training label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.153s no training label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.162s no training label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.171s no training label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.179s no training label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.187s no training label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.195s no training label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.203s no training label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.212s no training label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.220s no training label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.228s no training label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.237s no training label Controller-based, Reward 32: 1000.000, 
Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.245s no training label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.253s no training label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.261s no training label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.270s no training label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.278s no training label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.286s no training label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.295s no training label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.303s no training label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.312s no training label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.320s no training label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.328s no training label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.336s no training label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.344s no training label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.352s no training label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.360s no training label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.369s no training label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.377s 
no training label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.385s no training label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.393s no training label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.401s no training label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.409s no training label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.418s no training label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.425s no training label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.433s no training label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.442s no training label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.450s no training label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.458s no training label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.466s no training label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.475s no training label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.483s no training label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.491s no training label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.499s no training label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.507s no training label Controller-based, Reward 65: 1000.000, 
Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.516s no training label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.524s no training label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.532s no training label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.541s no training label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.549s no training label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.557s no training label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.566s no training label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.574s no training label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.582s no training label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.591s no training label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.598s no training label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.607s no training label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.615s no training label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.623s no training label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.632s no training label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.640s no training label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.648s 
no training label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.656s no training label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.664s no training label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.672s no training label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.681s no training label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.689s no training label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.697s no training label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.705s no training label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.714s no training label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.723s no training label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.732s no training label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.740s no training label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.749s no training label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.758s no training label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.766s no training label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.775s no training label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.783s no training label Controller-based, Reward 98: 1000.000, 
Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.790s no training label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.799s label KACAgent, Reward 0: 20.000, Len(game): 20, Training Time: 0.004s, Prediction Time: 0.001s label KACAgent, Reward 1: 40.000, Len(game): 40, Training Time: 0.009s, Prediction Time: 0.007s label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.010s label KACAgent, Reward 3: 26.000, Len(game): 26, Training Time: 0.022s, Prediction Time: 0.016s label KACAgent, Reward 4: 113.000, Len(game): 113, Training Time: 0.037s, Prediction Time: 0.038s label KACAgent, Reward 5: 97.000, Len(game): 97, Training Time: 0.060s, Prediction Time: 0.061s label KACAgent, Reward 6: 101.000, Len(game): 101, Training Time: 0.097s, Prediction Time: 0.093s label KACAgent, Reward 7: 388.000, Len(game): 388, Training Time: 0.207s, Prediction Time: 0.219s label KACAgent, Reward 8: 284.000, Len(game): 284, Training Time: 0.385s, Prediction Time: 0.376s label KACAgent, Reward 9: 71.000, Len(game): 71, Training Time: 0.574s, Prediction Time: 0.489s label KACAgent, Reward 10: 130.000, Len(game): 130, Training Time: 0.809s, Prediction Time: 0.644s label KACAgent, Reward 11: 118.000, Len(game): 118, Training Time: 1.099s, Prediction Time: 0.830s label KACAgent, Reward 12: 262.000, Len(game): 262, Training Time: 1.471s, Prediction Time: 1.125s label KACAgent, Reward 13: 197.000, Len(game): 197, Training Time: 1.927s, Prediction Time: 1.463s label KACAgent, Reward 14: 217.000, Len(game): 217, Training Time: 2.488s, Prediction Time: 1.863s label KACAgent, Reward 15: 219.000, Len(game): 219, Training Time: 3.157s, Prediction Time: 2.340s label KACAgent, Reward 16: 299.000, Len(game): 299, Training Time: 3.157s, Prediction Time: 2.966s label KACAgent, Reward 17: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 3.123s label KACAgent, Reward 18: 637.000, Len(game): 637, 
Training Time: 3.157s, Prediction Time: 3.638s label KACAgent, Reward 19: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 3.793s label KACAgent, Reward 20: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 4.595s label KACAgent, Reward 21: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 4.727s label KACAgent, Reward 22: 335.000, Len(game): 335, Training Time: 3.157s, Prediction Time: 4.997s label KACAgent, Reward 23: 557.000, Len(game): 557, Training Time: 3.157s, Prediction Time: 5.442s label KACAgent, Reward 24: 414.000, Len(game): 414, Training Time: 3.157s, Prediction Time: 5.776s label KACAgent, Reward 25: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 5.932s label KACAgent, Reward 26: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 6.739s label KACAgent, Reward 27: 254.000, Len(game): 254, Training Time: 3.157s, Prediction Time: 6.943s label KACAgent, Reward 28: 204.000, Len(game): 204, Training Time: 3.157s, Prediction Time: 7.108s label KACAgent, Reward 29: 223.000, Len(game): 223, Training Time: 3.157s, Prediction Time: 7.285s label KACAgent, Reward 30: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 7.475s label KACAgent, Reward 31: 167.000, Len(game): 167, Training Time: 3.157s, Prediction Time: 7.612s label KACAgent, Reward 32: 316.000, Len(game): 316, Training Time: 3.157s, Prediction Time: 7.864s label KACAgent, Reward 33: 453.000, Len(game): 453, Training Time: 3.157s, Prediction Time: 8.228s label KACAgent, Reward 34: 146.000, Len(game): 146, Training Time: 3.157s, Prediction Time: 8.344s label KACAgent, Reward 35: 199.000, Len(game): 199, Training Time: 3.157s, Prediction Time: 8.503s label KACAgent, Reward 36: 193.000, Len(game): 193, Training Time: 3.157s, Prediction Time: 8.658s label KACAgent, Reward 37: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 9.461s label KACAgent, Reward 38: 1000.000, Len(game): 1000, Training 
Time: 3.157s, Prediction Time: 10.260s label KACAgent, Reward 39: 500.000, Len(game): 500, Training Time: 3.157s, Prediction Time: 10.659s label KACAgent, Reward 40: 211.000, Len(game): 211, Training Time: 3.157s, Prediction Time: 10.827s label KACAgent, Reward 41: 216.000, Len(game): 216, Training Time: 3.157s, Prediction Time: 11.004s label KACAgent, Reward 42: 225.000, Len(game): 225, Training Time: 3.157s, Prediction Time: 11.185s label KACAgent, Reward 43: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 11.476s label KACAgent, Reward 44: 152.000, Len(game): 152, Training Time: 3.157s, Prediction Time: 11.597s label KACAgent, Reward 45: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 11.703s label KACAgent, Reward 46: 367.000, Len(game): 367, Training Time: 3.157s, Prediction Time: 11.999s label KACAgent, Reward 47: 232.000, Len(game): 232, Training Time: 3.157s, Prediction Time: 12.187s label KACAgent, Reward 48: 252.000, Len(game): 252, Training Time: 3.157s, Prediction Time: 12.390s label KACAgent, Reward 49: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 12.576s label KACAgent, Reward 50: 311.000, Len(game): 311, Training Time: 3.157s, Prediction Time: 12.826s label KACAgent, Reward 51: 174.000, Len(game): 174, Training Time: 3.157s, Prediction Time: 12.968s label KACAgent, Reward 52: 251.000, Len(game): 251, Training Time: 3.157s, Prediction Time: 13.171s label KACAgent, Reward 53: 212.000, Len(game): 212, Training Time: 3.157s, Prediction Time: 13.341s label KACAgent, Reward 54: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 14.142s label KACAgent, Reward 55: 343.000, Len(game): 343, Training Time: 3.157s, Prediction Time: 14.415s label KACAgent, Reward 56: 476.000, Len(game): 476, Training Time: 3.157s, Prediction Time: 14.795s label KACAgent, Reward 57: 195.000, Len(game): 195, Training Time: 3.157s, Prediction Time: 14.949s label KACAgent, Reward 58: 177.000, Len(game): 177, 
Training Time: 3.157s, Prediction Time: 15.091s label KACAgent, Reward 59: 207.000, Len(game): 207, Training Time: 3.157s, Prediction Time: 15.256s label KACAgent, Reward 60: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 15.364s label KACAgent, Reward 61: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 15.511s label KACAgent, Reward 62: 201.000, Len(game): 201, Training Time: 3.157s, Prediction Time: 15.672s label KACAgent, Reward 63: 344.000, Len(game): 344, Training Time: 3.157s, Prediction Time: 15.947s label KACAgent, Reward 64: 197.000, Len(game): 197, Training Time: 3.157s, Prediction Time: 16.103s label KACAgent, Reward 65: 265.000, Len(game): 265, Training Time: 3.157s, Prediction Time: 16.320s label KACAgent, Reward 66: 168.000, Len(game): 168, Training Time: 3.157s, Prediction Time: 16.455s label KACAgent, Reward 67: 166.000, Len(game): 166, Training Time: 3.157s, Prediction Time: 16.587s label KACAgent, Reward 68: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 16.742s label KACAgent, Reward 69: 257.000, Len(game): 257, Training Time: 3.157s, Prediction Time: 16.951s label KACAgent, Reward 70: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 17.127s label KACAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 17.254s label KACAgent, Reward 72: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 18.057s label KACAgent, Reward 73: 315.000, Len(game): 315, Training Time: 3.157s, Prediction Time: 18.309s label KACAgent, Reward 74: 229.000, Len(game): 229, Training Time: 3.157s, Prediction Time: 18.493s label KACAgent, Reward 75: 830.000, Len(game): 830, Training Time: 3.157s, Prediction Time: 19.158s label KACAgent, Reward 76: 206.000, Len(game): 206, Training Time: 3.157s, Prediction Time: 19.324s label KACAgent, Reward 77: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 19.610s label KACAgent, Reward 78: 184.000, Len(game): 
184, Training Time: 3.157s, Prediction Time: 19.765s label KACAgent, Reward 79: 186.000, Len(game): 186, Training Time: 3.157s, Prediction Time: 19.912s label KACAgent, Reward 80: 387.000, Len(game): 387, Training Time: 3.157s, Prediction Time: 20.222s label KACAgent, Reward 81: 189.000, Len(game): 189, Training Time: 3.157s, Prediction Time: 20.371s label KACAgent, Reward 82: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 20.520s label KACAgent, Reward 83: 328.000, Len(game): 328, Training Time: 3.157s, Prediction Time: 20.782s label KACAgent, Reward 84: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 21.065s label KACAgent, Reward 85: 264.000, Len(game): 264, Training Time: 3.157s, Prediction Time: 21.276s label KACAgent, Reward 86: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 21.453s label KACAgent, Reward 87: 180.000, Len(game): 180, Training Time: 3.157s, Prediction Time: 21.596s label KACAgent, Reward 88: 310.000, Len(game): 310, Training Time: 3.157s, Prediction Time: 21.846s label KACAgent, Reward 89: 365.000, Len(game): 365, Training Time: 3.157s, Prediction Time: 22.138s label KACAgent, Reward 90: 248.000, Len(game): 248, Training Time: 3.157s, Prediction Time: 22.336s label KACAgent, Reward 91: 688.000, Len(game): 688, Training Time: 3.157s, Prediction Time: 22.885s label KACAgent, Reward 92: 162.000, Len(game): 162, Training Time: 3.157s, Prediction Time: 23.014s label KACAgent, Reward 93: 173.000, Len(game): 173, Training Time: 3.157s, Prediction Time: 23.153s label KACAgent, Reward 94: 165.000, Len(game): 165, Training Time: 3.157s, Prediction Time: 23.284s label KACAgent, Reward 95: 226.000, Len(game): 226, Training Time: 3.157s, Prediction Time: 23.464s label KACAgent, Reward 96: 200.000, Len(game): 200, Training Time: 3.157s, Prediction Time: 23.624s label KACAgent, Reward 97: 334.000, Len(game): 334, Training Time: 3.157s, Prediction Time: 23.894s label KACAgent, Reward 98: 310.000, Len(game): 
310, Training Time: 3.157s, Prediction Time: 24.143s label KACAgent, Reward 99: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 24.436s label DQNAgent, Reward 0: 33.000, Len(game): 33, Training Time: 0.000s, Prediction Time: 0.000s label DQNAgent, Reward 1: 17.000, Len(game): 17, Training Time: 0.000s, Prediction Time: 0.000s label DQNAgent, Reward 2: 36.000, Len(game): 36, Training Time: 0.026s, Prediction Time: 0.001s label DQNAgent, Reward 3: 11.000, Len(game): 11, Training Time: 0.038s, Prediction Time: 0.002s label DQNAgent, Reward 4: 30.000, Len(game): 30, Training Time: 0.076s, Prediction Time: 0.003s label DQNAgent, Reward 5: 28.000, Len(game): 28, Training Time: 0.107s, Prediction Time: 0.004s label DQNAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.128s, Prediction Time: 0.005s label DQNAgent, Reward 7: 20.000, Len(game): 20, Training Time: 0.154s, Prediction Time: 0.006s label DQNAgent, Reward 8: 34.000, Len(game): 34, Training Time: 0.192s, Prediction Time: 0.006s label DQNAgent, Reward 9: 17.000, Len(game): 17, Training Time: 0.210s, Prediction Time: 0.007s label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.225s, Prediction Time: 0.008s label DQNAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.241s, Prediction Time: 0.008s label DQNAgent, Reward 12: 17.000, Len(game): 17, Training Time: 0.260s, Prediction Time: 0.009s label DQNAgent, Reward 13: 31.000, Len(game): 31, Training Time: 0.294s, Prediction Time: 0.010s label DQNAgent, Reward 14: 11.000, Len(game): 11, Training Time: 0.307s, Prediction Time: 0.010s label DQNAgent, Reward 15: 10.000, Len(game): 10, Training Time: 0.319s, Prediction Time: 0.010s label DQNAgent, Reward 16: 21.000, Len(game): 21, Training Time: 0.343s, Prediction Time: 0.010s label DQNAgent, Reward 17: 27.000, Len(game): 27, Training Time: 0.373s, Prediction Time: 0.011s label DQNAgent, Reward 18: 25.000, Len(game): 25, Training Time: 0.401s, Prediction Time: 0.012s label 
DQNAgent, Reward 19: 14.000, Len(game): 14, Training Time: 0.417s, Prediction Time: 0.012s label DQNAgent, Reward 20: 15.000, Len(game): 15, Training Time: 0.434s, Prediction Time: 0.012s label DQNAgent, Reward 21: 17.000, Len(game): 17, Training Time: 0.452s, Prediction Time: 0.013s label DQNAgent, Reward 22: 14.000, Len(game): 14, Training Time: 0.468s, Prediction Time: 0.014s label DQNAgent, Reward 23: 29.000, Len(game): 29, Training Time: 0.502s, Prediction Time: 0.015s label DQNAgent, Reward 24: 28.000, Len(game): 28, Training Time: 0.534s, Prediction Time: 0.016s label DQNAgent, Reward 25: 78.000, Len(game): 78, Training Time: 0.630s, Prediction Time: 0.018s label DQNAgent, Reward 26: 21.000, Len(game): 21, Training Time: 0.654s, Prediction Time: 0.018s label DQNAgent, Reward 27: 23.000, Len(game): 23, Training Time: 0.681s, Prediction Time: 0.019s label DQNAgent, Reward 28: 41.000, Len(game): 41, Training Time: 0.727s, Prediction Time: 0.020s label DQNAgent, Reward 29: 40.000, Len(game): 40, Training Time: 0.773s, Prediction Time: 0.021s label DQNAgent, Reward 30: 54.000, Len(game): 54, Training Time: 0.835s, Prediction Time: 0.022s label DQNAgent, Reward 31: 65.000, Len(game): 65, Training Time: 0.908s, Prediction Time: 0.024s label DQNAgent, Reward 32: 86.000, Len(game): 86, Training Time: 1.010s, Prediction Time: 0.026s label DQNAgent, Reward 33: 27.000, Len(game): 27, Training Time: 1.039s, Prediction Time: 0.027s label DQNAgent, Reward 34: 166.000, Len(game): 166, Training Time: 1.230s, Prediction Time: 0.032s label DQNAgent, Reward 35: 48.000, Len(game): 48, Training Time: 1.287s, Prediction Time: 0.034s label DQNAgent, Reward 36: 135.000, Len(game): 135, Training Time: 1.457s, Prediction Time: 0.040s label DQNAgent, Reward 37: 118.000, Len(game): 118, Training Time: 1.607s, Prediction Time: 0.045s label DQNAgent, Reward 38: 159.000, Len(game): 159, Training Time: 1.790s, Prediction Time: 0.053s label DQNAgent, Reward 39: 222.000, Len(game): 222, 
Training Time: 2.040s, Prediction Time: 0.061s label DQNAgent, Reward 40: 219.000, Len(game): 219, Training Time: 2.294s, Prediction Time: 0.070s label DQNAgent, Reward 41: 180.000, Len(game): 180, Training Time: 2.503s, Prediction Time: 0.077s label DQNAgent, Reward 42: 178.000, Len(game): 178, Training Time: 2.706s, Prediction Time: 0.084s label DQNAgent, Reward 43: 176.000, Len(game): 176, Training Time: 2.900s, Prediction Time: 0.092s label DQNAgent, Reward 44: 247.000, Len(game): 247, Training Time: 3.171s, Prediction Time: 0.102s label DQNAgent, Reward 45: 195.000, Len(game): 195, Training Time: 3.171s, Prediction Time: 0.111s label DQNAgent, Reward 46: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.118s label DQNAgent, Reward 47: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.128s label DQNAgent, Reward 48: 232.000, Len(game): 232, Training Time: 3.171s, Prediction Time: 0.139s label DQNAgent, Reward 49: 236.000, Len(game): 236, Training Time: 3.171s, Prediction Time: 0.149s label DQNAgent, Reward 50: 216.000, Len(game): 216, Training Time: 3.171s, Prediction Time: 0.158s label DQNAgent, Reward 51: 178.000, Len(game): 178, Training Time: 3.171s, Prediction Time: 0.166s label DQNAgent, Reward 52: 188.000, Len(game): 188, Training Time: 3.171s, Prediction Time: 0.175s label DQNAgent, Reward 53: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.183s label DQNAgent, Reward 54: 239.000, Len(game): 239, Training Time: 3.171s, Prediction Time: 0.193s label DQNAgent, Reward 55: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.204s label DQNAgent, Reward 56: 160.000, Len(game): 160, Training Time: 3.171s, Prediction Time: 0.212s label DQNAgent, Reward 57: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.219s label DQNAgent, Reward 58: 196.000, Len(game): 196, Training Time: 3.171s, Prediction Time: 0.227s label DQNAgent, Reward 59: 171.000, Len(game): 171, Training Time: 
3.171s, Prediction Time: 0.234s label DQNAgent, Reward 60: 168.000, Len(game): 168, Training Time: 3.171s, Prediction Time: 0.241s label DQNAgent, Reward 61: 223.000, Len(game): 223, Training Time: 3.171s, Prediction Time: 0.250s label DQNAgent, Reward 62: 215.000, Len(game): 215, Training Time: 3.171s, Prediction Time: 0.260s label DQNAgent, Reward 63: 182.000, Len(game): 182, Training Time: 3.171s, Prediction Time: 0.267s label DQNAgent, Reward 64: 171.000, Len(game): 171, Training Time: 3.171s, Prediction Time: 0.274s label DQNAgent, Reward 65: 268.000, Len(game): 268, Training Time: 3.171s, Prediction Time: 0.285s label DQNAgent, Reward 66: 244.000, Len(game): 244, Training Time: 3.171s, Prediction Time: 0.295s label DQNAgent, Reward 67: 162.000, Len(game): 162, Training Time: 3.171s, Prediction Time: 0.301s label DQNAgent, Reward 68: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.308s label DQNAgent, Reward 69: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.318s label DQNAgent, Reward 70: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.325s label DQNAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.332s label DQNAgent, Reward 72: 300.000, Len(game): 300, Training Time: 3.171s, Prediction Time: 0.344s label DQNAgent, Reward 73: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.353s label DQNAgent, Reward 74: 172.000, Len(game): 172, Training Time: 3.171s, Prediction Time: 0.360s label DQNAgent, Reward 75: 260.000, Len(game): 260, Training Time: 3.171s, Prediction Time: 0.371s label DQNAgent, Reward 76: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.379s label DQNAgent, Reward 77: 251.000, Len(game): 251, Training Time: 3.171s, Prediction Time: 0.389s label DQNAgent, Reward 78: 186.000, Len(game): 186, Training Time: 3.171s, Prediction Time: 0.397s label DQNAgent, Reward 79: 243.000, Len(game): 243, Training Time: 3.171s, Prediction 
Time: 0.405s label DQNAgent, Reward 80: 226.000, Len(game): 226, Training Time: 3.171s, Prediction Time: 0.415s label DQNAgent, Reward 81: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.426s label DQNAgent, Reward 82: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.433s label DQNAgent, Reward 83: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.443s label DQNAgent, Reward 84: 211.000, Len(game): 211, Training Time: 3.171s, Prediction Time: 0.451s label DQNAgent, Reward 85: 255.000, Len(game): 255, Training Time: 3.171s, Prediction Time: 0.461s label DQNAgent, Reward 86: 283.000, Len(game): 283, Training Time: 3.171s, Prediction Time: 0.473s label DQNAgent, Reward 87: 189.000, Len(game): 189, Training Time: 3.171s, Prediction Time: 0.480s label DQNAgent, Reward 88: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.489s label DQNAgent, Reward 89: 282.000, Len(game): 282, Training Time: 3.171s, Prediction Time: 0.501s label DQNAgent, Reward 90: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.509s label DQNAgent, Reward 91: 164.000, Len(game): 164, Training Time: 3.171s, Prediction Time: 0.517s label DQNAgent, Reward 92: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.525s label DQNAgent, Reward 93: 191.000, Len(game): 191, Training Time: 3.171s, Prediction Time: 0.534s label DQNAgent, Reward 94: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.543s label DQNAgent, Reward 95: 229.000, Len(game): 229, Training Time: 3.171s, Prediction Time: 0.552s label DQNAgent, Reward 96: 199.000, Len(game): 199, Training Time: 3.171s, Prediction Time: 0.560s label DQNAgent, Reward 97: 190.000, Len(game): 190, Training Time: 3.171s, Prediction Time: 0.568s label DQNAgent, Reward 98: 289.000, Len(game): 289, Training Time: 3.171s, Prediction Time: 0.579s label DQNAgent, Reward 99: 212.000, Len(game): 212, Training Time: 3.171s, Prediction Time: 0.588s 
Computed global error Bellman mean: 1.1717717636598608e-07 iter: 3 label KQLearningHJBCP, Reward 0: 22.000, Len(game): 22, Training Time: 0.009s, Prediction Time: 0.000s Computed global error Bellman mean: 1.1552543799808745e-07 iter: 6 label KQLearningHJBCP, Reward 1: 78.000, Len(game): 78, Training Time: 0.059s, Prediction Time: 0.011s Computed global error Bellman mean: 3.0488343749310424e-07 iter: 8 label KQLearningHJBCP, Reward 2: 71.000, Len(game): 71, Training Time: 0.203s, Prediction Time: 0.023s Computed global error Bellman mean: 2.2164816109864766e-07 iter: 5 label KQLearningHJBCP, Reward 3: 82.000, Len(game): 82, Training Time: 0.392s, Prediction Time: 0.042s Computed global error Bellman mean: 2.685937866324866e-07 iter: 5 label KQLearningHJBCP, Reward 4: 76.000, Len(game): 76, Training Time: 0.717s, Prediction Time: 0.063s Computed global error Bellman mean: 2.2103691660173973e-07 iter: 5 label KQLearningHJBCP, Reward 5: 83.000, Len(game): 83, Training Time: 1.269s, Prediction Time: 0.088s Computed global error Bellman mean: 2.3048877059193143e-07 iter: 6 label KQLearningHJBCP, Reward 6: 195.000, Len(game): 195, Training Time: 2.550s, Prediction Time: 0.156s Computed global error Bellman mean: 0.10596339659857741 iter: 10 label KQLearningHJBCP, Reward 7: 217.000, Len(game): 217, Training Time: 5.986s, Prediction Time: 0.239s label KQLearningHJBCP, Reward 8: 149.000, Len(game): 149, Training Time: 5.986s, Prediction Time: 0.312s label KQLearningHJBCP, Reward 9: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 0.358s label KQLearningHJBCP, Reward 10: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.400s label KQLearningHJBCP, Reward 11: 96.000, Len(game): 96, Training Time: 5.986s, Prediction Time: 0.448s label KQLearningHJBCP, Reward 12: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 0.510s label KQLearningHJBCP, Reward 13: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 0.566s label 
KQLearningHJBCP, Reward 14: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.612s label KQLearningHJBCP, Reward 15: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 0.662s label KQLearningHJBCP, Reward 16: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 0.710s label KQLearningHJBCP, Reward 17: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 0.769s label KQLearningHJBCP, Reward 18: 154.000, Len(game): 154, Training Time: 5.986s, Prediction Time: 0.847s label KQLearningHJBCP, Reward 19: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 0.911s label KQLearningHJBCP, Reward 20: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 0.959s label KQLearningHJBCP, Reward 21: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 1.014s label KQLearningHJBCP, Reward 22: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 1.059s label KQLearningHJBCP, Reward 23: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 1.123s label KQLearningHJBCP, Reward 24: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 1.174s label KQLearningHJBCP, Reward 25: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.228s label KQLearningHJBCP, Reward 26: 68.000, Len(game): 68, Training Time: 5.986s, Prediction Time: 1.261s label KQLearningHJBCP, Reward 27: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 1.315s label KQLearningHJBCP, Reward 28: 144.000, Len(game): 144, Training Time: 5.986s, Prediction Time: 1.386s label KQLearningHJBCP, Reward 29: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 1.432s label KQLearningHJBCP, Reward 30: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 1.481s label KQLearningHJBCP, Reward 31: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 1.541s label KQLearningHJBCP, Reward 32: 136.000, Len(game): 136, Training Time: 5.986s, Prediction Time: 1.609s label 
KQLearningHJBCP, Reward 33: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.659s label KQLearningHJBCP, Reward 34: 126.000, Len(game): 126, Training Time: 5.986s, Prediction Time: 1.721s label KQLearningHJBCP, Reward 35: 78.000, Len(game): 78, Training Time: 5.986s, Prediction Time: 1.761s label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.813s label KQLearningHJBCP, Reward 37: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.863s label KQLearningHJBCP, Reward 38: 176.000, Len(game): 176, Training Time: 5.986s, Prediction Time: 1.950s label KQLearningHJBCP, Reward 39: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 2.003s label KQLearningHJBCP, Reward 40: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 2.053s label KQLearningHJBCP, Reward 41: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.116s label KQLearningHJBCP, Reward 42: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.191s label KQLearningHJBCP, Reward 43: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 2.240s label KQLearningHJBCP, Reward 44: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 2.293s label KQLearningHJBCP, Reward 45: 164.000, Len(game): 164, Training Time: 5.986s, Prediction Time: 2.375s label KQLearningHJBCP, Reward 46: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.439s label KQLearningHJBCP, Reward 47: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.486s label KQLearningHJBCP, Reward 48: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 2.532s label KQLearningHJBCP, Reward 49: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 2.585s label KQLearningHJBCP, Reward 50: 119.000, Len(game): 119, Training Time: 5.986s, Prediction Time: 2.645s label KQLearningHJBCP, Reward 51: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 2.689s 
label KQLearningHJBCP, Reward 52: 117.000, Len(game): 117, Training Time: 5.986s, Prediction Time: 2.747s label KQLearningHJBCP, Reward 53: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.794s label KQLearningHJBCP, Reward 54: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 2.843s label KQLearningHJBCP, Reward 55: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.917s label KQLearningHJBCP, Reward 56: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 2.968s label KQLearningHJBCP, Reward 57: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.012s label KQLearningHJBCP, Reward 58: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.056s label KQLearningHJBCP, Reward 59: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 3.111s label KQLearningHJBCP, Reward 60: 166.000, Len(game): 166, Training Time: 5.986s, Prediction Time: 3.193s label KQLearningHJBCP, Reward 61: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.246s label KQLearningHJBCP, Reward 62: 124.000, Len(game): 124, Training Time: 5.986s, Prediction Time: 3.310s label KQLearningHJBCP, Reward 63: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 3.359s label KQLearningHJBCP, Reward 64: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 3.406s label KQLearningHJBCP, Reward 65: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 3.465s label KQLearningHJBCP, Reward 66: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.517s label KQLearningHJBCP, Reward 67: 163.000, Len(game): 163, Training Time: 5.986s, Prediction Time: 3.596s label KQLearningHJBCP, Reward 68: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 3.645s label KQLearningHJBCP, Reward 69: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 3.702s label KQLearningHJBCP, Reward 70: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 3.757s 
label KQLearningHJBCP, Reward 71: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.807s label KQLearningHJBCP, Reward 72: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.857s label KQLearningHJBCP, Reward 73: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 3.905s label KQLearningHJBCP, Reward 74: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 3.955s label KQLearningHJBCP, Reward 75: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.010s label KQLearningHJBCP, Reward 76: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.054s label KQLearningHJBCP, Reward 77: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 4.100s label KQLearningHJBCP, Reward 78: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.145s label KQLearningHJBCP, Reward 79: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 4.206s label KQLearningHJBCP, Reward 80: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 4.259s label KQLearningHJBCP, Reward 81: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 4.316s label KQLearningHJBCP, Reward 82: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 4.370s label KQLearningHJBCP, Reward 83: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.421s label KQLearningHJBCP, Reward 84: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.470s label KQLearningHJBCP, Reward 85: 120.000, Len(game): 120, Training Time: 5.986s, Prediction Time: 4.532s label KQLearningHJBCP, Reward 86: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.587s label KQLearningHJBCP, Reward 87: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 4.644s label KQLearningHJBCP, Reward 88: 83.000, Len(game): 83, Training Time: 5.986s, Prediction Time: 4.685s label KQLearningHJBCP, Reward 89: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 
4.732s label KQLearningHJBCP, Reward 90: 107.000, Len(game): 107, Training Time: 5.986s, Prediction Time: 4.785s label KQLearningHJBCP, Reward 91: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.834s label KQLearningHJBCP, Reward 92: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.883s label KQLearningHJBCP, Reward 93: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 4.931s label KQLearningHJBCP, Reward 94: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 4.984s label KQLearningHJBCP, Reward 95: 115.000, Len(game): 115, Training Time: 5.986s, Prediction Time: 5.041s label KQLearningHJBCP, Reward 96: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 5.094s label KQLearningHJBCP, Reward 97: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 5.149s label KQLearningHJBCP, Reward 98: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 5.202s label KQLearningHJBCP, Reward 99: 81.000, Len(game): 81, Training Time: 5.986s, Prediction Time: 5.244s Computed global error Bellman mean: 1.0343403334293693 iter: 0 label KQLearning, Reward 0: 20.000, Len(game): 20, Training Time: 0.003s, Prediction Time: 0.000s Computed global error Bellman mean: 0.11291406057087777 iter: 5 label KQLearning, Reward 1: 191.000, Len(game): 191, Training Time: 0.102s, Prediction Time: 0.028s Computed global error Bellman mean: 0.019494447560319525 iter: 5 label KQLearning, Reward 2: 32.000, Len(game): 32, Training Time: 0.206s, Prediction Time: 0.036s Computed global error Bellman mean: 0.05866904492151179 iter: 5 label KQLearning, Reward 3: 123.000, Len(game): 123, Training Time: 0.498s, Prediction Time: 0.069s Computed global error Bellman mean: 0.007963787895601543 iter: 5 label KQLearning, Reward 4: 162.000, Len(game): 162, Training Time: 1.013s, Prediction Time: 0.128s Computed global error Bellman mean: 0.7506316675407901 iter: 5 label KQLearning, Reward 5: 109.000, Len(game): 109, 
Training Time: 1.777s, Prediction Time: 0.168s Computed global error Bellman mean: 0.10648151504465554 iter: 5 label KQLearning, Reward 6: 264.000, Len(game): 264, Training Time: 3.193s, Prediction Time: 0.276s label KQLearning, Reward 7: 191.000, Len(game): 191, Training Time: 3.193s, Prediction Time: 0.377s label KQLearning, Reward 8: 233.000, Len(game): 233, Training Time: 3.193s, Prediction Time: 0.499s label KQLearning, Reward 9: 247.000, Len(game): 247, Training Time: 3.193s, Prediction Time: 0.629s label KQLearning, Reward 10: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 0.738s label KQLearning, Reward 11: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.262s label KQLearning, Reward 12: 155.000, Len(game): 155, Training Time: 3.193s, Prediction Time: 1.343s label KQLearning, Reward 13: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.864s label KQLearning, Reward 14: 266.000, Len(game): 266, Training Time: 3.193s, Prediction Time: 2.003s label KQLearning, Reward 15: 224.000, Len(game): 224, Training Time: 3.193s, Prediction Time: 2.120s label KQLearning, Reward 16: 237.000, Len(game): 237, Training Time: 3.193s, Prediction Time: 2.243s label KQLearning, Reward 17: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 2.277s label KQLearning, Reward 18: 255.000, Len(game): 255, Training Time: 3.193s, Prediction Time: 2.410s label KQLearning, Reward 19: 78.000, Len(game): 78, Training Time: 3.193s, Prediction Time: 2.450s label KQLearning, Reward 20: 229.000, Len(game): 229, Training Time: 3.193s, Prediction Time: 2.569s label KQLearning, Reward 21: 228.000, Len(game): 228, Training Time: 3.193s, Prediction Time: 2.688s label KQLearning, Reward 22: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 2.806s label KQLearning, Reward 23: 311.000, Len(game): 311, Training Time: 3.193s, Prediction Time: 2.967s label KQLearning, Reward 24: 265.000, Len(game): 265, Training Time: 
3.193s, Prediction Time: 3.105s label KQLearning, Reward 25: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 3.213s label KQLearning, Reward 26: 67.000, Len(game): 67, Training Time: 3.193s, Prediction Time: 3.247s label KQLearning, Reward 27: 235.000, Len(game): 235, Training Time: 3.193s, Prediction Time: 3.368s label KQLearning, Reward 28: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 3.467s label KQLearning, Reward 29: 215.000, Len(game): 215, Training Time: 3.193s, Prediction Time: 3.581s label KQLearning, Reward 30: 256.000, Len(game): 256, Training Time: 3.193s, Prediction Time: 3.717s label KQLearning, Reward 31: 259.000, Len(game): 259, Training Time: 3.193s, Prediction Time: 3.852s label KQLearning, Reward 32: 241.000, Len(game): 241, Training Time: 3.193s, Prediction Time: 3.978s label KQLearning, Reward 33: 271.000, Len(game): 271, Training Time: 3.193s, Prediction Time: 4.122s label KQLearning, Reward 34: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 4.227s label KQLearning, Reward 35: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 4.345s label KQLearning, Reward 36: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 4.468s label KQLearning, Reward 37: 189.000, Len(game): 189, Training Time: 3.193s, Prediction Time: 4.566s label KQLearning, Reward 38: 72.000, Len(game): 72, Training Time: 3.193s, Prediction Time: 4.604s label KQLearning, Reward 39: 201.000, Len(game): 201, Training Time: 3.193s, Prediction Time: 4.709s label KQLearning, Reward 40: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 5.232s label KQLearning, Reward 41: 187.000, Len(game): 187, Training Time: 3.193s, Prediction Time: 5.330s label KQLearning, Reward 42: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 5.432s label KQLearning, Reward 43: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 5.559s label KQLearning, Reward 44: 226.000, Len(game): 
226, Training Time: 3.193s, Prediction Time: 5.676s label KQLearning, Reward 45: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 5.786s label KQLearning, Reward 46: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 5.921s label KQLearning, Reward 47: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 6.039s label KQLearning, Reward 48: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 6.566s label KQLearning, Reward 49: 203.000, Len(game): 203, Training Time: 3.193s, Prediction Time: 6.673s label KQLearning, Reward 50: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 6.802s label KQLearning, Reward 51: 275.000, Len(game): 275, Training Time: 3.193s, Prediction Time: 6.949s label KQLearning, Reward 52: 257.000, Len(game): 257, Training Time: 3.193s, Prediction Time: 7.083s label KQLearning, Reward 53: 77.000, Len(game): 77, Training Time: 3.193s, Prediction Time: 7.124s label KQLearning, Reward 54: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 7.256s label KQLearning, Reward 55: 214.000, Len(game): 214, Training Time: 3.193s, Prediction Time: 7.368s label KQLearning, Reward 56: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 7.454s label KQLearning, Reward 57: 231.000, Len(game): 231, Training Time: 3.193s, Prediction Time: 7.574s label KQLearning, Reward 58: 178.000, Len(game): 178, Training Time: 3.193s, Prediction Time: 7.665s label KQLearning, Reward 59: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 7.794s label KQLearning, Reward 60: 161.000, Len(game): 161, Training Time: 3.193s, Prediction Time: 7.879s label KQLearning, Reward 61: 93.000, Len(game): 93, Training Time: 3.193s, Prediction Time: 7.928s label KQLearning, Reward 62: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 8.034s label KQLearning, Reward 63: 199.000, Len(game): 199, Training Time: 3.193s, Prediction Time: 8.139s label KQLearning, Reward 64: 
258.000, Len(game): 258, Training Time: 3.193s, Prediction Time: 8.277s label KQLearning, Reward 65: 168.000, Len(game): 168, Training Time: 3.193s, Prediction Time: 8.365s label KQLearning, Reward 66: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 8.477s label KQLearning, Reward 67: 71.000, Len(game): 71, Training Time: 3.193s, Prediction Time: 8.513s label KQLearning, Reward 68: 70.000, Len(game): 70, Training Time: 3.193s, Prediction Time: 8.550s label KQLearning, Reward 69: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 9.076s label KQLearning, Reward 70: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.190s label KQLearning, Reward 71: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 9.327s label KQLearning, Reward 72: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 9.449s label KQLearning, Reward 73: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 9.484s label KQLearning, Reward 74: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.597s label KQLearning, Reward 75: 238.000, Len(game): 238, Training Time: 3.193s, Prediction Time: 9.720s label KQLearning, Reward 76: 243.000, Len(game): 243, Training Time: 3.193s, Prediction Time: 9.848s label KQLearning, Reward 77: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.964s label KQLearning, Reward 78: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 10.080s label KQLearning, Reward 79: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 10.213s label KQLearning, Reward 80: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 10.322s label KQLearning, Reward 81: 100.000, Len(game): 100, Training Time: 3.193s, Prediction Time: 10.373s label KQLearning, Reward 82: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 10.492s label KQLearning, Reward 83: 153.000, Len(game): 153, Training Time: 3.193s, Prediction Time: 10.573s label 
KQLearning, Reward 84: 333.000, Len(game): 333, Training Time: 3.193s, Prediction Time: 10.749s label KQLearning, Reward 85: 246.000, Len(game): 246, Training Time: 3.193s, Prediction Time: 10.880s label KQLearning, Reward 86: 307.000, Len(game): 307, Training Time: 3.193s, Prediction Time: 11.040s label KQLearning, Reward 87: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 11.169s label KQLearning, Reward 88: 269.000, Len(game): 269, Training Time: 3.193s, Prediction Time: 11.309s label KQLearning, Reward 89: 206.000, Len(game): 206, Training Time: 3.193s, Prediction Time: 11.417s label KQLearning, Reward 90: 173.000, Len(game): 173, Training Time: 3.193s, Prediction Time: 11.507s label KQLearning, Reward 91: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 11.593s label KQLearning, Reward 92: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 11.701s label KQLearning, Reward 93: 248.000, Len(game): 248, Training Time: 3.193s, Prediction Time: 11.829s label KQLearning, Reward 94: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 11.946s label KQLearning, Reward 95: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 12.049s label KQLearning, Reward 96: 226.000, Len(game): 226, Training Time: 3.193s, Prediction Time: 12.169s label KQLearning, Reward 97: 232.000, Len(game): 232, Training Time: 3.193s, Prediction Time: 12.288s label KQLearning, Reward 98: 230.000, Len(game): 230, Training Time: 3.193s, Prediction Time: 12.407s label KQLearning, Reward 99: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 12.936s 0 label PPOAgent, Reward 0: 12.000, Len(game): 12, Training Time: 0.003s, Prediction Time: 0.003s label PPOAgent, Reward 1: 23.000, Len(game): 23, Training Time: 0.007s, Prediction Time: 0.007s label PPOAgent, Reward 2: 12.000, Len(game): 12, Training Time: 0.010s, Prediction Time: 0.010s label PPOAgent, Reward 3: 14.000, Len(game): 14, Training Time: 0.013s, 
Prediction Time: 0.013s label PPOAgent, Reward 4: 24.000, Len(game): 24, Training Time: 0.018s, Prediction Time: 0.018s label PPOAgent, Reward 5: 63.000, Len(game): 63, Training Time: 0.030s, Prediction Time: 0.030s label PPOAgent, Reward 6: 35.000, Len(game): 35, Training Time: 0.037s, Prediction Time: 0.037s label PPOAgent, Reward 7: 21.000, Len(game): 21, Training Time: 0.040s, Prediction Time: 0.040s label PPOAgent, Reward 8: 14.000, Len(game): 14, Training Time: 0.043s, Prediction Time: 0.043s label PPOAgent, Reward 9: 11.000, Len(game): 11, Training Time: 0.045s, Prediction Time: 0.045s label PPOAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.048s, Prediction Time: 0.048s label PPOAgent, Reward 11: 15.000, Len(game): 15, Training Time: 0.050s, Prediction Time: 0.050s label PPOAgent, Reward 12: 15.000, Len(game): 15, Training Time: 0.053s, Prediction Time: 0.053s label PPOAgent, Reward 13: 16.000, Len(game): 16, Training Time: 0.057s, Prediction Time: 0.057s label PPOAgent, Reward 14: 12.000, Len(game): 12, Training Time: 0.059s, Prediction Time: 0.059s label PPOAgent, Reward 15: 14.000, Len(game): 14, Training Time: 0.062s, Prediction Time: 0.062s label PPOAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.064s, Prediction Time: 0.064s label PPOAgent, Reward 17: 21.000, Len(game): 21, Training Time: 0.068s, Prediction Time: 0.068s label PPOAgent, Reward 18: 15.000, Len(game): 15, Training Time: 0.071s, Prediction Time: 0.071s label PPOAgent, Reward 19: 31.000, Len(game): 31, Training Time: 0.077s, Prediction Time: 0.077s label PPOAgent, Reward 20: 11.000, Len(game): 11, Training Time: 0.078s, Prediction Time: 0.078s label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.081s, Prediction Time: 0.081s label PPOAgent, Reward 22: 10.000, Len(game): 10, Training Time: 0.082s, Prediction Time: 0.082s label PPOAgent, Reward 23: 43.000, Len(game): 43, Training Time: 0.091s, Prediction Time: 0.091s label PPOAgent, Reward 24: 22.000, 
Len(game): 22, Training Time: 0.095s, Prediction Time: 0.095s label PPOAgent, Reward 25: 27.000, Len(game): 27, Training Time: 0.100s, Prediction Time: 0.100s label PPOAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.103s, Prediction Time: 0.103s label PPOAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s label PPOAgent, Reward 28: 27.000, Len(game): 27, Training Time: 0.112s, Prediction Time: 0.112s label PPOAgent, Reward 29: 37.000, Len(game): 37, Training Time: 0.119s, Prediction Time: 0.119s label PPOAgent, Reward 30: 15.000, Len(game): 15, Training Time: 0.121s, Prediction Time: 0.121s label PPOAgent, Reward 31: 18.000, Len(game): 18, Training Time: 0.124s, Prediction Time: 0.124s label PPOAgent, Reward 32: 14.000, Len(game): 14, Training Time: 0.127s, Prediction Time: 0.127s label PPOAgent, Reward 33: 22.000, Len(game): 22, Training Time: 0.130s, Prediction Time: 0.130s label PPOAgent, Reward 34: 8.000, Len(game): 8, Training Time: 0.133s, Prediction Time: 0.133s label PPOAgent, Reward 35: 44.000, Len(game): 44, Training Time: 0.140s, Prediction Time: 0.140s label PPOAgent, Reward 36: 13.000, Len(game): 13, Training Time: 0.143s, Prediction Time: 0.143s label PPOAgent, Reward 37: 17.000, Len(game): 17, Training Time: 0.146s, Prediction Time: 0.146s label PPOAgent, Reward 38: 20.000, Len(game): 20, Training Time: 0.150s, Prediction Time: 0.150s label PPOAgent, Reward 39: 53.000, Len(game): 53, Training Time: 0.160s, Prediction Time: 0.160s label PPOAgent, Reward 40: 22.000, Len(game): 22, Training Time: 0.164s, Prediction Time: 0.164s label PPOAgent, Reward 41: 47.000, Len(game): 47, Training Time: 0.174s, Prediction Time: 0.174s label PPOAgent, Reward 42: 36.000, Len(game): 36, Training Time: 0.181s, Prediction Time: 0.181s label PPOAgent, Reward 43: 19.000, Len(game): 19, Training Time: 0.184s, Prediction Time: 0.184s label PPOAgent, Reward 44: 11.000, Len(game): 11, Training Time: 0.186s, Prediction Time: 0.186s 
label PPOAgent, Reward 45: 12.000, Len(game): 12, Training Time: 0.189s, Prediction Time: 0.189s label PPOAgent, Reward 46: 38.000, Len(game): 38, Training Time: 0.196s, Prediction Time: 0.196s label PPOAgent, Reward 47: 37.000, Len(game): 37, Training Time: 0.203s, Prediction Time: 0.203s label PPOAgent, Reward 48: 17.000, Len(game): 17, Training Time: 0.205s, Prediction Time: 0.205s label PPOAgent, Reward 49: 46.000, Len(game): 46, Training Time: 0.212s, Prediction Time: 0.212s label PPOAgent, Reward 50: 16.000, Len(game): 16, Training Time: 0.215s, Prediction Time: 0.215s label PPOAgent, Reward 51: 24.000, Len(game): 24, Training Time: 0.219s, Prediction Time: 0.219s label PPOAgent, Reward 52: 18.000, Len(game): 18, Training Time: 0.221s, Prediction Time: 0.221s label PPOAgent, Reward 53: 25.000, Len(game): 25, Training Time: 0.265s, Prediction Time: 0.265s label PPOAgent, Reward 54: 16.000, Len(game): 16, Training Time: 0.268s, Prediction Time: 0.268s label PPOAgent, Reward 55: 37.000, Len(game): 37, Training Time: 0.275s, Prediction Time: 0.275s label PPOAgent, Reward 56: 20.000, Len(game): 20, Training Time: 0.279s, Prediction Time: 0.279s label PPOAgent, Reward 57: 28.000, Len(game): 28, Training Time: 0.284s, Prediction Time: 0.284s label PPOAgent, Reward 58: 15.000, Len(game): 15, Training Time: 0.287s, Prediction Time: 0.287s label PPOAgent, Reward 59: 11.000, Len(game): 11, Training Time: 0.289s, Prediction Time: 0.289s label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.293s, Prediction Time: 0.293s label PPOAgent, Reward 61: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.295s label PPOAgent, Reward 62: 24.000, Len(game): 24, Training Time: 0.299s, Prediction Time: 0.299s label PPOAgent, Reward 63: 15.000, Len(game): 15, Training Time: 0.301s, Prediction Time: 0.301s label PPOAgent, Reward 64: 14.000, Len(game): 14, Training Time: 0.303s, Prediction Time: 0.303s label PPOAgent, Reward 65: 10.000, Len(game): 10, Training 
Time: 0.304s, Prediction Time: 0.304s label PPOAgent, Reward 66: 15.000, Len(game): 15, Training Time: 0.308s, Prediction Time: 0.308s label PPOAgent, Reward 67: 9.000, Len(game): 9, Training Time: 0.309s, Prediction Time: 0.309s label PPOAgent, Reward 68: 16.000, Len(game): 16, Training Time: 0.312s, Prediction Time: 0.312s label PPOAgent, Reward 69: 18.000, Len(game): 18, Training Time: 0.315s, Prediction Time: 0.315s label PPOAgent, Reward 70: 15.000, Len(game): 15, Training Time: 0.319s, Prediction Time: 0.319s label PPOAgent, Reward 71: 24.000, Len(game): 24, Training Time: 0.325s, Prediction Time: 0.325s label PPOAgent, Reward 72: 51.000, Len(game): 51, Training Time: 0.335s, Prediction Time: 0.335s label PPOAgent, Reward 73: 14.000, Len(game): 14, Training Time: 0.338s, Prediction Time: 0.338s label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.344s, Prediction Time: 0.344s label PPOAgent, Reward 75: 16.000, Len(game): 16, Training Time: 0.347s, Prediction Time: 0.347s label PPOAgent, Reward 76: 89.000, Len(game): 89, Training Time: 0.366s, Prediction Time: 0.366s label PPOAgent, Reward 77: 17.000, Len(game): 17, Training Time: 0.369s, Prediction Time: 0.369s label PPOAgent, Reward 78: 21.000, Len(game): 21, Training Time: 0.373s, Prediction Time: 0.373s label PPOAgent, Reward 79: 17.000, Len(game): 17, Training Time: 0.376s, Prediction Time: 0.376s label PPOAgent, Reward 80: 26.000, Len(game): 26, Training Time: 0.381s, Prediction Time: 0.381s label PPOAgent, Reward 81: 31.000, Len(game): 31, Training Time: 0.388s, Prediction Time: 0.388s label PPOAgent, Reward 82: 20.000, Len(game): 20, Training Time: 0.391s, Prediction Time: 0.391s label PPOAgent, Reward 83: 14.000, Len(game): 14, Training Time: 0.394s, Prediction Time: 0.394s label PPOAgent, Reward 84: 13.000, Len(game): 13, Training Time: 0.397s, Prediction Time: 0.397s label PPOAgent, Reward 85: 15.000, Len(game): 15, Training Time: 0.400s, Prediction Time: 0.400s label PPOAgent, Reward 
86: 16.000, Len(game): 16, Training Time: 0.403s, Prediction Time: 0.403s label PPOAgent, Reward 87: 19.000, Len(game): 19, Training Time: 0.407s, Prediction Time: 0.407s label PPOAgent, Reward 88: 13.000, Len(game): 13, Training Time: 0.409s, Prediction Time: 0.409s label PPOAgent, Reward 89: 12.000, Len(game): 12, Training Time: 0.412s, Prediction Time: 0.412s label PPOAgent, Reward 90: 20.000, Len(game): 20, Training Time: 0.415s, Prediction Time: 0.415s label PPOAgent, Reward 91: 20.000, Len(game): 20, Training Time: 0.420s, Prediction Time: 0.420s label PPOAgent, Reward 92: 56.000, Len(game): 56, Training Time: 0.431s, Prediction Time: 0.431s label PPOAgent, Reward 93: 11.000, Len(game): 11, Training Time: 0.434s, Prediction Time: 0.434s label PPOAgent, Reward 94: 26.000, Len(game): 26, Training Time: 0.438s, Prediction Time: 0.438s label PPOAgent, Reward 95: 11.000, Len(game): 11, Training Time: 0.440s, Prediction Time: 0.440s label PPOAgent, Reward 96: 21.000, Len(game): 21, Training Time: 0.444s, Prediction Time: 0.444s label PPOAgent, Reward 97: 36.000, Len(game): 36, Training Time: 0.451s, Prediction Time: 0.451s label PPOAgent, Reward 98: 26.000, Len(game): 26, Training Time: 0.455s, Prediction Time: 0.455s label PPOAgent, Reward 99: 15.000, Len(game): 15, Training Time: 0.458s, Prediction Time: 0.458s label PolicyGradient, Reward 0: 10.000, Len(game): 10, Training Time: 0.004s, Prediction Time: 0.000s label PolicyGradient, Reward 1: 13.000, Len(game): 13, Training Time: 0.007s, Prediction Time: 0.003s label PolicyGradient, Reward 2: 13.000, Len(game): 13, Training Time: 0.011s, Prediction Time: 0.006s label PolicyGradient, Reward 3: 27.000, Len(game): 27, Training Time: 0.017s, Prediction Time: 0.011s label PolicyGradient, Reward 4: 10.000, Len(game): 10, Training Time: 0.023s, Prediction Time: 0.013s label PolicyGradient, Reward 5: 15.000, Len(game): 15, Training Time: 0.032s, Prediction Time: 0.016s label PolicyGradient, Reward 6: 12.000, Len(game): 
12, Training Time: 0.040s, Prediction Time: 0.018s label PolicyGradient, Reward 7: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.021s label PolicyGradient, Reward 8: 17.000, Len(game): 17, Training Time: 0.056s, Prediction Time: 0.026s label PolicyGradient, Reward 9: 17.000, Len(game): 17, Training Time: 0.065s, Prediction Time: 0.031s label PolicyGradient, Reward 10: 15.000, Len(game): 15, Training Time: 0.075s, Prediction Time: 0.036s label PolicyGradient, Reward 11: 23.000, Len(game): 23, Training Time: 0.087s, Prediction Time: 0.043s label PolicyGradient, Reward 12: 59.000, Len(game): 59, Training Time: 0.103s, Prediction Time: 0.058s label PolicyGradient, Reward 13: 37.000, Len(game): 37, Training Time: 0.124s, Prediction Time: 0.070s label PolicyGradient, Reward 14: 21.000, Len(game): 21, Training Time: 0.146s, Prediction Time: 0.079s label PolicyGradient, Reward 15: 42.000, Len(game): 42, Training Time: 0.171s, Prediction Time: 0.095s label PolicyGradient, Reward 16: 15.000, Len(game): 15, Training Time: 0.197s, Prediction Time: 0.106s label PolicyGradient, Reward 17: 19.000, Len(game): 19, Training Time: 0.230s, Prediction Time: 0.118s label PolicyGradient, Reward 18: 63.000, Len(game): 63, Training Time: 0.274s, Prediction Time: 0.148s label PolicyGradient, Reward 19: 30.000, Len(game): 30, Training Time: 0.324s, Prediction Time: 0.169s label PolicyGradient, Reward 20: 74.000, Len(game): 74, Training Time: 0.392s, Prediction Time: 0.206s label PolicyGradient, Reward 21: 28.000, Len(game): 28, Training Time: 0.457s, Prediction Time: 0.235s label PolicyGradient, Reward 22: 96.000, Len(game): 96, Training Time: 0.547s, Prediction Time: 0.292s label PolicyGradient, Reward 23: 27.000, Len(game): 27, Training Time: 0.633s, Prediction Time: 0.330s label PolicyGradient, Reward 24: 16.000, Len(game): 16, Training Time: 0.731s, Prediction Time: 0.367s label PolicyGradient, Reward 25: 33.000, Len(game): 33, Training Time: 0.831s, Prediction Time: 
0.414s label PolicyGradient, Reward 26: 13.000, Len(game): 13, Training Time: 0.942s, Prediction Time: 0.459s label PolicyGradient, Reward 27: 47.000, Len(game): 47, Training Time: 1.070s, Prediction Time: 0.521s label PolicyGradient, Reward 28: 92.000, Len(game): 92, Training Time: 1.212s, Prediction Time: 0.606s label PolicyGradient, Reward 29: 34.000, Len(game): 34, Training Time: 1.364s, Prediction Time: 0.670s label PolicyGradient, Reward 30: 128.000, Len(game): 128, Training Time: 1.567s, Prediction Time: 0.790s label PolicyGradient, Reward 31: 49.000, Len(game): 49, Training Time: 1.766s, Prediction Time: 0.890s label PolicyGradient, Reward 32: 124.000, Len(game): 124, Training Time: 2.011s, Prediction Time: 1.034s label PolicyGradient, Reward 33: 45.000, Len(game): 45, Training Time: 2.287s, Prediction Time: 1.158s label PolicyGradient, Reward 34: 124.000, Len(game): 124, Training Time: 2.597s, Prediction Time: 1.329s label PolicyGradient, Reward 35: 25.000, Len(game): 25, Training Time: 2.924s, Prediction Time: 1.462s label PolicyGradient, Reward 36: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 1.612s label PolicyGradient, Reward 37: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.772s label PolicyGradient, Reward 38: 48.000, Len(game): 48, Training Time: 3.234s, Prediction Time: 1.801s label PolicyGradient, Reward 39: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 1.832s label PolicyGradient, Reward 40: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 1.845s label PolicyGradient, Reward 41: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 1.861s label PolicyGradient, Reward 42: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.884s label PolicyGradient, Reward 43: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.907s label PolicyGradient, Reward 44: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.924s label PolicyGradient, Reward 
45: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.938s label PolicyGradient, Reward 46: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.962s label PolicyGradient, Reward 47: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 1.981s label PolicyGradient, Reward 48: 32.000, Len(game): 32, Training Time: 3.234s, Prediction Time: 2.001s label PolicyGradient, Reward 49: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.020s label PolicyGradient, Reward 50: 50.000, Len(game): 50, Training Time: 3.234s, Prediction Time: 2.051s label PolicyGradient, Reward 51: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.072s label PolicyGradient, Reward 52: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.097s label PolicyGradient, Reward 53: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.117s label PolicyGradient, Reward 54: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.138s label PolicyGradient, Reward 55: 28.000, Len(game): 28, Training Time: 3.234s, Prediction Time: 2.156s label PolicyGradient, Reward 56: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.176s label PolicyGradient, Reward 57: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.197s label PolicyGradient, Reward 58: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.213s label PolicyGradient, Reward 59: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.228s label PolicyGradient, Reward 60: 29.000, Len(game): 29, Training Time: 3.234s, Prediction Time: 2.246s label PolicyGradient, Reward 61: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.266s label PolicyGradient, Reward 62: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.286s label PolicyGradient, Reward 63: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.311s label PolicyGradient, Reward 64: 39.000, Len(game): 39, Training Time: 
3.234s, Prediction Time: 2.336s label PolicyGradient, Reward 65: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.359s label PolicyGradient, Reward 66: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 2.373s label PolicyGradient, Reward 67: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 2.383s label PolicyGradient, Reward 68: 16.000, Len(game): 16, Training Time: 3.234s, Prediction Time: 2.393s label PolicyGradient, Reward 69: 21.000, Len(game): 21, Training Time: 3.234s, Prediction Time: 2.406s label PolicyGradient, Reward 70: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.432s label PolicyGradient, Reward 71: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 2.445s label PolicyGradient, Reward 72: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.471s label PolicyGradient, Reward 73: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.490s label PolicyGradient, Reward 74: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.510s label PolicyGradient, Reward 75: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.530s label PolicyGradient, Reward 76: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 2.552s label PolicyGradient, Reward 77: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.569s label PolicyGradient, Reward 78: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.587s label PolicyGradient, Reward 79: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.603s label PolicyGradient, Reward 80: 59.000, Len(game): 59, Training Time: 3.234s, Prediction Time: 2.639s label PolicyGradient, Reward 81: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.664s label PolicyGradient, Reward 82: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.686s label PolicyGradient, Reward 83: 37.000, Len(game): 37, Training Time: 3.234s, Prediction Time: 2.708s label 
PolicyGradient, Reward 84: 44.000, Len(game): 44, Training Time: 3.234s, Prediction Time: 2.735s label PolicyGradient, Reward 85: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 2.757s label PolicyGradient, Reward 86: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.781s label PolicyGradient, Reward 87: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.805s label PolicyGradient, Reward 88: 20.000, Len(game): 20, Training Time: 3.234s, Prediction Time: 2.818s label PolicyGradient, Reward 89: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.840s label PolicyGradient, Reward 90: 40.000, Len(game): 40, Training Time: 3.234s, Prediction Time: 2.866s label PolicyGradient, Reward 91: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 2.896s label PolicyGradient, Reward 92: 27.000, Len(game): 27, Training Time: 3.234s, Prediction Time: 2.912s label PolicyGradient, Reward 93: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.938s label PolicyGradient, Reward 94: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.962s label PolicyGradient, Reward 95: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.978s label PolicyGradient, Reward 96: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.000s label PolicyGradient, Reward 97: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 3.014s label PolicyGradient, Reward 98: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 3.035s label PolicyGradient, Reward 99: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.057s label Controller-based, Reward 0: 138.000, Len(game): 138, Training Time: 0.003s, Prediction Time: 0.001s label Controller-based, Reward 1: 230.000, Len(game): 230, Training Time: 0.006s, Prediction Time: 0.003s label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.018s, Prediction Time: 0.003s label Controller-based, Reward 3: 148.000, 
Len(game): 148, Training Time: 0.028s, Prediction Time: 0.005s label Controller-based, Reward 4: 9.000, Len(game): 9, Training Time: 0.037s, Prediction Time: 0.005s label Controller-based, Reward 5: 263.000, Len(game): 263, Training Time: 0.049s, Prediction Time: 0.006s label Controller-based, Reward 6: 206.000, Len(game): 206, Training Time: 0.061s, Prediction Time: 0.008s label Controller-based, Reward 7: 249.000, Len(game): 249, Training Time: 0.073s, Prediction Time: 0.010s label Controller-based, Reward 8: 281.000, Len(game): 281, Training Time: 0.086s, Prediction Time: 0.012s label Controller-based, Reward 9: 153.000, Len(game): 153, Training Time: 0.097s, Prediction Time: 0.013s no training label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.097s, Prediction Time: 0.021s label Controller-based, Reward 11: 750.000, Len(game): 750, Training Time: 0.114s, Prediction Time: 0.028s no training label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.037s no training label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.045s no training label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.053s no training label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.061s no training label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.070s no training label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.078s no training label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.086s no training label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.095s no training label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction 
Time: 0.103s no training label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.113s no training label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.121s no training label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.128s no training label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.137s no training label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.146s no training label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.154s no training label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.161s no training label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.170s no training label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.179s no training label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.187s no training label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.195s no training label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.203s no training label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.210s no training label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.217s no training label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.225s no training label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.234s no training label Controller-based, Reward 37: 
1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.242s no training label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.250s no training label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.259s no training label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.267s no training label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.275s no training label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.284s no training label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.292s no training label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.301s no training label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.308s no training label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.317s no training label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.326s no training label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.334s no training label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.342s no training label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.351s no training label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.358s no training label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.366s no training label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction 
Time: 0.374s no training label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.382s no training label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.391s no training label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.399s no training label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.407s no training label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.415s no training label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.423s no training label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.431s no training label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.439s no training label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.447s no training label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.455s no training label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.463s no training label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.471s no training label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.480s no training label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.488s no training label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.496s no training label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.505s no training label Controller-based, Reward 70: 
1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.513s no training label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.522s no training label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.530s no training label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.538s no training label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.546s no training label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.555s no training label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.563s no training label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.572s no training label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.581s no training label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.589s no training label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.597s no training label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.605s no training label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.613s no training label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.621s no training label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.630s no training label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.638s no training label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction 
Time: 0.647s no training label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.655s no training label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.664s no training label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.671s no training label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.681s no training label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.689s no training label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.698s no training label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.707s no training label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.715s no training label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.722s no training label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.731s no training label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.740s no training label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.749s no training label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.756s label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s label KACAgent, Reward 1: 48.000, Len(game): 48, Training Time: 0.008s, Prediction Time: 0.009s label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.012s label KACAgent, Reward 3: 70.000, Len(game): 70, Training Time: 0.024s, Prediction Time: 0.026s label KACAgent, Reward 4: 
60.000, Len(game): 60, Training Time: 0.038s, Prediction Time: 0.040s label KACAgent, Reward 5: 105.000, Len(game): 105, Training Time: 0.063s, Prediction Time: 0.064s label KACAgent, Reward 6: 136.000, Len(game): 136, Training Time: 0.106s, Prediction Time: 0.105s label KACAgent, Reward 7: 71.000, Len(game): 71, Training Time: 0.158s, Prediction Time: 0.139s label KACAgent, Reward 8: 97.000, Len(game): 97, Training Time: 0.226s, Prediction Time: 0.190s label KACAgent, Reward 9: 191.000, Len(game): 191, Training Time: 0.337s, Prediction Time: 0.283s label KACAgent, Reward 10: 78.000, Len(game): 78, Training Time: 0.465s, Prediction Time: 0.363s label KACAgent, Reward 11: 142.000, Len(game): 142, Training Time: 0.629s, Prediction Time: 0.475s label KACAgent, Reward 12: 149.000, Len(game): 149, Training Time: 0.833s, Prediction Time: 0.633s label KACAgent, Reward 13: 362.000, Len(game): 362, Training Time: 1.147s, Prediction Time: 0.915s label KACAgent, Reward 14: 209.000, Len(game): 209, Training Time: 1.565s, Prediction Time: 1.223s label KACAgent, Reward 15: 286.000, Len(game): 286, Training Time: 2.114s, Prediction Time: 1.648s label KACAgent, Reward 16: 297.000, Len(game): 297, Training Time: 2.809s, Prediction Time: 2.164s label KACAgent, Reward 17: 366.000, Len(game): 366, Training Time: 3.681s, Prediction Time: 2.861s label KACAgent, Reward 18: 273.000, Len(game): 273, Training Time: 3.681s, Prediction Time: 3.597s label KACAgent, Reward 19: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 3.916s label KACAgent, Reward 20: 268.000, Len(game): 268, Training Time: 3.681s, Prediction Time: 4.126s label KACAgent, Reward 21: 345.000, Len(game): 345, Training Time: 3.681s, Prediction Time: 4.396s label KACAgent, Reward 22: 311.000, Len(game): 311, Training Time: 3.681s, Prediction Time: 4.638s label KACAgent, Reward 23: 369.000, Len(game): 369, Training Time: 3.681s, Prediction Time: 4.926s label KACAgent, Reward 24: 357.000, Len(game): 357, 
Training Time: 3.681s, Prediction Time: 5.203s label KACAgent, Reward 25: 391.000, Len(game): 391, Training Time: 3.681s, Prediction Time: 5.510s label KACAgent, Reward 26: 340.000, Len(game): 340, Training Time: 3.681s, Prediction Time: 5.775s label KACAgent, Reward 27: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 6.037s label KACAgent, Reward 28: 354.000, Len(game): 354, Training Time: 3.681s, Prediction Time: 6.316s label KACAgent, Reward 29: 290.000, Len(game): 290, Training Time: 3.681s, Prediction Time: 6.541s label KACAgent, Reward 30: 112.000, Len(game): 112, Training Time: 3.681s, Prediction Time: 6.627s label KACAgent, Reward 31: 390.000, Len(game): 390, Training Time: 3.681s, Prediction Time: 6.930s label KACAgent, Reward 32: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.156s label KACAgent, Reward 33: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 7.383s label KACAgent, Reward 34: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.608s label KACAgent, Reward 35: 341.000, Len(game): 341, Training Time: 3.681s, Prediction Time: 7.880s label KACAgent, Reward 36: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 8.162s label KACAgent, Reward 37: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 8.394s label KACAgent, Reward 38: 434.000, Len(game): 434, Training Time: 3.681s, Prediction Time: 8.737s label KACAgent, Reward 39: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 8.961s label KACAgent, Reward 40: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 9.239s label KACAgent, Reward 41: 309.000, Len(game): 309, Training Time: 3.681s, Prediction Time: 9.480s label KACAgent, Reward 42: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 9.777s label KACAgent, Reward 43: 439.000, Len(game): 439, Training Time: 3.681s, Prediction Time: 10.119s label KACAgent, Reward 44: 414.000, Len(game): 414, Training Time: 
3.681s, Prediction Time: 10.441s label KACAgent, Reward 45: 379.000, Len(game): 379, Training Time: 3.681s, Prediction Time: 10.737s label KACAgent, Reward 46: 283.000, Len(game): 283, Training Time: 3.681s, Prediction Time: 10.956s label KACAgent, Reward 47: 315.000, Len(game): 315, Training Time: 3.681s, Prediction Time: 11.203s label KACAgent, Reward 48: 266.000, Len(game): 266, Training Time: 3.681s, Prediction Time: 11.410s label KACAgent, Reward 49: 306.000, Len(game): 306, Training Time: 3.681s, Prediction Time: 11.649s label KACAgent, Reward 50: 378.000, Len(game): 378, Training Time: 3.681s, Prediction Time: 11.946s label KACAgent, Reward 51: 196.000, Len(game): 196, Training Time: 3.681s, Prediction Time: 12.098s label KACAgent, Reward 52: 508.000, Len(game): 508, Training Time: 3.681s, Prediction Time: 12.494s label KACAgent, Reward 53: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 12.735s label KACAgent, Reward 54: 393.000, Len(game): 393, Training Time: 3.681s, Prediction Time: 13.039s label KACAgent, Reward 55: 276.000, Len(game): 276, Training Time: 3.681s, Prediction Time: 13.255s label KACAgent, Reward 56: 405.000, Len(game): 405, Training Time: 3.681s, Prediction Time: 13.572s label KACAgent, Reward 57: 461.000, Len(game): 461, Training Time: 3.681s, Prediction Time: 13.929s label KACAgent, Reward 58: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 14.161s label KACAgent, Reward 59: 331.000, Len(game): 331, Training Time: 3.681s, Prediction Time: 14.420s label KACAgent, Reward 60: 314.000, Len(game): 314, Training Time: 3.681s, Prediction Time: 14.664s label KACAgent, Reward 61: 281.000, Len(game): 281, Training Time: 3.681s, Prediction Time: 14.883s label KACAgent, Reward 62: 300.000, Len(game): 300, Training Time: 3.681s, Prediction Time: 15.116s label KACAgent, Reward 63: 282.000, Len(game): 282, Training Time: 3.681s, Prediction Time: 15.335s label KACAgent, Reward 64: 304.000, Len(game): 304, Training Time: 
3.681s, Prediction Time: 15.571s label KACAgent, Reward 65: 296.000, Len(game): 296, Training Time: 3.681s, Prediction Time: 15.801s label KACAgent, Reward 66: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 16.071s label KACAgent, Reward 67: 318.000, Len(game): 318, Training Time: 3.681s, Prediction Time: 16.318s label KACAgent, Reward 68: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 16.565s label KACAgent, Reward 69: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 16.810s label KACAgent, Reward 70: 186.000, Len(game): 186, Training Time: 3.681s, Prediction Time: 16.955s label KACAgent, Reward 71: 362.000, Len(game): 362, Training Time: 3.681s, Prediction Time: 17.237s label KACAgent, Reward 72: 433.000, Len(game): 433, Training Time: 3.681s, Prediction Time: 17.573s label KACAgent, Reward 73: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 17.795s label KACAgent, Reward 74: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 18.053s label KACAgent, Reward 75: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 18.322s label KACAgent, Reward 76: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 18.609s label KACAgent, Reward 77: 652.000, Len(game): 652, Training Time: 3.681s, Prediction Time: 19.121s label KACAgent, Reward 78: 376.000, Len(game): 376, Training Time: 3.681s, Prediction Time: 19.421s label KACAgent, Reward 79: 620.000, Len(game): 620, Training Time: 3.681s, Prediction Time: 19.907s label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 3.681s, Prediction Time: 20.175s label KACAgent, Reward 81: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 20.432s label KACAgent, Reward 82: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 20.694s label KACAgent, Reward 83: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 20.934s label KACAgent, Reward 84: 427.000, Len(game): 427, Training Time: 
3.681s, Prediction Time: 21.268s label KACAgent, Reward 85: 298.000, Len(game): 298, Training Time: 3.681s, Prediction Time: 21.501s label KACAgent, Reward 86: 308.000, Len(game): 308, Training Time: 3.681s, Prediction Time: 21.749s label KACAgent, Reward 87: 263.000, Len(game): 263, Training Time: 3.681s, Prediction Time: 21.957s label KACAgent, Reward 88: 316.000, Len(game): 316, Training Time: 3.681s, Prediction Time: 22.208s label KACAgent, Reward 89: 373.000, Len(game): 373, Training Time: 3.681s, Prediction Time: 22.514s label KACAgent, Reward 90: 322.000, Len(game): 322, Training Time: 3.681s, Prediction Time: 22.769s label KACAgent, Reward 91: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 23.036s label KACAgent, Reward 92: 328.000, Len(game): 328, Training Time: 3.681s, Prediction Time: 23.293s label KACAgent, Reward 93: 472.000, Len(game): 472, Training Time: 3.681s, Prediction Time: 23.669s label KACAgent, Reward 94: 372.000, Len(game): 372, Training Time: 3.681s, Prediction Time: 23.969s label KACAgent, Reward 95: 293.000, Len(game): 293, Training Time: 3.681s, Prediction Time: 24.202s label KACAgent, Reward 96: 386.000, Len(game): 386, Training Time: 3.681s, Prediction Time: 24.509s label KACAgent, Reward 97: 323.000, Len(game): 323, Training Time: 3.681s, Prediction Time: 24.765s label KACAgent, Reward 98: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 25.065s label KACAgent, Reward 99: 334.000, Len(game): 334, Training Time: 3.681s, Prediction Time: 25.328s label DQNAgent, Reward 0: 31.000, Len(game): 31, Training Time: 0.001s, Prediction Time: 0.000s label DQNAgent, Reward 1: 37.000, Len(game): 37, Training Time: 0.007s, Prediction Time: 0.000s label DQNAgent, Reward 2: 29.000, Len(game): 29, Training Time: 0.043s, Prediction Time: 0.000s label DQNAgent, Reward 3: 17.000, Len(game): 17, Training Time: 0.061s, Prediction Time: 0.000s label DQNAgent, Reward 4: 16.000, Len(game): 16, Training Time: 0.078s, Prediction 
Time: 0.001s label DQNAgent, Reward 5: 11.000, Len(game): 11, Training Time: 0.091s, Prediction Time: 0.001s label DQNAgent, Reward 6: 23.000, Len(game): 23, Training Time: 0.118s, Prediction Time: 0.001s label DQNAgent, Reward 7: 12.000, Len(game): 12, Training Time: 0.132s, Prediction Time: 0.001s label DQNAgent, Reward 8: 19.000, Len(game): 19, Training Time: 0.155s, Prediction Time: 0.002s label DQNAgent, Reward 9: 13.000, Len(game): 13, Training Time: 0.172s, Prediction Time: 0.003s label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.191s, Prediction Time: 0.003s label DQNAgent, Reward 11: 21.000, Len(game): 21, Training Time: 0.213s, Prediction Time: 0.004s label DQNAgent, Reward 12: 9.000, Len(game): 9, Training Time: 0.223s, Prediction Time: 0.005s label DQNAgent, Reward 13: 11.000, Len(game): 11, Training Time: 0.235s, Prediction Time: 0.006s label DQNAgent, Reward 14: 18.000, Len(game): 18, Training Time: 0.256s, Prediction Time: 0.006s label DQNAgent, Reward 15: 9.000, Len(game): 9, Training Time: 0.266s, Prediction Time: 0.007s label DQNAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.281s, Prediction Time: 0.007s label DQNAgent, Reward 17: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.008s label DQNAgent, Reward 18: 13.000, Len(game): 13, Training Time: 0.310s, Prediction Time: 0.008s label DQNAgent, Reward 19: 9.000, Len(game): 9, Training Time: 0.320s, Prediction Time: 0.008s label DQNAgent, Reward 20: 10.000, Len(game): 10, Training Time: 0.331s, Prediction Time: 0.009s label DQNAgent, Reward 21: 26.000, Len(game): 26, Training Time: 0.359s, Prediction Time: 0.011s label DQNAgent, Reward 22: 16.000, Len(game): 16, Training Time: 0.378s, Prediction Time: 0.011s label DQNAgent, Reward 23: 28.000, Len(game): 28, Training Time: 0.409s, Prediction Time: 0.011s label DQNAgent, Reward 24: 11.000, Len(game): 11, Training Time: 0.422s, Prediction Time: 0.012s label DQNAgent, Reward 25: 18.000, Len(game): 18, 
Training Time: 0.442s, Prediction Time: 0.013s label DQNAgent, Reward 26: 27.000, Len(game): 27, Training Time: 0.475s, Prediction Time: 0.013s label DQNAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.492s, Prediction Time: 0.013s label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.505s, Prediction Time: 0.013s label DQNAgent, Reward 29: 11.000, Len(game): 11, Training Time: 0.516s, Prediction Time: 0.014s label DQNAgent, Reward 30: 19.000, Len(game): 19, Training Time: 0.540s, Prediction Time: 0.015s label DQNAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.562s, Prediction Time: 0.016s label DQNAgent, Reward 32: 18.000, Len(game): 18, Training Time: 0.582s, Prediction Time: 0.017s label DQNAgent, Reward 33: 12.000, Len(game): 12, Training Time: 0.594s, Prediction Time: 0.017s label DQNAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.610s, Prediction Time: 0.018s label DQNAgent, Reward 35: 33.000, Len(game): 33, Training Time: 0.646s, Prediction Time: 0.019s label DQNAgent, Reward 36: 34.000, Len(game): 34, Training Time: 0.685s, Prediction Time: 0.020s label DQNAgent, Reward 37: 25.000, Len(game): 25, Training Time: 0.712s, Prediction Time: 0.020s label DQNAgent, Reward 38: 11.000, Len(game): 11, Training Time: 0.724s, Prediction Time: 0.022s label DQNAgent, Reward 39: 20.000, Len(game): 20, Training Time: 0.746s, Prediction Time: 0.023s label DQNAgent, Reward 40: 13.000, Len(game): 13, Training Time: 0.760s, Prediction Time: 0.023s label DQNAgent, Reward 41: 38.000, Len(game): 38, Training Time: 0.802s, Prediction Time: 0.024s label DQNAgent, Reward 42: 20.000, Len(game): 20, Training Time: 0.825s, Prediction Time: 0.025s label DQNAgent, Reward 43: 14.000, Len(game): 14, Training Time: 0.840s, Prediction Time: 0.025s label DQNAgent, Reward 44: 28.000, Len(game): 28, Training Time: 0.871s, Prediction Time: 0.026s label DQNAgent, Reward 45: 24.000, Len(game): 24, Training Time: 0.899s, Prediction Time: 0.027s label 
DQNAgent, Reward 46: 33.000, Len(game): 33, Training Time: 0.936s, Prediction Time: 0.028s label DQNAgent, Reward 47: 19.000, Len(game): 19, Training Time: 0.957s, Prediction Time: 0.029s label DQNAgent, Reward 48: 23.000, Len(game): 23, Training Time: 0.981s, Prediction Time: 0.029s label DQNAgent, Reward 49: 36.000, Len(game): 36, Training Time: 1.020s, Prediction Time: 0.030s label DQNAgent, Reward 50: 111.000, Len(game): 111, Training Time: 1.144s, Prediction Time: 0.035s label DQNAgent, Reward 51: 176.000, Len(game): 176, Training Time: 1.343s, Prediction Time: 0.041s label DQNAgent, Reward 52: 202.000, Len(game): 202, Training Time: 1.569s, Prediction Time: 0.048s label DQNAgent, Reward 53: 151.000, Len(game): 151, Training Time: 1.736s, Prediction Time: 0.053s label DQNAgent, Reward 54: 163.000, Len(game): 163, Training Time: 1.936s, Prediction Time: 0.060s label DQNAgent, Reward 55: 178.000, Len(game): 178, Training Time: 2.147s, Prediction Time: 0.067s label DQNAgent, Reward 56: 443.000, Len(game): 443, Training Time: 2.657s, Prediction Time: 0.084s label DQNAgent, Reward 57: 234.000, Len(game): 234, Training Time: 2.931s, Prediction Time: 0.094s label DQNAgent, Reward 58: 179.000, Len(game): 179, Training Time: 3.131s, Prediction Time: 0.102s label DQNAgent, Reward 59: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.110s label DQNAgent, Reward 60: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.123s label DQNAgent, Reward 61: 221.000, Len(game): 221, Training Time: 3.131s, Prediction Time: 0.132s label DQNAgent, Reward 62: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.140s label DQNAgent, Reward 63: 186.000, Len(game): 186, Training Time: 3.131s, Prediction Time: 0.147s label DQNAgent, Reward 64: 285.000, Len(game): 285, Training Time: 3.131s, Prediction Time: 0.160s label DQNAgent, Reward 65: 342.000, Len(game): 342, Training Time: 3.131s, Prediction Time: 0.173s label DQNAgent, Reward 66: 
205.000, Len(game): 205, Training Time: 3.131s, Prediction Time: 0.182s label DQNAgent, Reward 67: 150.000, Len(game): 150, Training Time: 3.131s, Prediction Time: 0.188s label DQNAgent, Reward 68: 215.000, Len(game): 215, Training Time: 3.131s, Prediction Time: 0.198s label DQNAgent, Reward 69: 163.000, Len(game): 163, Training Time: 3.131s, Prediction Time: 0.204s label DQNAgent, Reward 70: 252.000, Len(game): 252, Training Time: 3.131s, Prediction Time: 0.214s label DQNAgent, Reward 71: 316.000, Len(game): 316, Training Time: 3.131s, Prediction Time: 0.228s label DQNAgent, Reward 72: 218.000, Len(game): 218, Training Time: 3.131s, Prediction Time: 0.237s label DQNAgent, Reward 73: 225.000, Len(game): 225, Training Time: 3.131s, Prediction Time: 0.246s label DQNAgent, Reward 74: 305.000, Len(game): 305, Training Time: 3.131s, Prediction Time: 0.258s label DQNAgent, Reward 75: 254.000, Len(game): 254, Training Time: 3.131s, Prediction Time: 0.269s label DQNAgent, Reward 76: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.277s label DQNAgent, Reward 77: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.287s label DQNAgent, Reward 78: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.296s label DQNAgent, Reward 79: 334.000, Len(game): 334, Training Time: 3.131s, Prediction Time: 0.310s label DQNAgent, Reward 80: 236.000, Len(game): 236, Training Time: 3.131s, Prediction Time: 0.321s label DQNAgent, Reward 81: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.329s label DQNAgent, Reward 82: 166.000, Len(game): 166, Training Time: 3.131s, Prediction Time: 0.335s label DQNAgent, Reward 83: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.348s label DQNAgent, Reward 84: 140.000, Len(game): 140, Training Time: 3.131s, Prediction Time: 0.353s label DQNAgent, Reward 85: 346.000, Len(game): 346, Training Time: 3.131s, Prediction Time: 0.367s label DQNAgent, Reward 86: 246.000, Len(game): 
246, Training Time: 3.131s, Prediction Time: 0.377s label DQNAgent, Reward 87: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.385s label DQNAgent, Reward 88: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.395s label DQNAgent, Reward 89: 223.000, Len(game): 223, Training Time: 3.131s, Prediction Time: 0.404s label DQNAgent, Reward 90: 188.000, Len(game): 188, Training Time: 3.131s, Prediction Time: 0.412s label DQNAgent, Reward 91: 173.000, Len(game): 173, Training Time: 3.131s, Prediction Time: 0.419s label DQNAgent, Reward 92: 167.000, Len(game): 167, Training Time: 3.131s, Prediction Time: 0.426s label DQNAgent, Reward 93: 242.000, Len(game): 242, Training Time: 3.131s, Prediction Time: 0.435s label DQNAgent, Reward 94: 310.000, Len(game): 310, Training Time: 3.131s, Prediction Time: 0.448s label DQNAgent, Reward 95: 320.000, Len(game): 320, Training Time: 3.131s, Prediction Time: 0.461s label DQNAgent, Reward 96: 145.000, Len(game): 145, Training Time: 3.131s, Prediction Time: 0.467s label DQNAgent, Reward 97: 156.000, Len(game): 156, Training Time: 3.131s, Prediction Time: 0.472s label DQNAgent, Reward 98: 178.000, Len(game): 178, Training Time: 3.131s, Prediction Time: 0.480s label DQNAgent, Reward 99: 217.000, Len(game): 217, Training Time: 3.131s, Prediction Time: 0.488s Computed global error Bellman mean: 6.83823226615183e-08 iter: 3 label KQLearningHJBCP, Reward 0: 35.000, Len(game): 35, Training Time: 0.012s, Prediction Time: 0.000s Computed global error Bellman mean: 6.153183776724802e-08 iter: 4 label KQLearningHJBCP, Reward 1: 40.000, Len(game): 40, Training Time: 0.036s, Prediction Time: 0.007s Computed global error Bellman mean: 6.570681726583113e-08 iter: 6 label KQLearningHJBCP, Reward 2: 47.000, Len(game): 47, Training Time: 0.099s, Prediction Time: 0.015s no training label KQLearningHJBCP, Reward 3: 1000.000, Len(game): 1000, Training Time: 0.099s, Prediction Time: 0.211s Computed global error Bellman mean: 
1.207237322271833e-07 iter: 9 label KQLearningHJBCP, Reward 4: 529.000, Len(game): 529, Training Time: 2.215s, Prediction Time: 0.318s no training label KQLearningHJBCP, Reward 5: 1000.000, Len(game): 1000, Training Time: 2.215s, Prediction Time: 0.748s Computed global error Bellman mean: 1.9297541858819368e-07 iter: 8 label KQLearningHJBCP, Reward 6: 779.000, Len(game): 779, Training Time: 10.511s, Prediction Time: 1.100s label KQLearningHJBCP, Reward 7: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 1.790s label KQLearningHJBCP, Reward 8: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 2.465s label KQLearningHJBCP, Reward 9: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.151s label KQLearningHJBCP, Reward 10: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.831s label KQLearningHJBCP, Reward 11: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 4.517s label KQLearningHJBCP, Reward 12: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.202s label KQLearningHJBCP, Reward 13: 163.000, Len(game): 163, Training Time: 10.511s, Prediction Time: 5.314s label KQLearningHJBCP, Reward 14: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.998s label KQLearningHJBCP, Reward 15: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 6.679s label KQLearningHJBCP, Reward 16: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 7.358s label KQLearningHJBCP, Reward 17: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.046s label KQLearningHJBCP, Reward 18: 173.000, Len(game): 173, Training Time: 10.511s, Prediction Time: 8.163s label KQLearningHJBCP, Reward 19: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.845s label KQLearningHJBCP, Reward 20: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 9.524s label KQLearningHJBCP, Reward 21: 1000.000, 
Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.206s label KQLearningHJBCP, Reward 22: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.891s label KQLearningHJBCP, Reward 23: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 11.151s label KQLearningHJBCP, Reward 24: 459.000, Len(game): 459, Training Time: 10.511s, Prediction Time: 11.466s label KQLearningHJBCP, Reward 25: 264.000, Len(game): 264, Training Time: 10.511s, Prediction Time: 11.646s label KQLearningHJBCP, Reward 26: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 12.332s label KQLearningHJBCP, Reward 27: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 13.027s label KQLearningHJBCP, Reward 28: 515.000, Len(game): 515, Training Time: 10.511s, Prediction Time: 13.374s label KQLearningHJBCP, Reward 29: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.055s label KQLearningHJBCP, Reward 30: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.744s label KQLearningHJBCP, Reward 31: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 15.427s label KQLearningHJBCP, Reward 32: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.106s label KQLearningHJBCP, Reward 33: 240.000, Len(game): 240, Training Time: 10.511s, Prediction Time: 16.271s label KQLearningHJBCP, Reward 34: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.953s label KQLearningHJBCP, Reward 35: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 17.631s label KQLearningHJBCP, Reward 36: 273.000, Len(game): 273, Training Time: 10.511s, Prediction Time: 17.814s label KQLearningHJBCP, Reward 37: 688.000, Len(game): 688, Training Time: 10.511s, Prediction Time: 18.282s label KQLearningHJBCP, Reward 38: 861.000, Len(game): 861, Training Time: 10.511s, Prediction Time: 18.862s label KQLearningHJBCP, Reward 39: 242.000, Len(game): 242, Training Time: 
10.511s, Prediction Time: 19.025s label KQLearningHJBCP, Reward 40: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 19.712s label KQLearningHJBCP, Reward 41: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 20.401s label KQLearningHJBCP, Reward 42: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.084s label KQLearningHJBCP, Reward 43: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.775s label KQLearningHJBCP, Reward 44: 985.000, Len(game): 985, Training Time: 10.511s, Prediction Time: 22.457s label KQLearningHJBCP, Reward 45: 530.000, Len(game): 530, Training Time: 10.511s, Prediction Time: 22.820s label KQLearningHJBCP, Reward 46: 228.000, Len(game): 228, Training Time: 10.511s, Prediction Time: 22.977s label KQLearningHJBCP, Reward 47: 405.000, Len(game): 405, Training Time: 10.511s, Prediction Time: 23.254s label KQLearningHJBCP, Reward 48: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 23.943s label KQLearningHJBCP, Reward 49: 366.000, Len(game): 366, Training Time: 10.511s, Prediction Time: 24.198s label KQLearningHJBCP, Reward 50: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 24.887s label KQLearningHJBCP, Reward 51: 208.000, Len(game): 208, Training Time: 10.511s, Prediction Time: 25.035s label KQLearningHJBCP, Reward 52: 497.000, Len(game): 497, Training Time: 10.511s, Prediction Time: 25.380s label KQLearningHJBCP, Reward 53: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.065s label KQLearningHJBCP, Reward 54: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.751s label KQLearningHJBCP, Reward 55: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 27.442s label KQLearningHJBCP, Reward 56: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.128s label KQLearningHJBCP, Reward 57: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.812s 
label KQLearningHJBCP, Reward 58: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 29.494s label KQLearningHJBCP, Reward 59: 288.000, Len(game): 288, Training Time: 10.511s, Prediction Time: 29.694s label KQLearningHJBCP, Reward 60: 244.000, Len(game): 244, Training Time: 10.511s, Prediction Time: 29.859s label KQLearningHJBCP, Reward 61: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 30.550s label KQLearningHJBCP, Reward 62: 224.000, Len(game): 224, Training Time: 10.511s, Prediction Time: 30.705s label KQLearningHJBCP, Reward 63: 259.000, Len(game): 259, Training Time: 10.511s, Prediction Time: 30.885s label KQLearningHJBCP, Reward 64: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 31.574s label KQLearningHJBCP, Reward 65: 285.000, Len(game): 285, Training Time: 10.511s, Prediction Time: 31.774s label KQLearningHJBCP, Reward 66: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 32.483s label KQLearningHJBCP, Reward 67: 268.000, Len(game): 268, Training Time: 10.511s, Prediction Time: 32.672s label KQLearningHJBCP, Reward 68: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 33.366s label KQLearningHJBCP, Reward 69: 237.000, Len(game): 237, Training Time: 10.511s, Prediction Time: 33.533s label KQLearningHJBCP, Reward 70: 252.000, Len(game): 252, Training Time: 10.511s, Prediction Time: 33.706s label KQLearningHJBCP, Reward 71: 751.000, Len(game): 751, Training Time: 10.511s, Prediction Time: 34.228s label KQLearningHJBCP, Reward 72: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 34.919s label KQLearningHJBCP, Reward 73: 170.000, Len(game): 170, Training Time: 10.511s, Prediction Time: 35.037s label KQLearningHJBCP, Reward 74: 336.000, Len(game): 336, Training Time: 10.511s, Prediction Time: 35.267s label KQLearningHJBCP, Reward 75: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 35.953s label KQLearningHJBCP, Reward 76: 
1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 36.639s label KQLearningHJBCP, Reward 77: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 37.328s label KQLearningHJBCP, Reward 78: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.012s label KQLearningHJBCP, Reward 79: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.710s label KQLearningHJBCP, Reward 80: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 38.853s label KQLearningHJBCP, Reward 81: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 39.536s label KQLearningHJBCP, Reward 82: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 40.223s label KQLearningHJBCP, Reward 83: 757.000, Len(game): 757, Training Time: 10.511s, Prediction Time: 40.738s label KQLearningHJBCP, Reward 84: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 41.428s label KQLearningHJBCP, Reward 85: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 41.567s label KQLearningHJBCP, Reward 86: 281.000, Len(game): 281, Training Time: 10.511s, Prediction Time: 41.761s label KQLearningHJBCP, Reward 87: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 42.444s label KQLearningHJBCP, Reward 88: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.123s label KQLearningHJBCP, Reward 89: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.802s label KQLearningHJBCP, Reward 90: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 44.488s label KQLearningHJBCP, Reward 91: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.172s label KQLearningHJBCP, Reward 92: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.858s label KQLearningHJBCP, Reward 93: 177.000, Len(game): 177, Training Time: 10.511s, Prediction Time: 45.978s label KQLearningHJBCP, Reward 94: 563.000, Len(game): 563, 
Training Time: 10.511s, Prediction Time: 46.358s label KQLearningHJBCP, Reward 95: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 46.617s label KQLearningHJBCP, Reward 96: 704.000, Len(game): 704, Training Time: 10.511s, Prediction Time: 47.097s label KQLearningHJBCP, Reward 97: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 47.782s label KQLearningHJBCP, Reward 98: 539.000, Len(game): 539, Training Time: 10.511s, Prediction Time: 48.150s label KQLearningHJBCP, Reward 99: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 48.828s Computed global error Bellman mean: 3.9266451670430215e-08 iter: 2 label KQLearning, Reward 0: 18.000, Len(game): 18, Training Time: 0.005s, Prediction Time: 0.000s Computed global error Bellman mean: 0.3964756234060676 iter: 5 label KQLearning, Reward 1: 51.000, Len(game): 51, Training Time: 0.022s, Prediction Time: 0.008s Computed global error Bellman mean: 0.4553239146341465 iter: 5 label KQLearning, Reward 2: 35.000, Len(game): 35, Training Time: 0.052s, Prediction Time: 0.014s Computed global error Bellman mean: 3.059006677079503e-07 iter: 4 label KQLearning, Reward 3: 53.000, Len(game): 53, Training Time: 0.089s, Prediction Time: 0.024s Computed global error Bellman mean: 0.02300332025657364 iter: 5 label KQLearning, Reward 4: 63.000, Len(game): 63, Training Time: 0.171s, Prediction Time: 0.036s Computed global error Bellman mean: 0.12679985761031448 iter: 5 label KQLearning, Reward 5: 45.000, Len(game): 45, Training Time: 0.319s, Prediction Time: 0.048s Computed global error Bellman mean: 0.052994993189221073 iter: 5 label KQLearning, Reward 6: 31.000, Len(game): 31, Training Time: 0.488s, Prediction Time: 0.058s Computed global error Bellman mean: 0.02126114540487673 iter: 5 label KQLearning, Reward 7: 50.000, Len(game): 50, Training Time: 0.717s, Prediction Time: 0.072s Computed global error Bellman mean: 0.0009045258385653113 iter: 5 label KQLearning, Reward 8: 59.000, 
Len(game): 59, Training Time: 1.065s, Prediction Time: 0.092s Computed global error Bellman mean: 0.028279770743522326 iter: 5 label KQLearning, Reward 9: 84.000, Len(game): 84, Training Time: 1.535s, Prediction Time: 0.123s Computed global error Bellman mean: 0.0038382381461582412 iter: 5 label KQLearning, Reward 10: 51.000, Len(game): 51, Training Time: 2.153s, Prediction Time: 0.143s Computed global error Bellman mean: 0.015333520935118947 iter: 5 label KQLearning, Reward 11: 88.000, Len(game): 88, Training Time: 2.922s, Prediction Time: 0.177s Computed global error Bellman mean: 0.0067286687677481725 iter: 5 label KQLearning, Reward 12: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 0.205s label KQLearning, Reward 13: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 0.244s label KQLearning, Reward 14: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 0.279s label KQLearning, Reward 15: 153.000, Len(game): 153, Training Time: 3.841s, Prediction Time: 0.351s label KQLearning, Reward 16: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.382s label KQLearning, Reward 17: 99.000, Len(game): 99, Training Time: 3.841s, Prediction Time: 0.427s label KQLearning, Reward 18: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 0.477s label KQLearning, Reward 19: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.507s label KQLearning, Reward 20: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 0.544s label KQLearning, Reward 21: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 0.576s label KQLearning, Reward 22: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 0.615s label KQLearning, Reward 23: 136.000, Len(game): 136, Training Time: 3.841s, Prediction Time: 0.678s label KQLearning, Reward 24: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 0.727s label KQLearning, Reward 25: 111.000, Len(game): 111, Training Time: 3.841s, 
Prediction Time: 0.777s label KQLearning, Reward 26: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 0.811s label KQLearning, Reward 27: 352.000, Len(game): 352, Training Time: 3.841s, Prediction Time: 0.974s label KQLearning, Reward 28: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.014s label KQLearning, Reward 29: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 1.051s label KQLearning, Reward 30: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 1.082s label KQLearning, Reward 31: 1000.000, Len(game): 1000, Training Time: 3.841s, Prediction Time: 1.543s label KQLearning, Reward 32: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.581s label KQLearning, Reward 33: 103.000, Len(game): 103, Training Time: 3.841s, Prediction Time: 1.628s label KQLearning, Reward 34: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.660s label KQLearning, Reward 35: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 1.695s label KQLearning, Reward 36: 88.000, Len(game): 88, Training Time: 3.841s, Prediction Time: 1.734s label KQLearning, Reward 37: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.772s label KQLearning, Reward 38: 60.000, Len(game): 60, Training Time: 3.841s, Prediction Time: 1.798s label KQLearning, Reward 39: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 1.827s label KQLearning, Reward 40: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.861s label KQLearning, Reward 41: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.894s label KQLearning, Reward 42: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.927s label KQLearning, Reward 43: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.965s label KQLearning, Reward 44: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 2.005s label KQLearning, Reward 45: 75.000, Len(game): 75, Training Time: 3.841s, Prediction 
Time: 2.039s label KQLearning, Reward 46: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 2.076s label KQLearning, Reward 47: 101.000, Len(game): 101, Training Time: 3.841s, Prediction Time: 2.122s label KQLearning, Reward 48: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 2.156s label KQLearning, Reward 49: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.196s label KQLearning, Reward 50: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.235s label KQLearning, Reward 51: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 2.265s label KQLearning, Reward 52: 187.000, Len(game): 187, Training Time: 3.841s, Prediction Time: 2.354s label KQLearning, Reward 53: 118.000, Len(game): 118, Training Time: 3.841s, Prediction Time: 2.408s label KQLearning, Reward 54: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 2.451s label KQLearning, Reward 55: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 2.480s label KQLearning, Reward 56: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 2.523s label KQLearning, Reward 57: 220.000, Len(game): 220, Training Time: 3.841s, Prediction Time: 2.625s label KQLearning, Reward 58: 78.000, Len(game): 78, Training Time: 3.841s, Prediction Time: 2.660s label KQLearning, Reward 59: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 2.698s label KQLearning, Reward 60: 113.000, Len(game): 113, Training Time: 3.841s, Prediction Time: 2.750s label KQLearning, Reward 61: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 2.796s label KQLearning, Reward 62: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.837s label KQLearning, Reward 63: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.871s label KQLearning, Reward 64: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.904s label KQLearning, Reward 65: 62.000, Len(game): 62, Training Time: 3.841s, Prediction Time: 
2.932s label KQLearning, Reward 66: 68.000, Len(game): 68, Training Time: 3.841s, Prediction Time: 2.963s label KQLearning, Reward 67: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 3.006s label KQLearning, Reward 68: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 3.055s label KQLearning, Reward 69: 77.000, Len(game): 77, Training Time: 3.841s, Prediction Time: 3.090s label KQLearning, Reward 70: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 3.121s label KQLearning, Reward 71: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 3.151s label KQLearning, Reward 72: 151.000, Len(game): 151, Training Time: 3.841s, Prediction Time: 3.220s label KQLearning, Reward 73: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 3.269s label KQLearning, Reward 74: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 3.312s label KQLearning, Reward 75: 89.000, Len(game): 89, Training Time: 3.841s, Prediction Time: 3.353s label KQLearning, Reward 76: 110.000, Len(game): 110, Training Time: 3.841s, Prediction Time: 3.405s label KQLearning, Reward 77: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 3.440s label KQLearning, Reward 78: 59.000, Len(game): 59, Training Time: 3.841s, Prediction Time: 3.466s label KQLearning, Reward 79: 117.000, Len(game): 117, Training Time: 3.841s, Prediction Time: 3.519s label KQLearning, Reward 80: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 3.559s label KQLearning, Reward 81: 92.000, Len(game): 92, Training Time: 3.841s, Prediction Time: 3.600s label KQLearning, Reward 82: 108.000, Len(game): 108, Training Time: 3.841s, Prediction Time: 3.652s label KQLearning, Reward 83: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 3.689s label KQLearning, Reward 84: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 3.733s label KQLearning, Reward 85: 63.000, Len(game): 63, Training Time: 3.841s, Prediction Time: 3.761s 
label KQLearning, Reward 86: 80.000, Len(game): 80, Training Time: 3.841s, Prediction Time: 3.798s label KQLearning, Reward 87: 97.000, Len(game): 97, Training Time: 3.841s, Prediction Time: 3.842s label KQLearning, Reward 88: 114.000, Len(game): 114, Training Time: 3.841s, Prediction Time: 3.895s label KQLearning, Reward 89: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 3.935s label KQLearning, Reward 90: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 3.967s label KQLearning, Reward 91: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 4.003s label KQLearning, Reward 92: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 4.043s label KQLearning, Reward 93: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 4.077s label KQLearning, Reward 94: 107.000, Len(game): 107, Training Time: 3.841s, Prediction Time: 4.128s label KQLearning, Reward 95: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 4.166s label KQLearning, Reward 96: 104.000, Len(game): 104, Training Time: 3.841s, Prediction Time: 4.214s label KQLearning, Reward 97: 90.000, Len(game): 90, Training Time: 3.841s, Prediction Time: 4.256s label KQLearning, Reward 98: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 4.294s label KQLearning, Reward 99: 95.000, Len(game): 95, Training Time: 3.841s, Prediction Time: 4.338s 1 label PPOAgent, Reward 0: 21.000, Len(game): 21, Training Time: 0.004s, Prediction Time: 0.004s label PPOAgent, Reward 1: 53.000, Len(game): 53, Training Time: 0.014s, Prediction Time: 0.014s label PPOAgent, Reward 2: 11.000, Len(game): 11, Training Time: 0.016s, Prediction Time: 0.016s label PPOAgent, Reward 3: 36.000, Len(game): 36, Training Time: 0.023s, Prediction Time: 0.023s label PPOAgent, Reward 4: 14.000, Len(game): 14, Training Time: 0.025s, Prediction Time: 0.025s label PPOAgent, Reward 5: 20.000, Len(game): 20, Training Time: 0.029s, Prediction Time: 0.029s label PPOAgent, Reward 6: 
22.000, Len(game): 22, Training Time: 0.033s, Prediction Time: 0.033s label PPOAgent, Reward 7: 57.000, Len(game): 57, Training Time: 0.044s, Prediction Time: 0.044s label PPOAgent, Reward 8: 17.000, Len(game): 17, Training Time: 0.047s, Prediction Time: 0.047s label PPOAgent, Reward 9: 19.000, Len(game): 19, Training Time: 0.050s, Prediction Time: 0.050s label PPOAgent, Reward 10: 9.000, Len(game): 9, Training Time: 0.053s, Prediction Time: 0.053s label PPOAgent, Reward 11: 16.000, Len(game): 16, Training Time: 0.055s, Prediction Time: 0.055s label PPOAgent, Reward 12: 19.000, Len(game): 19, Training Time: 0.058s, Prediction Time: 0.058s label PPOAgent, Reward 13: 12.000, Len(game): 12, Training Time: 0.061s, Prediction Time: 0.061s label PPOAgent, Reward 14: 39.000, Len(game): 39, Training Time: 0.069s, Prediction Time: 0.069s label PPOAgent, Reward 15: 16.000, Len(game): 16, Training Time: 0.071s, Prediction Time: 0.071s label PPOAgent, Reward 16: 19.000, Len(game): 19, Training Time: 0.075s, Prediction Time: 0.075s label PPOAgent, Reward 17: 17.000, Len(game): 17, Training Time: 0.078s, Prediction Time: 0.078s label PPOAgent, Reward 18: 31.000, Len(game): 31, Training Time: 0.084s, Prediction Time: 0.084s label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.087s, Prediction Time: 0.087s label PPOAgent, Reward 20: 13.000, Len(game): 13, Training Time: 0.090s, Prediction Time: 0.090s label PPOAgent, Reward 21: 18.000, Len(game): 18, Training Time: 0.094s, Prediction Time: 0.094s label PPOAgent, Reward 22: 20.000, Len(game): 20, Training Time: 0.097s, Prediction Time: 0.097s label PPOAgent, Reward 23: 31.000, Len(game): 31, Training Time: 0.103s, Prediction Time: 0.103s label PPOAgent, Reward 24: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s label PPOAgent, Reward 25: 41.000, Len(game): 41, Training Time: 0.114s, Prediction Time: 0.114s label PPOAgent, Reward 26: 12.000, Len(game): 12, Training Time: 0.116s, Prediction Time: 
0.116s label PPOAgent, Reward 27: 29.000, Len(game): 29, Training Time: 0.121s, Prediction Time: 0.121s label PPOAgent, Reward 28: 31.000, Len(game): 31, Training Time: 0.128s, Prediction Time: 0.128s label PPOAgent, Reward 29: 27.000, Len(game): 27, Training Time: 0.133s, Prediction Time: 0.133s label PPOAgent, Reward 30: 18.000, Len(game): 18, Training Time: 0.136s, Prediction Time: 0.136s label PPOAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.137s, Prediction Time: 0.137s label PPOAgent, Reward 32: 33.000, Len(game): 33, Training Time: 0.142s, Prediction Time: 0.142s label PPOAgent, Reward 33: 41.000, Len(game): 41, Training Time: 0.150s, Prediction Time: 0.150s label PPOAgent, Reward 34: 22.000, Len(game): 22, Training Time: 0.154s, Prediction Time: 0.154s label PPOAgent, Reward 35: 13.000, Len(game): 13, Training Time: 0.157s, Prediction Time: 0.157s label PPOAgent, Reward 36: 19.000, Len(game): 19, Training Time: 0.160s, Prediction Time: 0.160s label PPOAgent, Reward 37: 15.000, Len(game): 15, Training Time: 0.162s, Prediction Time: 0.162s label PPOAgent, Reward 38: 30.000, Len(game): 30, Training Time: 0.168s, Prediction Time: 0.168s label PPOAgent, Reward 39: 49.000, Len(game): 49, Training Time: 0.176s, Prediction Time: 0.176s label PPOAgent, Reward 40: 21.000, Len(game): 21, Training Time: 0.181s, Prediction Time: 0.181s label PPOAgent, Reward 41: 21.000, Len(game): 21, Training Time: 0.186s, Prediction Time: 0.186s label PPOAgent, Reward 42: 49.000, Len(game): 49, Training Time: 0.194s, Prediction Time: 0.194s label PPOAgent, Reward 43: 23.000, Len(game): 23, Training Time: 0.199s, Prediction Time: 0.199s label PPOAgent, Reward 44: 48.000, Len(game): 48, Training Time: 0.208s, Prediction Time: 0.208s label PPOAgent, Reward 45: 54.000, Len(game): 54, Training Time: 0.217s, Prediction Time: 0.217s label PPOAgent, Reward 46: 55.000, Len(game): 55, Training Time: 0.268s, Prediction Time: 0.268s label PPOAgent, Reward 47: 22.000, Len(game): 22, 
Training Time: 0.273s, Prediction Time: 0.273s label PPOAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.280s, Prediction Time: 0.280s label PPOAgent, Reward 49: 21.000, Len(game): 21, Training Time: 0.285s, Prediction Time: 0.285s label PPOAgent, Reward 50: 27.000, Len(game): 27, Training Time: 0.290s, Prediction Time: 0.290s label PPOAgent, Reward 51: 38.000, Len(game): 38, Training Time: 0.298s, Prediction Time: 0.298s label PPOAgent, Reward 52: 41.000, Len(game): 41, Training Time: 0.306s, Prediction Time: 0.306s label PPOAgent, Reward 53: 11.000, Len(game): 11, Training Time: 0.308s, Prediction Time: 0.308s label PPOAgent, Reward 54: 17.000, Len(game): 17, Training Time: 0.311s, Prediction Time: 0.311s label PPOAgent, Reward 55: 32.000, Len(game): 32, Training Time: 0.317s, Prediction Time: 0.317s label PPOAgent, Reward 56: 45.000, Len(game): 45, Training Time: 0.328s, Prediction Time: 0.328s label PPOAgent, Reward 57: 27.000, Len(game): 27, Training Time: 0.332s, Prediction Time: 0.332s label PPOAgent, Reward 58: 21.000, Len(game): 21, Training Time: 0.338s, Prediction Time: 0.338s label PPOAgent, Reward 59: 16.000, Len(game): 16, Training Time: 0.341s, Prediction Time: 0.341s label PPOAgent, Reward 60: 14.000, Len(game): 14, Training Time: 0.344s, Prediction Time: 0.344s label PPOAgent, Reward 61: 25.000, Len(game): 25, Training Time: 0.348s, Prediction Time: 0.348s label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.352s, Prediction Time: 0.352s label PPOAgent, Reward 63: 16.000, Len(game): 16, Training Time: 0.355s, Prediction Time: 0.355s label PPOAgent, Reward 64: 24.000, Len(game): 24, Training Time: 0.360s, Prediction Time: 0.360s label PPOAgent, Reward 65: 25.000, Len(game): 25, Training Time: 0.365s, Prediction Time: 0.365s label PPOAgent, Reward 66: 12.000, Len(game): 12, Training Time: 0.367s, Prediction Time: 0.367s label PPOAgent, Reward 67: 20.000, Len(game): 20, Training Time: 0.370s, Prediction Time: 0.370s label 
PPOAgent, Reward 68: 28.000, Len(game): 28, Training Time: 0.375s, Prediction Time: 0.375s label PPOAgent, Reward 69: 41.000, Len(game): 41, Training Time: 0.383s, Prediction Time: 0.383s label PPOAgent, Reward 70: 23.000, Len(game): 23, Training Time: 0.388s, Prediction Time: 0.388s label PPOAgent, Reward 71: 18.000, Len(game): 18, Training Time: 0.391s, Prediction Time: 0.391s label PPOAgent, Reward 72: 18.000, Len(game): 18, Training Time: 0.395s, Prediction Time: 0.395s label PPOAgent, Reward 73: 28.000, Len(game): 28, Training Time: 0.400s, Prediction Time: 0.400s label PPOAgent, Reward 74: 21.000, Len(game): 21, Training Time: 0.404s, Prediction Time: 0.404s label PPOAgent, Reward 75: 23.000, Len(game): 23, Training Time: 0.408s, Prediction Time: 0.408s label PPOAgent, Reward 76: 25.000, Len(game): 25, Training Time: 0.413s, Prediction Time: 0.413s label PPOAgent, Reward 77: 23.000, Len(game): 23, Training Time: 0.418s, Prediction Time: 0.418s label PPOAgent, Reward 78: 14.000, Len(game): 14, Training Time: 0.423s, Prediction Time: 0.423s label PPOAgent, Reward 79: 16.000, Len(game): 16, Training Time: 0.426s, Prediction Time: 0.426s label PPOAgent, Reward 80: 22.000, Len(game): 22, Training Time: 0.433s, Prediction Time: 0.433s label PPOAgent, Reward 81: 20.000, Len(game): 20, Training Time: 0.439s, Prediction Time: 0.439s label PPOAgent, Reward 82: 43.000, Len(game): 43, Training Time: 0.450s, Prediction Time: 0.450s label PPOAgent, Reward 83: 48.000, Len(game): 48, Training Time: 0.460s, Prediction Time: 0.460s label PPOAgent, Reward 84: 36.000, Len(game): 36, Training Time: 0.467s, Prediction Time: 0.467s label PPOAgent, Reward 85: 42.000, Len(game): 42, Training Time: 0.475s, Prediction Time: 0.475s label PPOAgent, Reward 86: 18.000, Len(game): 18, Training Time: 0.478s, Prediction Time: 0.478s label PPOAgent, Reward 87: 21.000, Len(game): 21, Training Time: 0.481s, Prediction Time: 0.481s label PPOAgent, Reward 88: 35.000, Len(game): 35, Training Time: 
0.490s, Prediction Time: 0.490s label PPOAgent, Reward 89: 51.000, Len(game): 51, Training Time: 0.499s, Prediction Time: 0.499s label PPOAgent, Reward 90: 10.000, Len(game): 10, Training Time: 0.501s, Prediction Time: 0.501s label PPOAgent, Reward 91: 42.000, Len(game): 42, Training Time: 0.509s, Prediction Time: 0.509s label PPOAgent, Reward 92: 67.000, Len(game): 67, Training Time: 0.560s, Prediction Time: 0.560s label PPOAgent, Reward 93: 14.000, Len(game): 14, Training Time: 0.562s, Prediction Time: 0.562s label PPOAgent, Reward 94: 21.000, Len(game): 21, Training Time: 0.567s, Prediction Time: 0.567s label PPOAgent, Reward 95: 73.000, Len(game): 73, Training Time: 0.580s, Prediction Time: 0.580s label PPOAgent, Reward 96: 53.000, Len(game): 53, Training Time: 0.590s, Prediction Time: 0.590s label PPOAgent, Reward 97: 34.000, Len(game): 34, Training Time: 0.597s, Prediction Time: 0.597s label PPOAgent, Reward 98: 33.000, Len(game): 33, Training Time: 0.602s, Prediction Time: 0.602s label PPOAgent, Reward 99: 20.000, Len(game): 20, Training Time: 0.606s, Prediction Time: 0.606s label PolicyGradient, Reward 0: 15.000, Len(game): 15, Training Time: 0.004s, Prediction Time: 0.001s label PolicyGradient, Reward 1: 16.000, Len(game): 16, Training Time: 0.007s, Prediction Time: 0.004s label PolicyGradient, Reward 2: 15.000, Len(game): 15, Training Time: 0.012s, Prediction Time: 0.008s label PolicyGradient, Reward 3: 45.000, Len(game): 45, Training Time: 0.021s, Prediction Time: 0.019s label PolicyGradient, Reward 4: 14.000, Len(game): 14, Training Time: 0.030s, Prediction Time: 0.022s label PolicyGradient, Reward 5: 41.000, Len(game): 41, Training Time: 0.040s, Prediction Time: 0.031s label PolicyGradient, Reward 6: 51.000, Len(game): 51, Training Time: 0.054s, Prediction Time: 0.043s label PolicyGradient, Reward 7: 42.000, Len(game): 42, Training Time: 0.069s, Prediction Time: 0.055s label PolicyGradient, Reward 8: 68.000, Len(game): 68, Training Time: 0.094s, 
Prediction Time: 0.074s label PolicyGradient, Reward 9: 30.000, Len(game): 30, Training Time: 0.118s, Prediction Time: 0.087s label PolicyGradient, Reward 10: 161.000, Len(game): 161, Training Time: 0.170s, Prediction Time: 0.136s label PolicyGradient, Reward 11: 77.000, Len(game): 77, Training Time: 0.238s, Prediction Time: 0.177s label PolicyGradient, Reward 12: 105.000, Len(game): 105, Training Time: 0.322s, Prediction Time: 0.237s label PolicyGradient, Reward 13: 65.000, Len(game): 65, Training Time: 0.429s, Prediction Time: 0.289s label PolicyGradient, Reward 14: 120.000, Len(game): 120, Training Time: 0.564s, Prediction Time: 0.372s label PolicyGradient, Reward 15: 48.000, Len(game): 48, Training Time: 0.706s, Prediction Time: 0.443s label PolicyGradient, Reward 16: 186.000, Len(game): 186, Training Time: 0.925s, Prediction Time: 0.584s label PolicyGradient, Reward 17: 67.000, Len(game): 67, Training Time: 1.151s, Prediction Time: 0.697s label PolicyGradient, Reward 18: 234.000, Len(game): 234, Training Time: 1.481s, Prediction Time: 0.913s label PolicyGradient, Reward 19: 694.000, Len(game): 694, Training Time: 2.184s, Prediction Time: 1.466s label PolicyGradient, Reward 20: 234.000, Len(game): 234, Training Time: 2.962s, Prediction Time: 1.990s label PolicyGradient, Reward 21: 939.000, Len(game): 939, Training Time: 4.512s, Prediction Time: 3.131s label PolicyGradient, Reward 22: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 4.060s label PolicyGradient, Reward 23: 66.000, Len(game): 66, Training Time: 4.512s, Prediction Time: 4.128s label PolicyGradient, Reward 24: 78.000, Len(game): 78, Training Time: 4.512s, Prediction Time: 4.199s label PolicyGradient, Reward 25: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 4.303s label PolicyGradient, Reward 26: 143.000, Len(game): 143, Training Time: 4.512s, Prediction Time: 4.428s label PolicyGradient, Reward 27: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 
4.494s label PolicyGradient, Reward 28: 160.000, Len(game): 160, Training Time: 4.512s, Prediction Time: 4.635s label PolicyGradient, Reward 29: 236.000, Len(game): 236, Training Time: 4.512s, Prediction Time: 4.847s label PolicyGradient, Reward 30: 142.000, Len(game): 142, Training Time: 4.512s, Prediction Time: 4.974s label PolicyGradient, Reward 31: 135.000, Len(game): 135, Training Time: 4.512s, Prediction Time: 5.097s label PolicyGradient, Reward 32: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 5.187s label PolicyGradient, Reward 33: 219.000, Len(game): 219, Training Time: 4.512s, Prediction Time: 5.381s label PolicyGradient, Reward 34: 129.000, Len(game): 129, Training Time: 4.512s, Prediction Time: 5.499s label PolicyGradient, Reward 35: 107.000, Len(game): 107, Training Time: 4.512s, Prediction Time: 5.597s label PolicyGradient, Reward 36: 155.000, Len(game): 155, Training Time: 4.512s, Prediction Time: 5.735s label PolicyGradient, Reward 37: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 5.804s label PolicyGradient, Reward 38: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 5.936s label PolicyGradient, Reward 39: 113.000, Len(game): 113, Training Time: 4.512s, Prediction Time: 6.037s label PolicyGradient, Reward 40: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 6.169s label PolicyGradient, Reward 41: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 6.274s label PolicyGradient, Reward 42: 118.000, Len(game): 118, Training Time: 4.512s, Prediction Time: 6.378s label PolicyGradient, Reward 43: 202.000, Len(game): 202, Training Time: 4.512s, Prediction Time: 6.557s label PolicyGradient, Reward 44: 62.000, Len(game): 62, Training Time: 4.512s, Prediction Time: 6.611s label PolicyGradient, Reward 45: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 6.716s label PolicyGradient, Reward 46: 179.000, Len(game): 179, Training Time: 4.512s, Prediction Time: 6.876s 
label PolicyGradient, Reward 47: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 7.006s label PolicyGradient, Reward 48: 165.000, Len(game): 165, Training Time: 4.512s, Prediction Time: 7.153s label PolicyGradient, Reward 49: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 7.299s label PolicyGradient, Reward 50: 123.000, Len(game): 123, Training Time: 4.512s, Prediction Time: 7.408s label PolicyGradient, Reward 51: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 7.482s label PolicyGradient, Reward 52: 93.000, Len(game): 93, Training Time: 4.512s, Prediction Time: 7.564s label PolicyGradient, Reward 53: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 7.672s label PolicyGradient, Reward 54: 89.000, Len(game): 89, Training Time: 4.512s, Prediction Time: 7.753s label PolicyGradient, Reward 55: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 7.845s label PolicyGradient, Reward 56: 109.000, Len(game): 109, Training Time: 4.512s, Prediction Time: 7.940s label PolicyGradient, Reward 57: 162.000, Len(game): 162, Training Time: 4.512s, Prediction Time: 8.084s label PolicyGradient, Reward 58: 91.000, Len(game): 91, Training Time: 4.512s, Prediction Time: 8.166s label PolicyGradient, Reward 59: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 8.249s label PolicyGradient, Reward 60: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 8.364s label PolicyGradient, Reward 61: 110.000, Len(game): 110, Training Time: 4.512s, Prediction Time: 8.460s label PolicyGradient, Reward 62: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 8.548s label PolicyGradient, Reward 63: 122.000, Len(game): 122, Training Time: 4.512s, Prediction Time: 8.662s label PolicyGradient, Reward 64: 157.000, Len(game): 157, Training Time: 4.512s, Prediction Time: 8.799s label PolicyGradient, Reward 65: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 8.874s label 
PolicyGradient, Reward 66: 111.000, Len(game): 111, Training Time: 4.512s, Prediction Time: 8.972s label PolicyGradient, Reward 67: 264.000, Len(game): 264, Training Time: 4.512s, Prediction Time: 9.210s label PolicyGradient, Reward 68: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.294s label PolicyGradient, Reward 69: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.381s label PolicyGradient, Reward 70: 64.000, Len(game): 64, Training Time: 4.512s, Prediction Time: 9.438s label PolicyGradient, Reward 71: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 9.565s label PolicyGradient, Reward 72: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 9.653s label PolicyGradient, Reward 73: 184.000, Len(game): 184, Training Time: 4.512s, Prediction Time: 9.814s label PolicyGradient, Reward 74: 141.000, Len(game): 141, Training Time: 4.512s, Prediction Time: 9.936s label PolicyGradient, Reward 75: 125.000, Len(game): 125, Training Time: 4.512s, Prediction Time: 10.046s label PolicyGradient, Reward 76: 108.000, Len(game): 108, Training Time: 4.512s, Prediction Time: 10.142s label PolicyGradient, Reward 77: 147.000, Len(game): 147, Training Time: 4.512s, Prediction Time: 10.273s label PolicyGradient, Reward 78: 114.000, Len(game): 114, Training Time: 4.512s, Prediction Time: 10.376s label PolicyGradient, Reward 79: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 10.460s label PolicyGradient, Reward 80: 100.000, Len(game): 100, Training Time: 4.512s, Prediction Time: 10.551s label PolicyGradient, Reward 81: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 10.681s label PolicyGradient, Reward 82: 124.000, Len(game): 124, Training Time: 4.512s, Prediction Time: 10.789s label PolicyGradient, Reward 83: 128.000, Len(game): 128, Training Time: 4.512s, Prediction Time: 10.901s label PolicyGradient, Reward 84: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 11.017s label 
PolicyGradient, Reward 85: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.091s label PolicyGradient, Reward 86: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.165s label PolicyGradient, Reward 87: 58.000, Len(game): 58, Training Time: 4.512s, Prediction Time: 11.215s label PolicyGradient, Reward 88: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 11.329s label PolicyGradient, Reward 89: 96.000, Len(game): 96, Training Time: 4.512s, Prediction Time: 11.411s label PolicyGradient, Reward 90: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 11.497s label PolicyGradient, Reward 91: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 11.645s label PolicyGradient, Reward 92: 106.000, Len(game): 106, Training Time: 4.512s, Prediction Time: 11.738s label PolicyGradient, Reward 93: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 11.866s label PolicyGradient, Reward 94: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 11.939s label PolicyGradient, Reward 95: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 12.053s label PolicyGradient, Reward 96: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 12.137s label PolicyGradient, Reward 97: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 12.268s label PolicyGradient, Reward 98: 132.000, Len(game): 132, Training Time: 4.512s, Prediction Time: 12.387s label PolicyGradient, Reward 99: 177.000, Len(game): 177, Training Time: 4.512s, Prediction Time: 12.543s label Controller-based, Reward 0: 127.000, Len(game): 127, Training Time: 0.002s, Prediction Time: 0.001s label Controller-based, Reward 1: 75.000, Len(game): 75, Training Time: 0.003s, Prediction Time: 0.002s label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.002s label Controller-based, Reward 3: 199.000, Len(game): 199, Training Time: 0.022s, Prediction Time: 0.004s label 
Controller-based, Reward 4: 286.000, Len(game): 286, Training Time: 0.033s, Prediction Time: 0.007s label Controller-based, Reward 5: 364.000, Len(game): 364, Training Time: 0.046s, Prediction Time: 0.010s no training label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.019s no training label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.028s no training label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.036s no training label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.045s no training label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.053s no training label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.063s no training label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.071s no training label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.079s no training label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.087s no training label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.095s no training label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.103s no training label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.111s no training label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.119s no training label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.128s no training label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.046s, 
Prediction Time: 0.136s no training label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.144s no training label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.152s no training label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.160s no training label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.168s no training label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.177s no training label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.185s no training label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.193s no training label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.201s no training label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.209s no training label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.217s no training label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.226s no training label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.233s no training label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.241s no training label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.250s no training label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.258s no training label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.265s no training label Controller-based, 
Reward 37: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.273s no training label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.280s no training label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.288s no training label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.296s no training label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.305s no training label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.313s no training label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.321s no training label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.329s no training label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.338s no training label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.346s no training label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.354s no training label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.362s no training label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.371s no training label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.379s no training label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.387s no training label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.396s no training label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.047s, 
Prediction Time: 0.403s no training label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.412s no training label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.420s no training label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.428s no training label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.436s no training label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.443s no training label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.451s no training label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.459s no training label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.466s no training label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.474s no training label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.483s no training label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.491s no training label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.500s no training label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.507s no training label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.516s no training label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.524s no training label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.533s no training label Controller-based, 
Reward 70: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.541s no training label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.548s no training label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.556s no training label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.564s no training label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.572s no training label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.578s no training label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.587s no training label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.596s no training label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.604s no training label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.612s no training label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.621s no training label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.629s no training label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.638s no training label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.645s no training label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.653s no training label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.662s no training label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.050s, 
Prediction Time: 0.671s no training label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.679s no training label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.687s no training label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.696s no training label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.704s no training label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.712s no training label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.720s no training label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.729s no training label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.736s no training label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.745s no training label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.753s no training label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.761s no training label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.770s no training label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.777s label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s label KACAgent, Reward 1: 39.000, Len(game): 39, Training Time: 0.009s, Prediction Time: 0.007s label KACAgent, Reward 2: 65.000, Len(game): 65, Training Time: 0.024s, Prediction Time: 0.019s label KACAgent, Reward 3: 91.000, Len(game): 91, Training Time: 0.040s, Prediction Time: 0.038s label KACAgent, 
Reward 4: 80.000, Len(game): 80, Training Time: 0.063s, Prediction Time: 0.056s label KACAgent, Reward 5: 117.000, Len(game): 117, Training Time: 0.102s, Prediction Time: 0.091s label KACAgent, Reward 6: 69.000, Len(game): 69, Training Time: 0.148s, Prediction Time: 0.123s label KACAgent, Reward 7: 164.000, Len(game): 164, Training Time: 0.217s, Prediction Time: 0.189s label KACAgent, Reward 8: 103.000, Len(game): 103, Training Time: 0.309s, Prediction Time: 0.255s label KACAgent, Reward 9: 278.000, Len(game): 278, Training Time: 0.478s, Prediction Time: 0.404s label KACAgent, Reward 10: 114.000, Len(game): 114, Training Time: 0.670s, Prediction Time: 0.527s label KACAgent, Reward 11: 984.000, Len(game): 984, Training Time: 1.226s, Prediction Time: 1.141s label KACAgent, Reward 12: 412.000, Len(game): 412, Training Time: 2.074s, Prediction Time: 1.789s label KACAgent, Reward 13: 111.000, Len(game): 111, Training Time: 2.990s, Prediction Time: 2.340s label KACAgent, Reward 14: 202.000, Len(game): 202, Training Time: 4.040s, Prediction Time: 3.047s label KACAgent, Reward 15: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 3.935s label KACAgent, Reward 16: 250.000, Len(game): 250, Training Time: 4.040s, Prediction Time: 4.140s label KACAgent, Reward 17: 205.000, Len(game): 205, Training Time: 4.040s, Prediction Time: 4.307s label KACAgent, Reward 18: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 4.577s label KACAgent, Reward 19: 345.000, Len(game): 345, Training Time: 4.040s, Prediction Time: 4.856s label KACAgent, Reward 20: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 5.084s label KACAgent, Reward 21: 299.000, Len(game): 299, Training Time: 4.040s, Prediction Time: 5.330s label KACAgent, Reward 22: 473.000, Len(game): 473, Training Time: 4.040s, Prediction Time: 5.721s label KACAgent, Reward 23: 280.000, Len(game): 280, Training Time: 4.040s, Prediction Time: 5.955s label KACAgent, Reward 24: 304.000, 
Len(game): 304, Training Time: 4.040s, Prediction Time: 6.200s label KACAgent, Reward 25: 195.000, Len(game): 195, Training Time: 4.040s, Prediction Time: 6.359s label KACAgent, Reward 26: 240.000, Len(game): 240, Training Time: 4.040s, Prediction Time: 6.558s label KACAgent, Reward 27: 168.000, Len(game): 168, Training Time: 4.040s, Prediction Time: 6.694s label KACAgent, Reward 28: 177.000, Len(game): 177, Training Time: 4.040s, Prediction Time: 6.835s label KACAgent, Reward 29: 282.000, Len(game): 282, Training Time: 4.040s, Prediction Time: 7.061s label KACAgent, Reward 30: 300.000, Len(game): 300, Training Time: 4.040s, Prediction Time: 7.304s label KACAgent, Reward 31: 243.000, Len(game): 243, Training Time: 4.040s, Prediction Time: 7.497s label KACAgent, Reward 32: 467.000, Len(game): 467, Training Time: 4.040s, Prediction Time: 7.880s label KACAgent, Reward 33: 158.000, Len(game): 158, Training Time: 4.040s, Prediction Time: 8.013s label KACAgent, Reward 34: 301.000, Len(game): 301, Training Time: 4.040s, Prediction Time: 8.261s label KACAgent, Reward 35: 154.000, Len(game): 154, Training Time: 4.040s, Prediction Time: 8.385s label KACAgent, Reward 36: 262.000, Len(game): 262, Training Time: 4.040s, Prediction Time: 8.597s label KACAgent, Reward 37: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 8.826s label KACAgent, Reward 38: 586.000, Len(game): 586, Training Time: 4.040s, Prediction Time: 9.296s label KACAgent, Reward 39: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 9.549s label KACAgent, Reward 40: 275.000, Len(game): 275, Training Time: 4.040s, Prediction Time: 9.774s label KACAgent, Reward 41: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 9.927s label KACAgent, Reward 42: 223.000, Len(game): 223, Training Time: 4.040s, Prediction Time: 10.107s label KACAgent, Reward 43: 323.000, Len(game): 323, Training Time: 4.040s, Prediction Time: 10.365s label KACAgent, Reward 44: 205.000, Len(game): 205, 
Training Time: 4.040s, Prediction Time: 10.530s label KACAgent, Reward 45: 242.000, Len(game): 242, Training Time: 4.040s, Prediction Time: 10.724s label KACAgent, Reward 46: 164.000, Len(game): 164, Training Time: 4.040s, Prediction Time: 10.854s label KACAgent, Reward 47: 314.000, Len(game): 314, Training Time: 4.040s, Prediction Time: 11.104s label KACAgent, Reward 48: 320.000, Len(game): 320, Training Time: 4.040s, Prediction Time: 11.360s label KACAgent, Reward 49: 145.000, Len(game): 145, Training Time: 4.040s, Prediction Time: 11.477s label KACAgent, Reward 50: 325.000, Len(game): 325, Training Time: 4.040s, Prediction Time: 11.736s label KACAgent, Reward 51: 219.000, Len(game): 219, Training Time: 4.040s, Prediction Time: 11.912s label KACAgent, Reward 52: 178.000, Len(game): 178, Training Time: 4.040s, Prediction Time: 12.058s label KACAgent, Reward 53: 232.000, Len(game): 232, Training Time: 4.040s, Prediction Time: 12.250s label KACAgent, Reward 54: 333.000, Len(game): 333, Training Time: 4.040s, Prediction Time: 12.521s label KACAgent, Reward 55: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 12.726s label KACAgent, Reward 56: 302.000, Len(game): 302, Training Time: 4.040s, Prediction Time: 12.970s label KACAgent, Reward 57: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 13.216s label KACAgent, Reward 58: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 13.494s label KACAgent, Reward 59: 216.000, Len(game): 216, Training Time: 4.040s, Prediction Time: 13.669s label KACAgent, Reward 60: 419.000, Len(game): 419, Training Time: 4.040s, Prediction Time: 14.008s label KACAgent, Reward 61: 264.000, Len(game): 264, Training Time: 4.040s, Prediction Time: 14.219s label KACAgent, Reward 62: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 14.415s label KACAgent, Reward 63: 185.000, Len(game): 185, Training Time: 4.040s, Prediction Time: 14.564s label KACAgent, Reward 64: 252.000, Len(game): 252, 
Training Time: 4.040s, Prediction Time: 14.768s label KACAgent, Reward 65: 324.000, Len(game): 324, Training Time: 4.040s, Prediction Time: 15.034s label KACAgent, Reward 66: 192.000, Len(game): 192, Training Time: 4.040s, Prediction Time: 15.188s label KACAgent, Reward 67: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 15.342s label KACAgent, Reward 68: 342.000, Len(game): 342, Training Time: 4.040s, Prediction Time: 15.617s label KACAgent, Reward 69: 309.000, Len(game): 309, Training Time: 4.040s, Prediction Time: 15.868s label KACAgent, Reward 70: 327.000, Len(game): 327, Training Time: 4.040s, Prediction Time: 16.134s label KACAgent, Reward 71: 215.000, Len(game): 215, Training Time: 4.040s, Prediction Time: 16.310s label KACAgent, Reward 72: 277.000, Len(game): 277, Training Time: 4.040s, Prediction Time: 16.536s label KACAgent, Reward 73: 188.000, Len(game): 188, Training Time: 4.040s, Prediction Time: 16.687s label KACAgent, Reward 74: 211.000, Len(game): 211, Training Time: 4.040s, Prediction Time: 16.858s label KACAgent, Reward 75: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 17.113s label KACAgent, Reward 76: 193.000, Len(game): 193, Training Time: 4.040s, Prediction Time: 17.268s label KACAgent, Reward 77: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 17.420s label KACAgent, Reward 78: 356.000, Len(game): 356, Training Time: 4.040s, Prediction Time: 17.708s label KACAgent, Reward 79: 348.000, Len(game): 348, Training Time: 4.040s, Prediction Time: 17.989s label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 4.040s, Prediction Time: 18.260s label KACAgent, Reward 81: 194.000, Len(game): 194, Training Time: 4.040s, Prediction Time: 18.416s label KACAgent, Reward 82: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 18.568s label KACAgent, Reward 83: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 18.838s label KACAgent, Reward 84: 136.000, Len(game): 136, 
Training Time: 4.040s, Prediction Time: 18.949s label KACAgent, Reward 85: 276.000, Len(game): 276, Training Time: 4.040s, Prediction Time: 19.174s label KACAgent, Reward 86: 207.000, Len(game): 207, Training Time: 4.040s, Prediction Time: 19.342s label KACAgent, Reward 87: 306.000, Len(game): 306, Training Time: 4.040s, Prediction Time: 19.588s label KACAgent, Reward 88: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 19.830s label KACAgent, Reward 89: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 20.057s label KACAgent, Reward 90: 311.000, Len(game): 311, Training Time: 4.040s, Prediction Time: 20.307s label KACAgent, Reward 91: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 20.504s label KACAgent, Reward 92: 221.000, Len(game): 221, Training Time: 4.040s, Prediction Time: 20.680s label KACAgent, Reward 93: 190.000, Len(game): 190, Training Time: 4.040s, Prediction Time: 20.832s label KACAgent, Reward 94: 266.000, Len(game): 266, Training Time: 4.040s, Prediction Time: 21.046s label KACAgent, Reward 95: 290.000, Len(game): 290, Training Time: 4.040s, Prediction Time: 21.284s label KACAgent, Reward 96: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 21.521s label KACAgent, Reward 97: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 21.731s label KACAgent, Reward 98: 238.000, Len(game): 238, Training Time: 4.040s, Prediction Time: 21.924s label KACAgent, Reward 99: 132.000, Len(game): 132, Training Time: 4.040s, Prediction Time: 22.030s label DQNAgent, Reward 0: 22.000, Len(game): 22, Training Time: 0.000s, Prediction Time: 0.000s label DQNAgent, Reward 1: 14.000, Len(game): 14, Training Time: 0.000s, Prediction Time: 0.000s label DQNAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.000s, Prediction Time: 0.000s label DQNAgent, Reward 3: 23.000, Len(game): 23, Training Time: 0.019s, Prediction Time: 0.001s label DQNAgent, Reward 4: 22.000, Len(game): 22, Training Time: 
0.043s, Prediction Time: 0.002s label DQNAgent, Reward 5: 17.000, Len(game): 17, Training Time: 0.062s, Prediction Time: 0.002s label DQNAgent, Reward 6: 14.000, Len(game): 14, Training Time: 0.077s, Prediction Time: 0.002s label DQNAgent, Reward 7: 11.000, Len(game): 11, Training Time: 0.088s, Prediction Time: 0.003s label DQNAgent, Reward 8: 12.000, Len(game): 12, Training Time: 0.100s, Prediction Time: 0.004s label DQNAgent, Reward 9: 18.000, Len(game): 18, Training Time: 0.120s, Prediction Time: 0.004s label DQNAgent, Reward 10: 24.000, Len(game): 24, Training Time: 0.149s, Prediction Time: 0.004s label DQNAgent, Reward 11: 30.000, Len(game): 30, Training Time: 0.183s, Prediction Time: 0.004s label DQNAgent, Reward 12: 24.000, Len(game): 24, Training Time: 0.209s, Prediction Time: 0.004s label DQNAgent, Reward 13: 14.000, Len(game): 14, Training Time: 0.223s, Prediction Time: 0.004s label DQNAgent, Reward 14: 23.000, Len(game): 23, Training Time: 0.252s, Prediction Time: 0.005s label DQNAgent, Reward 15: 13.000, Len(game): 13, Training Time: 0.270s, Prediction Time: 0.006s label DQNAgent, Reward 16: 12.000, Len(game): 12, Training Time: 0.287s, Prediction Time: 0.007s label DQNAgent, Reward 17: 16.000, Len(game): 16, Training Time: 0.308s, Prediction Time: 0.007s label DQNAgent, Reward 18: 22.000, Len(game): 22, Training Time: 0.339s, Prediction Time: 0.007s label DQNAgent, Reward 19: 38.000, Len(game): 38, Training Time: 0.389s, Prediction Time: 0.008s label DQNAgent, Reward 20: 16.000, Len(game): 16, Training Time: 0.408s, Prediction Time: 0.008s label DQNAgent, Reward 21: 25.000, Len(game): 25, Training Time: 0.435s, Prediction Time: 0.009s label DQNAgent, Reward 22: 13.000, Len(game): 13, Training Time: 0.448s, Prediction Time: 0.010s label DQNAgent, Reward 23: 22.000, Len(game): 22, Training Time: 0.473s, Prediction Time: 0.011s label DQNAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.490s, Prediction Time: 0.011s label DQNAgent, Reward 25: 
28.000, Len(game): 28, Training Time: 0.521s, Prediction Time: 0.012s label DQNAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.542s, Prediction Time: 0.013s label DQNAgent, Reward 27: 24.000, Len(game): 24, Training Time: 0.569s, Prediction Time: 0.014s label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.582s, Prediction Time: 0.014s label DQNAgent, Reward 29: 12.000, Len(game): 12, Training Time: 0.596s, Prediction Time: 0.014s label DQNAgent, Reward 30: 17.000, Len(game): 17, Training Time: 0.619s, Prediction Time: 0.014s label DQNAgent, Reward 31: 41.000, Len(game): 41, Training Time: 0.675s, Prediction Time: 0.016s label DQNAgent, Reward 32: 23.000, Len(game): 23, Training Time: 0.707s, Prediction Time: 0.016s label DQNAgent, Reward 33: 10.000, Len(game): 10, Training Time: 0.718s, Prediction Time: 0.017s label DQNAgent, Reward 34: 9.000, Len(game): 9, Training Time: 0.729s, Prediction Time: 0.018s label DQNAgent, Reward 35: 11.000, Len(game): 11, Training Time: 0.745s, Prediction Time: 0.018s label DQNAgent, Reward 36: 10.000, Len(game): 10, Training Time: 0.759s, Prediction Time: 0.018s label DQNAgent, Reward 37: 14.000, Len(game): 14, Training Time: 0.776s, Prediction Time: 0.019s label DQNAgent, Reward 38: 14.000, Len(game): 14, Training Time: 0.792s, Prediction Time: 0.020s label DQNAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.805s, Prediction Time: 0.021s label DQNAgent, Reward 40: 14.000, Len(game): 14, Training Time: 0.822s, Prediction Time: 0.022s label DQNAgent, Reward 41: 13.000, Len(game): 13, Training Time: 0.837s, Prediction Time: 0.022s label DQNAgent, Reward 42: 12.000, Len(game): 12, Training Time: 0.851s, Prediction Time: 0.022s label DQNAgent, Reward 43: 33.000, Len(game): 33, Training Time: 0.890s, Prediction Time: 0.023s label DQNAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.902s, Prediction Time: 0.023s label DQNAgent, Reward 45: 16.000, Len(game): 16, Training Time: 0.919s, Prediction 
Time: 0.023s label DQNAgent, Reward 46: 13.000, Len(game): 13, Training Time: 0.934s, Prediction Time: 0.024s label DQNAgent, Reward 47: 18.000, Len(game): 18, Training Time: 0.955s, Prediction Time: 0.024s label DQNAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.996s, Prediction Time: 0.025s label DQNAgent, Reward 49: 15.000, Len(game): 15, Training Time: 1.012s, Prediction Time: 0.025s label DQNAgent, Reward 50: 16.000, Len(game): 16, Training Time: 1.030s, Prediction Time: 0.026s label DQNAgent, Reward 51: 28.000, Len(game): 28, Training Time: 1.060s, Prediction Time: 0.027s label DQNAgent, Reward 52: 15.000, Len(game): 15, Training Time: 1.075s, Prediction Time: 0.028s label DQNAgent, Reward 53: 26.000, Len(game): 26, Training Time: 1.103s, Prediction Time: 0.030s label DQNAgent, Reward 54: 32.000, Len(game): 32, Training Time: 1.136s, Prediction Time: 0.031s label DQNAgent, Reward 55: 24.000, Len(game): 24, Training Time: 1.161s, Prediction Time: 0.032s label DQNAgent, Reward 56: 61.000, Len(game): 61, Training Time: 1.226s, Prediction Time: 0.035s label DQNAgent, Reward 57: 12.000, Len(game): 12, Training Time: 1.242s, Prediction Time: 0.035s label DQNAgent, Reward 58: 59.000, Len(game): 59, Training Time: 1.316s, Prediction Time: 0.037s label DQNAgent, Reward 59: 199.000, Len(game): 199, Training Time: 1.537s, Prediction Time: 0.043s label DQNAgent, Reward 60: 117.000, Len(game): 117, Training Time: 1.667s, Prediction Time: 0.047s label DQNAgent, Reward 61: 139.000, Len(game): 139, Training Time: 1.831s, Prediction Time: 0.051s label DQNAgent, Reward 62: 91.000, Len(game): 91, Training Time: 1.929s, Prediction Time: 0.054s label DQNAgent, Reward 63: 124.000, Len(game): 124, Training Time: 2.076s, Prediction Time: 0.059s label DQNAgent, Reward 64: 110.000, Len(game): 110, Training Time: 2.212s, Prediction Time: 0.064s label DQNAgent, Reward 65: 143.000, Len(game): 143, Training Time: 2.511s, Prediction Time: 0.071s label DQNAgent, Reward 66: 84.000, 
Len(game): 84, Training Time: 2.620s, Prediction Time: 0.074s label DQNAgent, Reward 67: 120.000, Len(game): 120, Training Time: 2.761s, Prediction Time: 0.080s label DQNAgent, Reward 68: 77.000, Len(game): 77, Training Time: 2.844s, Prediction Time: 0.084s label DQNAgent, Reward 69: 118.000, Len(game): 118, Training Time: 2.972s, Prediction Time: 0.089s label DQNAgent, Reward 70: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.095s label DQNAgent, Reward 71: 166.000, Len(game): 166, Training Time: 3.101s, Prediction Time: 0.101s label DQNAgent, Reward 72: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.109s label DQNAgent, Reward 73: 230.000, Len(game): 230, Training Time: 3.101s, Prediction Time: 0.119s label DQNAgent, Reward 74: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.124s label DQNAgent, Reward 75: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.129s label DQNAgent, Reward 76: 124.000, Len(game): 124, Training Time: 3.101s, Prediction Time: 0.135s label DQNAgent, Reward 77: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.139s label DQNAgent, Reward 78: 130.000, Len(game): 130, Training Time: 3.101s, Prediction Time: 0.144s label DQNAgent, Reward 79: 119.000, Len(game): 119, Training Time: 3.101s, Prediction Time: 0.149s label DQNAgent, Reward 80: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.156s label DQNAgent, Reward 81: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.160s label DQNAgent, Reward 82: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.165s label DQNAgent, Reward 83: 136.000, Len(game): 136, Training Time: 3.101s, Prediction Time: 0.170s label DQNAgent, Reward 84: 142.000, Len(game): 142, Training Time: 3.101s, Prediction Time: 0.176s label DQNAgent, Reward 85: 128.000, Len(game): 128, Training Time: 3.101s, Prediction Time: 0.180s label DQNAgent, Reward 86: 152.000, Len(game): 152, 
Training Time: 3.101s, Prediction Time: 0.187s label DQNAgent, Reward 87: 121.000, Len(game): 121, Training Time: 3.101s, Prediction Time: 0.192s label DQNAgent, Reward 88: 137.000, Len(game): 137, Training Time: 3.101s, Prediction Time: 0.197s label DQNAgent, Reward 89: 181.000, Len(game): 181, Training Time: 3.101s, Prediction Time: 0.205s label DQNAgent, Reward 90: 122.000, Len(game): 122, Training Time: 3.101s, Prediction Time: 0.209s label DQNAgent, Reward 91: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.216s label DQNAgent, Reward 92: 148.000, Len(game): 148, Training Time: 3.101s, Prediction Time: 0.223s label DQNAgent, Reward 93: 118.000, Len(game): 118, Training Time: 3.101s, Prediction Time: 0.227s label DQNAgent, Reward 94: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.232s label DQNAgent, Reward 95: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.237s label DQNAgent, Reward 96: 179.000, Len(game): 179, Training Time: 3.101s, Prediction Time: 0.245s label DQNAgent, Reward 97: 200.000, Len(game): 200, Training Time: 3.101s, Prediction Time: 0.253s label DQNAgent, Reward 98: 117.000, Len(game): 117, Training Time: 3.101s, Prediction Time: 0.258s label DQNAgent, Reward 99: 144.000, Len(game): 144, Training Time: 3.101s, Prediction Time: 0.265s Computed global error Bellman mean: 2.429265355294616e-07 iter: 2 label KQLearningHJBCP, Reward 0: 27.000, Len(game): 27, Training Time: 0.011s, Prediction Time: 0.001s Computed global error Bellman mean: 1.1135033661321919e-07 iter: 6 label KQLearningHJBCP, Reward 1: 44.000, Len(game): 44, Training Time: 0.043s, Prediction Time: 0.007s Computed global error Bellman mean: 3.422438720370735e-07 iter: 8 label KQLearningHJBCP, Reward 2: 54.000, Len(game): 54, Training Time: 0.122s, Prediction Time: 0.017s Computed global error Bellman mean: 3.9577951882171255e-07 iter: 4 label KQLearningHJBCP, Reward 3: 58.000, Len(game): 58, Training Time: 0.203s, Prediction 
Time: 0.029s Computed global error Bellman mean: 3.432339072491923e-07 iter: 6 label KQLearningHJBCP, Reward 4: 58.000, Len(game): 58, Training Time: 0.415s, Prediction Time: 0.043s Computed global error Bellman mean: 5.280399794638235e-07 iter: 5 label KQLearningHJBCP, Reward 5: 63.000, Len(game): 63, Training Time: 0.666s, Prediction Time: 0.060s Computed global error Bellman mean: 1.3332333149421225e-06 iter: 10 label KQLearningHJBCP, Reward 6: 65.000, Len(game): 65, Training Time: 1.528s, Prediction Time: 0.080s Computed global error Bellman mean: 4.910292080454003e-07 iter: 6 label KQLearningHJBCP, Reward 7: 48.000, Len(game): 48, Training Time: 2.194s, Prediction Time: 0.096s Computed global error Bellman mean: 4.686085707314039e-07 iter: 5 label KQLearningHJBCP, Reward 8: 73.000, Len(game): 73, Training Time: 2.916s, Prediction Time: 0.121s Computed global error Bellman mean: 4.164509857890951e-07 iter: 6 label KQLearningHJBCP, Reward 9: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.149s label KQLearningHJBCP, Reward 10: 33.000, Len(game): 33, Training Time: 4.082s, Prediction Time: 0.162s label KQLearningHJBCP, Reward 11: 104.000, Len(game): 104, Training Time: 4.082s, Prediction Time: 0.199s label KQLearningHJBCP, Reward 12: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.229s label KQLearningHJBCP, Reward 13: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 0.256s label KQLearningHJBCP, Reward 14: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.284s label KQLearningHJBCP, Reward 15: 54.000, Len(game): 54, Training Time: 4.082s, Prediction Time: 0.304s label KQLearningHJBCP, Reward 16: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 0.321s label KQLearningHJBCP, Reward 17: 93.000, Len(game): 93, Training Time: 4.082s, Prediction Time: 0.358s label KQLearningHJBCP, Reward 18: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.391s label KQLearningHJBCP, Reward 
19: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.423s label KQLearningHJBCP, Reward 20: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 0.451s label KQLearningHJBCP, Reward 21: 53.000, Len(game): 53, Training Time: 4.082s, Prediction Time: 0.472s label KQLearningHJBCP, Reward 22: 52.000, Len(game): 52, Training Time: 4.082s, Prediction Time: 0.493s label KQLearningHJBCP, Reward 23: 49.000, Len(game): 49, Training Time: 4.082s, Prediction Time: 0.513s label KQLearningHJBCP, Reward 24: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 0.543s label KQLearningHJBCP, Reward 25: 113.000, Len(game): 113, Training Time: 4.082s, Prediction Time: 0.589s label KQLearningHJBCP, Reward 26: 110.000, Len(game): 110, Training Time: 4.082s, Prediction Time: 0.633s label KQLearningHJBCP, Reward 27: 43.000, Len(game): 43, Training Time: 4.082s, Prediction Time: 0.649s label KQLearningHJBCP, Reward 28: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 0.677s label KQLearningHJBCP, Reward 29: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 0.709s label KQLearningHJBCP, Reward 30: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 0.736s label KQLearningHJBCP, Reward 31: 61.000, Len(game): 61, Training Time: 4.082s, Prediction Time: 0.760s label KQLearningHJBCP, Reward 32: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.792s label KQLearningHJBCP, Reward 33: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.824s label KQLearningHJBCP, Reward 34: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 0.858s label KQLearningHJBCP, Reward 35: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 0.897s label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 4.082s, Prediction Time: 0.937s label KQLearningHJBCP, Reward 37: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 0.970s label KQLearningHJBCP, Reward 38: 74.000, 
Len(game): 74, Training Time: 4.082s, Prediction Time: 0.999s label KQLearningHJBCP, Reward 39: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 1.033s label KQLearningHJBCP, Reward 40: 75.000, Len(game): 75, Training Time: 4.082s, Prediction Time: 1.063s label KQLearningHJBCP, Reward 41: 56.000, Len(game): 56, Training Time: 4.082s, Prediction Time: 1.087s label KQLearningHJBCP, Reward 42: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.121s label KQLearningHJBCP, Reward 43: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.149s label KQLearningHJBCP, Reward 44: 88.000, Len(game): 88, Training Time: 4.082s, Prediction Time: 1.187s label KQLearningHJBCP, Reward 45: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.216s label KQLearningHJBCP, Reward 46: 46.000, Len(game): 46, Training Time: 4.082s, Prediction Time: 1.234s label KQLearningHJBCP, Reward 47: 106.000, Len(game): 106, Training Time: 4.082s, Prediction Time: 1.277s label KQLearningHJBCP, Reward 48: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 1.312s label KQLearningHJBCP, Reward 49: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.346s label KQLearningHJBCP, Reward 50: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.374s label KQLearningHJBCP, Reward 51: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.402s label KQLearningHJBCP, Reward 52: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.436s label KQLearningHJBCP, Reward 53: 94.000, Len(game): 94, Training Time: 4.082s, Prediction Time: 1.474s label KQLearningHJBCP, Reward 54: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.503s label KQLearningHJBCP, Reward 55: 86.000, Len(game): 86, Training Time: 4.082s, Prediction Time: 1.538s label KQLearningHJBCP, Reward 56: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.568s label KQLearningHJBCP, Reward 57: 68.000, Len(game): 68, 
Training Time: 4.082s, Prediction Time: 1.596s label KQLearningHJBCP, Reward 58: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 1.617s label KQLearningHJBCP, Reward 59: 45.000, Len(game): 45, Training Time: 4.082s, Prediction Time: 1.634s label KQLearningHJBCP, Reward 60: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.666s label KQLearningHJBCP, Reward 61: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.700s label KQLearningHJBCP, Reward 62: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.732s label KQLearningHJBCP, Reward 63: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.760s label KQLearningHJBCP, Reward 64: 101.000, Len(game): 101, Training Time: 4.082s, Prediction Time: 1.801s label KQLearningHJBCP, Reward 65: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 1.832s label KQLearningHJBCP, Reward 66: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 1.869s label KQLearningHJBCP, Reward 67: 107.000, Len(game): 107, Training Time: 4.082s, Prediction Time: 1.912s label KQLearningHJBCP, Reward 68: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.943s label KQLearningHJBCP, Reward 69: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 1.970s label KQLearningHJBCP, Reward 70: 64.000, Len(game): 64, Training Time: 4.082s, Prediction Time: 1.996s label KQLearningHJBCP, Reward 71: 69.000, Len(game): 69, Training Time: 4.082s, Prediction Time: 2.024s label KQLearningHJBCP, Reward 72: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.055s label KQLearningHJBCP, Reward 73: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.083s label KQLearningHJBCP, Reward 74: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 2.116s label KQLearningHJBCP, Reward 75: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.144s label KQLearningHJBCP, Reward 76: 94.000, Len(game): 94, Training Time: 
4.082s, Prediction Time: 2.183s label KQLearningHJBCP, Reward 77: 50.000, Len(game): 50, Training Time: 4.082s, Prediction Time: 2.203s label KQLearningHJBCP, Reward 78: 79.000, Len(game): 79, Training Time: 4.082s, Prediction Time: 2.236s label KQLearningHJBCP, Reward 79: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.266s label KQLearningHJBCP, Reward 80: 96.000, Len(game): 96, Training Time: 4.082s, Prediction Time: 2.305s label KQLearningHJBCP, Reward 81: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.337s label KQLearningHJBCP, Reward 82: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 2.367s label KQLearningHJBCP, Reward 83: 42.000, Len(game): 42, Training Time: 4.082s, Prediction Time: 2.384s label KQLearningHJBCP, Reward 84: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.415s label KQLearningHJBCP, Reward 85: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 2.436s label KQLearningHJBCP, Reward 86: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.467s label KQLearningHJBCP, Reward 87: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.496s label KQLearningHJBCP, Reward 88: 73.000, Len(game): 73, Training Time: 4.082s, Prediction Time: 2.527s label KQLearningHJBCP, Reward 89: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.557s label KQLearningHJBCP, Reward 90: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.576s label KQLearningHJBCP, Reward 91: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.607s label KQLearningHJBCP, Reward 92: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.627s label KQLearningHJBCP, Reward 93: 58.000, Len(game): 58, Training Time: 4.082s, Prediction Time: 2.653s label KQLearningHJBCP, Reward 94: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.685s label KQLearningHJBCP, Reward 95: 77.000, Len(game): 77, Training Time: 4.082s, Prediction 
Time: 2.716s label KQLearningHJBCP, Reward 96: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 2.743s label KQLearningHJBCP, Reward 97: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.773s label KQLearningHJBCP, Reward 98: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.806s label KQLearningHJBCP, Reward 99: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.835s Computed global error Bellman mean: 2.2538068875251957e-08 iter: 2 label KQLearning, Reward 0: 17.000, Len(game): 17, Training Time: 0.005s, Prediction Time: 0.001s Computed global error Bellman mean: 2.1038873556891403e-08 iter: 3 label KQLearning, Reward 1: 19.000, Len(game): 19, Training Time: 0.011s, Prediction Time: 0.006s Computed global error Bellman mean: 3.3796859718234394e-08 iter: 4 label KQLearning, Reward 2: 36.000, Len(game): 36, Training Time: 0.027s, Prediction Time: 0.013s Computed global error Bellman mean: 0.005292865730735803 iter: 5 label KQLearning, Reward 3: 41.000, Len(game): 41, Training Time: 0.056s, Prediction Time: 0.021s Computed global error Bellman mean: 3.536763350335074e-08 iter: 5 label KQLearning, Reward 4: 22.000, Len(game): 22, Training Time: 0.099s, Prediction Time: 0.026s Computed global error Bellman mean: 7.233596841768994e-08 iter: 5 label KQLearning, Reward 5: 23.000, Len(game): 23, Training Time: 0.148s, Prediction Time: 0.031s Computed global error Bellman mean: 6.697590247687889e-05 iter: 5 label KQLearning, Reward 6: 57.000, Len(game): 57, Training Time: 0.224s, Prediction Time: 0.044s Computed global error Bellman mean: 6.971595679015014e-08 iter: 5 label KQLearning, Reward 7: 59.000, Len(game): 59, Training Time: 0.363s, Prediction Time: 0.059s Computed global error Bellman mean: 1.0071149827691969e-07 iter: 5 label KQLearning, Reward 8: 97.000, Len(game): 97, Training Time: 0.654s, Prediction Time: 0.087s Computed global error Bellman mean: 9.729623505213322e-08 iter: 5 label KQLearning, Reward 
9: 71.000, Len(game): 71, Training Time: 1.035s, Prediction Time: 0.111s Computed global error Bellman mean: 1.228719115186594e-05 iter: 5 label KQLearning, Reward 10: 143.000, Len(game): 143, Training Time: 1.667s, Prediction Time: 0.163s Computed global error Bellman mean: 0.22382836200452905 iter: 5 label KQLearning, Reward 11: 198.000, Len(game): 198, Training Time: 2.817s, Prediction Time: 0.238s Computed global error Bellman mean: 0.09699429032149978 iter: 5 label KQLearning, Reward 12: 305.000, Len(game): 305, Training Time: 4.855s, Prediction Time: 0.387s label KQLearning, Reward 13: 370.000, Len(game): 370, Training Time: 4.855s, Prediction Time: 0.593s label KQLearning, Reward 14: 533.000, Len(game): 533, Training Time: 4.855s, Prediction Time: 0.895s label KQLearning, Reward 15: 168.000, Len(game): 168, Training Time: 4.855s, Prediction Time: 0.989s label KQLearning, Reward 16: 150.000, Len(game): 150, Training Time: 4.855s, Prediction Time: 1.072s label KQLearning, Reward 17: 222.000, Len(game): 222, Training Time: 4.855s, Prediction Time: 1.197s label KQLearning, Reward 18: 253.000, Len(game): 253, Training Time: 4.855s, Prediction Time: 1.341s label KQLearning, Reward 19: 217.000, Len(game): 217, Training Time: 4.855s, Prediction Time: 1.463s label KQLearning, Reward 20: 221.000, Len(game): 221, Training Time: 4.855s, Prediction Time: 1.587s label KQLearning, Reward 21: 147.000, Len(game): 147, Training Time: 4.855s, Prediction Time: 1.671s label KQLearning, Reward 22: 170.000, Len(game): 170, Training Time: 4.855s, Prediction Time: 1.768s label KQLearning, Reward 23: 155.000, Len(game): 155, Training Time: 4.855s, Prediction Time: 1.854s label KQLearning, Reward 24: 295.000, Len(game): 295, Training Time: 4.855s, Prediction Time: 2.020s label KQLearning, Reward 25: 153.000, Len(game): 153, Training Time: 4.855s, Prediction Time: 2.107s label KQLearning, Reward 26: 309.000, Len(game): 309, Training Time: 4.855s, Prediction Time: 2.281s label 
KQLearning, Reward 27: 157.000, Len(game): 157, Training Time: 4.855s, Prediction Time: 2.370s label KQLearning, Reward 28: 263.000, Len(game): 263, Training Time: 4.855s, Prediction Time: 2.520s label KQLearning, Reward 29: 175.000, Len(game): 175, Training Time: 4.855s, Prediction Time: 2.622s label KQLearning, Reward 30: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 2.744s label KQLearning, Reward 31: 177.000, Len(game): 177, Training Time: 4.856s, Prediction Time: 2.846s label KQLearning, Reward 32: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 2.934s label KQLearning, Reward 33: 174.000, Len(game): 174, Training Time: 4.856s, Prediction Time: 3.034s label KQLearning, Reward 34: 198.000, Len(game): 198, Training Time: 4.856s, Prediction Time: 3.146s label KQLearning, Reward 35: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.247s label KQLearning, Reward 36: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 3.344s label KQLearning, Reward 37: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.444s label KQLearning, Reward 38: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.516s label KQLearning, Reward 39: 207.000, Len(game): 207, Training Time: 4.856s, Prediction Time: 3.635s label KQLearning, Reward 40: 209.000, Len(game): 209, Training Time: 4.856s, Prediction Time: 3.754s label KQLearning, Reward 41: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.829s label KQLearning, Reward 42: 134.000, Len(game): 134, Training Time: 4.856s, Prediction Time: 3.909s label KQLearning, Reward 43: 115.000, Len(game): 115, Training Time: 4.856s, Prediction Time: 3.977s label KQLearning, Reward 44: 150.000, Len(game): 150, Training Time: 4.856s, Prediction Time: 4.066s label KQLearning, Reward 45: 197.000, Len(game): 197, Training Time: 4.856s, Prediction Time: 4.181s label KQLearning, Reward 46: 176.000, Len(game): 176, Training Time: 4.856s, 
Prediction Time: 4.282s label KQLearning, Reward 47: 233.000, Len(game): 233, Training Time: 4.856s, Prediction Time: 4.414s label KQLearning, Reward 48: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 4.509s label KQLearning, Reward 49: 181.000, Len(game): 181, Training Time: 4.856s, Prediction Time: 4.614s label KQLearning, Reward 50: 192.000, Len(game): 192, Training Time: 4.856s, Prediction Time: 4.721s label KQLearning, Reward 51: 225.000, Len(game): 225, Training Time: 4.856s, Prediction Time: 4.853s label KQLearning, Reward 52: 654.000, Len(game): 654, Training Time: 4.856s, Prediction Time: 5.228s label KQLearning, Reward 53: 217.000, Len(game): 217, Training Time: 4.856s, Prediction Time: 5.363s label KQLearning, Reward 54: 240.000, Len(game): 240, Training Time: 4.856s, Prediction Time: 5.515s label KQLearning, Reward 55: 232.000, Len(game): 232, Training Time: 4.856s, Prediction Time: 5.658s label KQLearning, Reward 56: 314.000, Len(game): 314, Training Time: 4.856s, Prediction Time: 5.855s label KQLearning, Reward 57: 219.000, Len(game): 219, Training Time: 4.856s, Prediction Time: 5.984s label KQLearning, Reward 58: 365.000, Len(game): 365, Training Time: 4.856s, Prediction Time: 6.192s label KQLearning, Reward 59: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 6.283s label KQLearning, Reward 60: 223.000, Len(game): 223, Training Time: 4.856s, Prediction Time: 6.409s label KQLearning, Reward 61: 383.000, Len(game): 383, Training Time: 4.856s, Prediction Time: 6.631s label KQLearning, Reward 62: 259.000, Len(game): 259, Training Time: 4.856s, Prediction Time: 6.780s label KQLearning, Reward 63: 212.000, Len(game): 212, Training Time: 4.856s, Prediction Time: 6.903s label KQLearning, Reward 64: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 6.986s label KQLearning, Reward 65: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.076s label KQLearning, Reward 66: 181.000, Len(game): 181, 
Training Time: 4.856s, Prediction Time: 7.178s label KQLearning, Reward 67: 243.000, Len(game): 243, Training Time: 4.856s, Prediction Time: 7.318s label KQLearning, Reward 68: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 7.401s label KQLearning, Reward 69: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 7.490s label KQLearning, Reward 70: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 7.649s label KQLearning, Reward 71: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.738s label KQLearning, Reward 72: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 7.830s label KQLearning, Reward 73: 142.000, Len(game): 142, Training Time: 4.856s, Prediction Time: 7.910s label KQLearning, Reward 74: 298.000, Len(game): 298, Training Time: 4.856s, Prediction Time: 8.080s label KQLearning, Reward 75: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 8.180s label KQLearning, Reward 76: 194.000, Len(game): 194, Training Time: 4.856s, Prediction Time: 8.290s label KQLearning, Reward 77: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 8.373s label KQLearning, Reward 78: 165.000, Len(game): 165, Training Time: 4.856s, Prediction Time: 8.466s label KQLearning, Reward 79: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 8.553s label KQLearning, Reward 80: 202.000, Len(game): 202, Training Time: 4.856s, Prediction Time: 8.665s label KQLearning, Reward 81: 158.000, Len(game): 158, Training Time: 4.856s, Prediction Time: 8.756s label KQLearning, Reward 82: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 8.915s label KQLearning, Reward 83: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 9.039s label KQLearning, Reward 84: 200.000, Len(game): 200, Training Time: 4.856s, Prediction Time: 9.152s label KQLearning, Reward 85: 210.000, Len(game): 210, Training Time: 4.856s, Prediction Time: 9.270s label KQLearning, Reward 86: 
171.000, Len(game): 171, Training Time: 4.856s, Prediction Time: 9.374s label KQLearning, Reward 87: 187.000, Len(game): 187, Training Time: 4.856s, Prediction Time: 9.482s label KQLearning, Reward 88: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 9.564s label KQLearning, Reward 89: 185.000, Len(game): 185, Training Time: 4.856s, Prediction Time: 9.669s label KQLearning, Reward 90: 133.000, Len(game): 133, Training Time: 4.856s, Prediction Time: 9.744s label KQLearning, Reward 91: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 9.834s label KQLearning, Reward 92: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 9.926s label KQLearning, Reward 93: 211.000, Len(game): 211, Training Time: 4.856s, Prediction Time: 10.046s label KQLearning, Reward 94: 164.000, Len(game): 164, Training Time: 4.856s, Prediction Time: 10.137s label KQLearning, Reward 95: 339.000, Len(game): 339, Training Time: 4.856s, Prediction Time: 10.328s label KQLearning, Reward 96: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 10.426s label KQLearning, Reward 97: 138.000, Len(game): 138, Training Time: 4.856s, Prediction Time: 10.504s label KQLearning, Reward 98: 151.000, Len(game): 151, Training Time: 4.856s, Prediction Time: 10.589s label KQLearning, Reward 99: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 10.684s 2 .. rst-class:: sphx-glr-timing **Total running time of the script:** (4 minutes 8.002 seconds) .. _sphx_glr_download_auto_ch8_ch8_cartpole.py: .. only:: html .. container:: sphx-glr-footer sphx-glr-footer-example .. container:: sphx-glr-download sphx-glr-download-jupyter :download:`Download Jupyter notebook: ch8_cartpole.ipynb ` .. container:: sphx-glr-download sphx-glr-download-python :download:`Download Python source code: ch8_cartpole.py ` .. container:: sphx-glr-download sphx-glr-download-zip :download:`Download zipped: ch8_cartpole.zip ` .. only:: html .. 
rst-class:: sphx-glr-signature `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_