.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_ch8\ch8_cartpole.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_ch8_ch8_cartpole.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_ch8_ch8_cartpole.py:


==========================
8.2 Experiments - Cartpole
==========================
We use the OpenAI Gym library to instantiate the Gymnasium CartPole-v1 environment and reproduce the figure from chapter 8_XXX.

We train the following agents: 

- PPO 
- DQN 
- Controller-based
- Kernel Actor-Critic
- Kernel Q-Learning
- Kernel Q-Learning HJB
- Kernel Policy-Gradient

We show how you can override some methods of each algorithm to tune it to the environment. For detailed documentation on KAgents, see the **codpy documentation**.

.. GENERATED FROM PYTHON SOURCE LINES 19-31

.. code-block:: Python


    # Importing necessary modules
    import sys

    from matplotlib import pyplot as plt
    import numpy as np

    import codpy.core as core
    import codpy.KQLearning as KQLearning

    from ignore_utils import * 


.. GENERATED FROM PYTHON SOURCE LINES 32-34

KQLearning
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 34-83

.. code-block:: Python

    class KQLearningCP(KQLearning.KQLearning):
        """
        Kernel Q-Learning agent whose ``format`` and ``train`` are overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Turn one episode into training arrays, optionally truncated.

            In CartPole we only want to keep a certain number of timesteps per
            episode, whereas the original format approach keeps all the data.

            Parameters:
            - sarsd: iterable of (states, actions, next_states, rewards, dones).
            - max_training_game_size: if not None, only the first
              ``max_training_game_size`` entries of every array are kept.
            - kwargs: forwarded to ``self.compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones.
            """
            states, actions, next_states, rewards, dones = tuple(
                map(core.get_matrix, sarsd)
            )
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed from the full episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)

            formatted = (states, actions, next_states, rewards, returns, dones)
            if max_training_game_size is None:
                return formatted
            return tuple(field[:max_training_game_size] for field in formatted)

        def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
            """
            In CartPole we don't want clustering, so we override the train method.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - max_training_game_size: forwarded to ``self.format``.
            - tol: accepted for interface compatibility; not used in this method.
            - kwargs: ``max_game`` is read here; everything is forwarded to
              ``self.format`` and ``self.optimal_states_values_function``.
            """
            raw_states, _, _, _, _ = game
            episode_length = len(raw_states)

            # In CartPole we skip training if we already solved the environment.
            if episode_length >= kwargs.get("max_game", 1e12):
                print("no training")
                return

            states, actions, next_states, rewards, returns, dones = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            if self.critic.is_valid():
                # Replace the formatted returns by the current critic's values.
                state_actions = np.concatenate([states, actions], axis=1)
                returns = self.critic(state_actions)

            self.replay_buffer.push(
                states, actions, next_states, rewards, returns, dones
            )

            # self.critic here is a kernel; it is fitted on the entire replay buffer
            # to solve the Bellman equations.
            self.critic = self.optimal_states_values_function(
                self.replay_buffer.memory, verbose=True, **kwargs
            )


.. GENERATED FROM PYTHON SOURCE LINES 84-86

PolicyGradient
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 86-120

.. code-block:: Python


    class PolicyGradientCP(KQLearning.PolicyGradient):
        """
        Kernel policy-gradient agent whose ``format`` and ``train`` are overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Convert one episode to training arrays and optionally truncate them.

            Parameters:
            - sarsd: iterable of (states, actions, next_states, rewards, dones).
            - max_training_game_size: if not None, only the first
              ``max_training_game_size`` entries of every array are kept.
            - kwargs: forwarded to ``self.compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones.
            """
            matrices = [core.get_matrix(item) for item in sarsd]
            states, actions, next_states, rewards, dones = matrices

            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed from the full episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)

            if max_training_game_size is None:
                return states, actions, next_states, rewards, returns, dones

            limit = max_training_game_size
            return (
                states[:limit],
                actions[:limit],
                next_states[:limit],
                rewards[:limit],
                returns[:limit],
                dones[:limit],
            )

        def train(self, game, **kwargs):
            """
            Train on one episode unless it reached ``max_game`` timesteps.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - kwargs: ``max_game`` is read here; everything is forwarded to the
              parent ``train`` together with ``clip=1.``.
            """
            episode_states, _, _, _, _ = game
            if len(episode_states) >= kwargs.get("max_game", 1e12):
                print("no training")
            else:
                super().train(game, clip=1.0, **kwargs)
 
    
.. GENERATED FROM PYTHON SOURCE LINES 121-123

KActorCritic
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 123-167

.. code-block:: Python

    class KActorCriticCP(KQLearning.KActorCritic):
        """
        Kernel actor-critic agent whose ``format`` and ``train`` are overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Format the game data by keeping only up to max_training_game_size timesteps.

            Parameters:
            - sarsd: tuple collection of game data (states, actions, next_states, rewards, dones).
            - max_training_game_size: maximum number of timesteps to keep for training
              (None keeps everything).
            - kwargs: forwarded to ``self.compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones: formatted game data.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(entry) for entry in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed from the full episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)

            if max_training_game_size is not None:
                head = slice(None, max_training_game_size)
                states = states[head]
                actions = actions[head]
                next_states = next_states[head]
                rewards = rewards[head]
                returns = returns[head]
                dones = dones[head]

            return states, actions, next_states, rewards, returns, dones

        def train(self, game, **kwargs):
            """
            Skip training if the game was too long (for CartPole, this means we
            already solved the environment); otherwise defer to the parent
            ``train`` with ``clip=1.``.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - kwargs: ``max_game`` is read here; everything is forwarded to the parent.
            """
            episode_states, _, _, _, _ = game
            too_long = len(episode_states) >= kwargs.get("max_game", 1e12)
            if too_long:
                print("no training")
                return
            super().train(game, clip=1.0, **kwargs)


.. GENERATED FROM PYTHON SOURCE LINES 168-170

HJB
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 170-215

.. code-block:: Python

    
    class KQLearningHJBCP(KQLearning.KQLearningHJB):
        """
        Kernel Q-Learning HJB agent whose ``format`` and ``train`` are overridden for CartPole.
        """

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Convert one episode to training arrays and optionally truncate them.

            Parameters:
            - sarsd: iterable of (states, actions, next_states, rewards, dones).
            - max_training_game_size: if not None, only the first
              ``max_training_game_size`` entries of every array are kept.
            - kwargs: forwarded to ``self.compute_returns``.

            Returns:
            - states, actions, next_states, rewards, returns, dones.
            """
            states, actions, next_states, rewards, dones = [
                core.get_matrix(column) for column in sarsd
            ]
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            # Returns are computed from the full episode, before any truncation.
            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            dones = core.get_matrix(dones, dtype=bool)

            fields = [states, actions, next_states, rewards, returns, dones]
            if max_training_game_size is not None:
                fields = [field[:max_training_game_size] for field in fields]
            return tuple(fields)

        def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
            """
            Push one episode to the replay buffer and refit the critic on the buffer.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - max_training_game_size: forwarded to ``self.format``.
            - tol: accepted for interface compatibility; not used in this method.
            - kwargs: ``max_game`` is read here; everything is forwarded to
              ``self.format`` and ``self.optimal_states_values_function``.
            """
            episode_states, _, _, _, _ = game
            if len(episode_states) >= kwargs.get("max_game", 1e12):
                print("no training")
                return

            formatted = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            states, actions, next_states, rewards, returns, dones = formatted
            self.replay_buffer.push(
                states, actions, next_states, rewards, returns, dones
            )

            memory = self.replay_buffer.memory
            (
                mem_states,
                mem_actions,
                mem_next_states,
                mem_rewards,
                mem_returns,
                mem_dones,
            ) = memory
            # is_valid() returns False if the kernel hasn't been properly
            # initialized, i.e. x and fx haven't been set.
            if self.critic.is_valid():
                # We compute returns using the critic instead of MC returns.
                critic_returns = self.critic(
                    np.concatenate([mem_states, mem_actions], axis=1)
                )
                memory = (
                    mem_states,
                    mem_actions,
                    mem_next_states,
                    mem_rewards,
                    critic_returns,
                    mem_dones,
                )

            self.critic = self.optimal_states_values_function(
                memory, verbose=True, **kwargs
            )
    

.. GENERATED FROM PYTHON SOURCE LINES 216-218

KController
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 218-387

.. code-block:: Python

    class heuristic_ControllerCP:
        """
        This class defines an expert-based heuristic controller for the CartPole environment.

        The action is derived from the sign of the weighted sum ``(w * s).sum()``,
        where ``w`` are the parameters to be optimized.
        """
        # This is the number of parameters to be optimized
        dim = 4

        def __init__(self, w=None, **kwargs):
            """
            Parameters:
            - w: optional initial weights (array-like). When None, ``dim`` weights
              equal to 0.5 are used.
            - kwargs: accepted so callers can pass extra settings; not used here.
            """
            if w is None:
                self.w = np.ones([self.dim]) * 0.5
            else:
                # Coerce to an array so that list/tuple weights work with
                # get_distribution (which reads ``self.w.shape``). ndarray inputs
                # are stored as the very same object.
                self.w = np.asanyarray(w)

        def get_distribution(self):
            """
            This will be called by the optimizer. You need to define a way to sample from the parameters distribution, and get the support.

            Returns:
            - a sampler object: calling it with ``n`` draws an ``[n, len(w)]`` array
              of uniform samples in [-1, 1); its ``support`` method returns its
              argument unchanged.
            """
            class uniform:
                def __init__(self, shape1):
                    self.shape1 = shape1

                def __call__(self, n):
                    # np.random.uniform samples [0, 1); rescale to [-1, 1).
                    return 2 * np.random.uniform(size=[n, self.shape1]) - 1

                def support(self, v):
                    return v

            return uniform(self.w.shape[0])

        def get_thetas(self):
            """Return the current weights."""
            return self.w

        def set_thetas(self, w):
            """Replace the weights by a flattened copy of ``w``."""
            self.w = w.flatten()

        def __call__(self, s, **kwargs):
            """
            Will be used to make inference. This is where you define the action to be taken.

            Parameters:
            - s : state of the environment, broadcastable against the weights.
              The product ``w * s`` is summed over all of its elements, so a
              single action is returned for the whole input.

            Returns:
            - prod: int, action to be taken: 1 if the weighted sum is positive,
              0 if it is zero or negative.
            """
            prod = (self.w * s).sum()
            # sign in {-1, 0, 1} -> (sign + 1) / 2 in {0, 0.5, 1} -> int in {0, 0, 1}
            prod = int((np.sign(prod) + 1) / 2)
            return prod
    
    class KControllerCP(KQLearning.KController):
        """
        This is the main class which will optimize the heuristic controller.
        """

        def __init__(self, state_dim, actions_dim, **kwargs):
            """
            Build the agent around a ``heuristic_ControllerCP`` instance.

            Parameters:
            - state_dim, actions_dim: forwarded to the parent constructor.
            - kwargs: forwarded to both the controller and the parent constructor.
            """
            # This is where you would pass any other custom controller
            super().__init__(
                state_dim,
                actions_dim,
                heuristic_ControllerCP(state_dim=state_dim, **kwargs),
                **kwargs,
            )

        def get_function(self, **kwargs):
            """
            The optimizer will find the best parameters which maximize this function.

            This is where you would tweak the function to be maximized. The
            returned callable multiplies the estimator's value at ``x`` by the
            estimator's ``distance(x)``.
            """
            self.expectation_estimator = self.get_expectation_estimator(
                self.x, self.y, **kwargs
            )

            def function(x):
                # The estimator is read from self at call time.
                estimator = self.expectation_estimator
                return estimator(x) * estimator.distance(x)

            return function

        def format(self, sarsd, **kwargs):
            """
            In the case of the controller, the agent only sees the sum of the rewards for an entire episode.
            All other game data won't be used for training. The format function still needs to output a tuple.

            Returns:
            - a 5-tuple of matrices: mean state, mean of the controller parameters,
              mean next state, summed reward and mean done flag, each aggregated
              along axis 0 and transposed.
            """
            state, action, next_state, reward, done = tuple(
                map(core.get_matrix, sarsd)
            )
            # Rewards at timesteps flagged as done are zeroed before summing.
            reward[done.astype(bool)] = 0

            # NOTE(review): the encoded actions are discarded -- the controller
            # parameters are used in their place below. The call is kept only to
            # preserve the original behaviour; confirm whether it can be removed.
            KQLearning.rl_hot_encoder(action, self.actions_dim)
            thetas = core.get_matrix(self.controller.get_thetas()).T
            done = core.get_matrix(done, dtype=bool)

            def transposed_matrix(values):
                return core.get_matrix(values).T

            return (
                transposed_matrix(state.mean(axis=0)),
                transposed_matrix(thetas.mean(axis=0)),
                transposed_matrix(next_state.mean(axis=0)),
                transposed_matrix(reward.sum(axis=0)),
                transposed_matrix(done.mean(axis=0)),
            )

        def train(self, game, **kwargs):
            """
            Train on one episode unless it reached ``max_game`` timesteps.

            Parameters:
            - game: tuple (states, actions, next_states, rewards, dones) of one episode.
            - kwargs: ``max_game`` is read here; everything is forwarded to the parent.
            """
            # Similarly, you can skip training if the game is too long to save training time.
            episode_states, _, _, _, _ = game
            if len(episode_states) >= kwargs.get("max_game", 1e12):
                print("no training")
            else:
                super().train(game, **kwargs)
        
    if __name__ == "__main__":
        # Agents trained in the benchmark, as (label, agent class) pairs. If
        # game_dictionary is empty, the benchmark will try to load data from the .pkl file.
        game_dictionary = dict(
            [
                ("PPOAgent", PPOAgent),
                ("PolicyGradient", PolicyGradientCP),
                ("Controller-based", KControllerCP),
                ("KACAgent", KActorCriticCP),
                ("DQNAgent", DQNAgent),
                ("KQLearningHJBCP", KQLearningHJBCP),
                ("KQLearning", KQLearningCP),
            ]
        )

        # Agent parameters. This dict will be passed in each agent's __init__() method.
        extras = dict(
            # D=4,
            KActor=dict(n_batch=1000000, max_nystrom=1000, reg=1e-9, order=None),
            KCritic=dict(n_batch=1000000, max_nystrom=1000, reg=1e-9, order=None),
            Rewards=dict(n_batch=1000000, max_nystrom=1000, reg=1e-9, order=None),
            DQNAgent=dict(
                # reward_function=mc_reward_function,
                episodes=500,
                policy_param=64,
                target_param=64,
            ),
            KController=dict(reg=1e-3, order=None),
            HJBModel=dict(
                # latent_shape=[100, 50],
                max_size=100000,
                n_batch=1000000,
                max_nystrom=1000,
                reg=1e-9,
                order=None,
                state_dim=4,
            ),
            max_game=1000,
            max_training_game_size=1000,
            gamma=0.99,
            capacity=200000000,
            # seed=42,
        )
        # No "seed" entry is set above, so this seeds NumPy with None.
        seed = extras.get("seed")
        np.random.seed(seed)

        Benchmark()(
            game_dictionary,
            "CartPole-v1",
            num_games=100, num_repeats=3, max_time=3, axis="episode",
            # file_name="results_CP_final.pkl",
            **extras,
        )
        plt.show()


.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_cartpole_001.png
         :alt: Cumulative Reward over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_cartpole_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_cartpole_002.png
         :alt: Training Time per Game over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_cartpole_002.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    label PPOAgent, Reward 0: 27.000, Len(game): 27, Training Time: 0.006s, Prediction Time: 0.006s
    label PPOAgent, Reward 1: 25.000, Len(game): 25, Training Time: 0.010s, Prediction Time: 0.010s
    label PPOAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.013s, Prediction Time: 0.013s
    label PPOAgent, Reward 3: 13.000, Len(game): 13, Training Time: 0.016s, Prediction Time: 0.016s
    label PPOAgent, Reward 4: 15.000, Len(game): 15, Training Time: 0.019s, Prediction Time: 0.019s
    label PPOAgent, Reward 5: 12.000, Len(game): 12, Training Time: 0.021s, Prediction Time: 0.021s
    label PPOAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.024s, Prediction Time: 0.024s
    label PPOAgent, Reward 7: 33.000, Len(game): 33, Training Time: 0.030s, Prediction Time: 0.030s
    label PPOAgent, Reward 8: 16.000, Len(game): 16, Training Time: 0.032s, Prediction Time: 0.032s
    label PPOAgent, Reward 9: 27.000, Len(game): 27, Training Time: 0.037s, Prediction Time: 0.037s
    label PPOAgent, Reward 10: 12.000, Len(game): 12, Training Time: 0.040s, Prediction Time: 0.040s
    label PPOAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.042s, Prediction Time: 0.042s
    label PPOAgent, Reward 12: 12.000, Len(game): 12, Training Time: 0.045s, Prediction Time: 0.045s
    label PPOAgent, Reward 13: 20.000, Len(game): 20, Training Time: 0.048s, Prediction Time: 0.048s
    label PPOAgent, Reward 14: 16.000, Len(game): 16, Training Time: 0.052s, Prediction Time: 0.052s
    label PPOAgent, Reward 15: 37.000, Len(game): 37, Training Time: 0.058s, Prediction Time: 0.058s
    label PPOAgent, Reward 16: 25.000, Len(game): 25, Training Time: 0.063s, Prediction Time: 0.063s
    label PPOAgent, Reward 17: 10.000, Len(game): 10, Training Time: 0.065s, Prediction Time: 0.065s
    label PPOAgent, Reward 18: 17.000, Len(game): 17, Training Time: 0.068s, Prediction Time: 0.068s
    label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.072s, Prediction Time: 0.072s
    label PPOAgent, Reward 20: 18.000, Len(game): 18, Training Time: 0.075s, Prediction Time: 0.075s
    label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.078s, Prediction Time: 0.078s
    label PPOAgent, Reward 22: 15.000, Len(game): 15, Training Time: 0.081s, Prediction Time: 0.081s
    label PPOAgent, Reward 23: 14.000, Len(game): 14, Training Time: 0.083s, Prediction Time: 0.083s
    label PPOAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.087s, Prediction Time: 0.087s
    label PPOAgent, Reward 25: 21.000, Len(game): 21, Training Time: 0.091s, Prediction Time: 0.091s
    label PPOAgent, Reward 26: 20.000, Len(game): 20, Training Time: 0.094s, Prediction Time: 0.094s
    label PPOAgent, Reward 27: 11.000, Len(game): 11, Training Time: 0.096s, Prediction Time: 0.096s
    label PPOAgent, Reward 28: 12.000, Len(game): 12, Training Time: 0.099s, Prediction Time: 0.099s
    label PPOAgent, Reward 29: 19.000, Len(game): 19, Training Time: 0.102s, Prediction Time: 0.102s
    label PPOAgent, Reward 30: 10.000, Len(game): 10, Training Time: 0.104s, Prediction Time: 0.104s
    label PPOAgent, Reward 31: 17.000, Len(game): 17, Training Time: 0.108s, Prediction Time: 0.108s
    label PPOAgent, Reward 32: 29.000, Len(game): 29, Training Time: 0.113s, Prediction Time: 0.113s
    label PPOAgent, Reward 33: 15.000, Len(game): 15, Training Time: 0.117s, Prediction Time: 0.117s
    label PPOAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.119s, Prediction Time: 0.119s
    label PPOAgent, Reward 35: 15.000, Len(game): 15, Training Time: 0.122s, Prediction Time: 0.122s
    label PPOAgent, Reward 36: 32.000, Len(game): 32, Training Time: 0.128s, Prediction Time: 0.128s
    label PPOAgent, Reward 37: 22.000, Len(game): 22, Training Time: 0.133s, Prediction Time: 0.133s
    label PPOAgent, Reward 38: 19.000, Len(game): 19, Training Time: 0.137s, Prediction Time: 0.137s
    label PPOAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.138s, Prediction Time: 0.138s
    label PPOAgent, Reward 40: 16.000, Len(game): 16, Training Time: 0.141s, Prediction Time: 0.141s
    label PPOAgent, Reward 41: 12.000, Len(game): 12, Training Time: 0.143s, Prediction Time: 0.143s
    label PPOAgent, Reward 42: 31.000, Len(game): 31, Training Time: 0.150s, Prediction Time: 0.150s
    label PPOAgent, Reward 43: 12.000, Len(game): 12, Training Time: 0.151s, Prediction Time: 0.151s
    label PPOAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.153s, Prediction Time: 0.153s
    label PPOAgent, Reward 45: 50.000, Len(game): 50, Training Time: 0.162s, Prediction Time: 0.162s
    label PPOAgent, Reward 46: 17.000, Len(game): 17, Training Time: 0.165s, Prediction Time: 0.165s
    label PPOAgent, Reward 47: 28.000, Len(game): 28, Training Time: 0.172s, Prediction Time: 0.172s
    label PPOAgent, Reward 48: 19.000, Len(game): 19, Training Time: 0.175s, Prediction Time: 0.175s
    label PPOAgent, Reward 49: 13.000, Len(game): 13, Training Time: 0.178s, Prediction Time: 0.178s
    label PPOAgent, Reward 50: 20.000, Len(game): 20, Training Time: 0.182s, Prediction Time: 0.182s
    label PPOAgent, Reward 51: 9.000, Len(game): 9, Training Time: 0.184s, Prediction Time: 0.184s
    label PPOAgent, Reward 52: 16.000, Len(game): 16, Training Time: 0.188s, Prediction Time: 0.188s
    label PPOAgent, Reward 53: 42.000, Len(game): 42, Training Time: 0.195s, Prediction Time: 0.195s
    label PPOAgent, Reward 54: 23.000, Len(game): 23, Training Time: 0.200s, Prediction Time: 0.200s
    label PPOAgent, Reward 55: 9.000, Len(game): 9, Training Time: 0.202s, Prediction Time: 0.202s
    label PPOAgent, Reward 56: 19.000, Len(game): 19, Training Time: 0.206s, Prediction Time: 0.206s
    label PPOAgent, Reward 57: 12.000, Len(game): 12, Training Time: 0.208s, Prediction Time: 0.208s
    label PPOAgent, Reward 58: 35.000, Len(game): 35, Training Time: 0.215s, Prediction Time: 0.215s
    label PPOAgent, Reward 59: 18.000, Len(game): 18, Training Time: 0.218s, Prediction Time: 0.218s
    label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.221s, Prediction Time: 0.221s
    label PPOAgent, Reward 61: 18.000, Len(game): 18, Training Time: 0.225s, Prediction Time: 0.225s
    label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.228s, Prediction Time: 0.228s
    label PPOAgent, Reward 63: 24.000, Len(game): 24, Training Time: 0.273s, Prediction Time: 0.273s
    label PPOAgent, Reward 64: 79.000, Len(game): 79, Training Time: 0.288s, Prediction Time: 0.288s
    label PPOAgent, Reward 65: 46.000, Len(game): 46, Training Time: 0.296s, Prediction Time: 0.296s
    label PPOAgent, Reward 66: 9.000, Len(game): 9, Training Time: 0.298s, Prediction Time: 0.298s
    label PPOAgent, Reward 67: 37.000, Len(game): 37, Training Time: 0.305s, Prediction Time: 0.305s
    label PPOAgent, Reward 68: 45.000, Len(game): 45, Training Time: 0.314s, Prediction Time: 0.314s
    label PPOAgent, Reward 69: 13.000, Len(game): 13, Training Time: 0.316s, Prediction Time: 0.316s
    label PPOAgent, Reward 70: 30.000, Len(game): 30, Training Time: 0.322s, Prediction Time: 0.322s
    label PPOAgent, Reward 71: 9.000, Len(game): 9, Training Time: 0.324s, Prediction Time: 0.324s
    label PPOAgent, Reward 72: 27.000, Len(game): 27, Training Time: 0.330s, Prediction Time: 0.330s
    label PPOAgent, Reward 73: 10.000, Len(game): 10, Training Time: 0.332s, Prediction Time: 0.332s
    label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.339s, Prediction Time: 0.339s
    label PPOAgent, Reward 75: 22.000, Len(game): 22, Training Time: 0.343s, Prediction Time: 0.343s
    label PPOAgent, Reward 76: 13.000, Len(game): 13, Training Time: 0.345s, Prediction Time: 0.345s
    label PPOAgent, Reward 77: 28.000, Len(game): 28, Training Time: 0.351s, Prediction Time: 0.351s
    label PPOAgent, Reward 78: 11.000, Len(game): 11, Training Time: 0.353s, Prediction Time: 0.353s
    label PPOAgent, Reward 79: 20.000, Len(game): 20, Training Time: 0.357s, Prediction Time: 0.357s
    label PPOAgent, Reward 80: 33.000, Len(game): 33, Training Time: 0.362s, Prediction Time: 0.362s
    label PPOAgent, Reward 81: 13.000, Len(game): 13, Training Time: 0.366s, Prediction Time: 0.366s
    label PPOAgent, Reward 82: 16.000, Len(game): 16, Training Time: 0.369s, Prediction Time: 0.369s
    label PPOAgent, Reward 83: 19.000, Len(game): 19, Training Time: 0.373s, Prediction Time: 0.373s
    label PPOAgent, Reward 84: 15.000, Len(game): 15, Training Time: 0.376s, Prediction Time: 0.376s
    label PPOAgent, Reward 85: 33.000, Len(game): 33, Training Time: 0.383s, Prediction Time: 0.383s
    label PPOAgent, Reward 86: 25.000, Len(game): 25, Training Time: 0.388s, Prediction Time: 0.388s
    label PPOAgent, Reward 87: 27.000, Len(game): 27, Training Time: 0.392s, Prediction Time: 0.392s
    label PPOAgent, Reward 88: 26.000, Len(game): 26, Training Time: 0.398s, Prediction Time: 0.398s
    label PPOAgent, Reward 89: 40.000, Len(game): 40, Training Time: 0.405s, Prediction Time: 0.405s
    label PPOAgent, Reward 90: 41.000, Len(game): 41, Training Time: 0.413s, Prediction Time: 0.413s
    label PPOAgent, Reward 91: 14.000, Len(game): 14, Training Time: 0.415s, Prediction Time: 0.415s
    label PPOAgent, Reward 92: 69.000, Len(game): 69, Training Time: 0.428s, Prediction Time: 0.428s
    label PPOAgent, Reward 93: 99.000, Len(game): 99, Training Time: 0.447s, Prediction Time: 0.447s
    label PPOAgent, Reward 94: 18.000, Len(game): 18, Training Time: 0.450s, Prediction Time: 0.450s
    label PPOAgent, Reward 95: 20.000, Len(game): 20, Training Time: 0.454s, Prediction Time: 0.454s
    label PPOAgent, Reward 96: 63.000, Len(game): 63, Training Time: 0.464s, Prediction Time: 0.464s
    label PPOAgent, Reward 97: 16.000, Len(game): 16, Training Time: 0.468s, Prediction Time: 0.468s
    label PPOAgent, Reward 98: 20.000, Len(game): 20, Training Time: 0.472s, Prediction Time: 0.472s
    label PPOAgent, Reward 99: 14.000, Len(game): 14, Training Time: 0.474s, Prediction Time: 0.474s
    label PolicyGradient, Reward 0: 25.000, Len(game): 25, Training Time: 0.005s, Prediction Time: 0.000s
    label PolicyGradient, Reward 1: 30.000, Len(game): 30, Training Time: 0.010s, Prediction Time: 0.005s
    label PolicyGradient, Reward 2: 12.000, Len(game): 12, Training Time: 0.016s, Prediction Time: 0.008s
    label PolicyGradient, Reward 3: 62.000, Len(game): 62, Training Time: 0.027s, Prediction Time: 0.019s
    label PolicyGradient, Reward 4: 17.000, Len(game): 17, Training Time: 0.036s, Prediction Time: 0.025s
    label PolicyGradient, Reward 5: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.029s
    label PolicyGradient, Reward 6: 17.000, Len(game): 17, Training Time: 0.059s, Prediction Time: 0.035s
    label PolicyGradient, Reward 7: 32.000, Len(game): 32, Training Time: 0.072s, Prediction Time: 0.044s
    label PolicyGradient, Reward 8: 12.000, Len(game): 12, Training Time: 0.086s, Prediction Time: 0.049s
    label PolicyGradient, Reward 9: 39.000, Len(game): 39, Training Time: 0.104s, Prediction Time: 0.061s
    label PolicyGradient, Reward 10: 18.000, Len(game): 18, Training Time: 0.126s, Prediction Time: 0.070s
    label PolicyGradient, Reward 11: 14.000, Len(game): 14, Training Time: 0.147s, Prediction Time: 0.079s
    label PolicyGradient, Reward 12: 41.000, Len(game): 41, Training Time: 0.175s, Prediction Time: 0.096s
    label PolicyGradient, Reward 13: 55.000, Len(game): 55, Training Time: 0.216s, Prediction Time: 0.117s
    label PolicyGradient, Reward 14: 10.000, Len(game): 10, Training Time: 0.254s, Prediction Time: 0.132s
    label PolicyGradient, Reward 15: 18.000, Len(game): 18, Training Time: 0.298s, Prediction Time: 0.150s
    label PolicyGradient, Reward 16: 12.000, Len(game): 12, Training Time: 0.343s, Prediction Time: 0.166s
    label PolicyGradient, Reward 17: 89.000, Len(game): 89, Training Time: 0.405s, Prediction Time: 0.206s
    label PolicyGradient, Reward 18: 11.000, Len(game): 11, Training Time: 0.464s, Prediction Time: 0.229s
    label PolicyGradient, Reward 19: 94.000, Len(game): 94, Training Time: 0.546s, Prediction Time: 0.276s
    label PolicyGradient, Reward 20: 26.000, Len(game): 26, Training Time: 0.636s, Prediction Time: 0.317s
    label PolicyGradient, Reward 21: 62.000, Len(game): 62, Training Time: 0.737s, Prediction Time: 0.367s
    label PolicyGradient, Reward 22: 77.000, Len(game): 77, Training Time: 0.852s, Prediction Time: 0.428s
    label PolicyGradient, Reward 23: 13.000, Len(game): 13, Training Time: 0.972s, Prediction Time: 0.478s
    label PolicyGradient, Reward 24: 97.000, Len(game): 97, Training Time: 1.125s, Prediction Time: 0.563s
    label PolicyGradient, Reward 25: 108.000, Len(game): 108, Training Time: 1.319s, Prediction Time: 0.665s
    label PolicyGradient, Reward 26: 22.000, Len(game): 22, Training Time: 1.512s, Prediction Time: 0.742s
    label PolicyGradient, Reward 27: 102.000, Len(game): 102, Training Time: 1.718s, Prediction Time: 0.863s
    label PolicyGradient, Reward 28: 109.000, Len(game): 109, Training Time: 1.996s, Prediction Time: 0.999s
    label PolicyGradient, Reward 29: 117.000, Len(game): 117, Training Time: 2.310s, Prediction Time: 1.162s
    label PolicyGradient, Reward 30: 16.000, Len(game): 16, Training Time: 2.625s, Prediction Time: 1.292s
    label PolicyGradient, Reward 31: 149.000, Len(game): 149, Training Time: 2.999s, Prediction Time: 1.518s
    label PolicyGradient, Reward 32: 105.000, Len(game): 105, Training Time: 3.434s, Prediction Time: 1.740s
    label PolicyGradient, Reward 33: 153.000, Len(game): 153, Training Time: 3.434s, Prediction Time: 2.021s
    label PolicyGradient, Reward 34: 117.000, Len(game): 117, Training Time: 3.434s, Prediction Time: 2.108s
    label PolicyGradient, Reward 35: 12.000, Len(game): 12, Training Time: 3.434s, Prediction Time: 2.116s
    label PolicyGradient, Reward 36: 265.000, Len(game): 265, Training Time: 3.434s, Prediction Time: 2.288s
    label PolicyGradient, Reward 37: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.299s
    label PolicyGradient, Reward 38: 119.000, Len(game): 119, Training Time: 3.434s, Prediction Time: 2.381s
    label PolicyGradient, Reward 39: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 2.393s
    label PolicyGradient, Reward 40: 176.000, Len(game): 176, Training Time: 3.434s, Prediction Time: 2.522s
    label PolicyGradient, Reward 41: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 2.603s
    label PolicyGradient, Reward 42: 103.000, Len(game): 103, Training Time: 3.434s, Prediction Time: 2.672s
    label PolicyGradient, Reward 43: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.683s
    label PolicyGradient, Reward 44: 34.000, Len(game): 34, Training Time: 3.434s, Prediction Time: 2.706s
    label PolicyGradient, Reward 45: 210.000, Len(game): 210, Training Time: 3.434s, Prediction Time: 2.847s
    label PolicyGradient, Reward 46: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 2.930s
    label PolicyGradient, Reward 47: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.011s
    label PolicyGradient, Reward 48: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 3.157s
    label PolicyGradient, Reward 49: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 3.182s
    label PolicyGradient, Reward 50: 39.000, Len(game): 39, Training Time: 3.434s, Prediction Time: 3.209s
    label PolicyGradient, Reward 51: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.295s
    label PolicyGradient, Reward 52: 104.000, Len(game): 104, Training Time: 3.434s, Prediction Time: 3.364s
    label PolicyGradient, Reward 53: 46.000, Len(game): 46, Training Time: 3.434s, Prediction Time: 3.397s
    label PolicyGradient, Reward 54: 18.000, Len(game): 18, Training Time: 3.434s, Prediction Time: 3.409s
    label PolicyGradient, Reward 55: 13.000, Len(game): 13, Training Time: 3.434s, Prediction Time: 3.417s
    label PolicyGradient, Reward 56: 111.000, Len(game): 111, Training Time: 3.434s, Prediction Time: 3.495s
    label PolicyGradient, Reward 57: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.582s
    label PolicyGradient, Reward 58: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 3.658s
    label PolicyGradient, Reward 59: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 3.745s
    label PolicyGradient, Reward 60: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.825s
    label PolicyGradient, Reward 61: 10.000, Len(game): 10, Training Time: 3.434s, Prediction Time: 3.832s
    label PolicyGradient, Reward 62: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 3.842s
    label PolicyGradient, Reward 63: 20.000, Len(game): 20, Training Time: 3.434s, Prediction Time: 3.855s
    label PolicyGradient, Reward 64: 115.000, Len(game): 115, Training Time: 3.434s, Prediction Time: 3.932s
    label PolicyGradient, Reward 65: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 3.960s
    label PolicyGradient, Reward 66: 148.000, Len(game): 148, Training Time: 3.434s, Prediction Time: 4.060s
    label PolicyGradient, Reward 67: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 4.089s
    label PolicyGradient, Reward 68: 124.000, Len(game): 124, Training Time: 3.434s, Prediction Time: 4.173s
    label PolicyGradient, Reward 69: 63.000, Len(game): 63, Training Time: 3.434s, Prediction Time: 4.214s
    label PolicyGradient, Reward 70: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 4.290s
    label PolicyGradient, Reward 71: 114.000, Len(game): 114, Training Time: 3.434s, Prediction Time: 4.366s
    label PolicyGradient, Reward 72: 17.000, Len(game): 17, Training Time: 3.434s, Prediction Time: 4.378s
    label PolicyGradient, Reward 73: 142.000, Len(game): 142, Training Time: 3.434s, Prediction Time: 4.477s
    label PolicyGradient, Reward 74: 152.000, Len(game): 152, Training Time: 3.434s, Prediction Time: 4.578s
    label PolicyGradient, Reward 75: 224.000, Len(game): 224, Training Time: 3.434s, Prediction Time: 4.730s
    label PolicyGradient, Reward 76: 106.000, Len(game): 106, Training Time: 3.434s, Prediction Time: 4.800s
    label PolicyGradient, Reward 77: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 4.810s
    label PolicyGradient, Reward 78: 149.000, Len(game): 149, Training Time: 3.434s, Prediction Time: 4.908s
    label PolicyGradient, Reward 79: 81.000, Len(game): 81, Training Time: 3.434s, Prediction Time: 4.962s
    label PolicyGradient, Reward 80: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.035s
    label PolicyGradient, Reward 81: 116.000, Len(game): 116, Training Time: 3.434s, Prediction Time: 5.114s
    label PolicyGradient, Reward 82: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 5.139s
    label PolicyGradient, Reward 83: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 5.286s
    label PolicyGradient, Reward 84: 33.000, Len(game): 33, Training Time: 3.434s, Prediction Time: 5.307s
    label PolicyGradient, Reward 85: 61.000, Len(game): 61, Training Time: 3.434s, Prediction Time: 5.348s
    label PolicyGradient, Reward 86: 21.000, Len(game): 21, Training Time: 3.434s, Prediction Time: 5.362s
    label PolicyGradient, Reward 87: 138.000, Len(game): 138, Training Time: 3.434s, Prediction Time: 5.455s
    label PolicyGradient, Reward 88: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 5.482s
    label PolicyGradient, Reward 89: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.555s
    label PolicyGradient, Reward 90: 113.000, Len(game): 113, Training Time: 3.434s, Prediction Time: 5.630s
    label PolicyGradient, Reward 91: 67.000, Len(game): 67, Training Time: 3.434s, Prediction Time: 5.674s
    label PolicyGradient, Reward 92: 51.000, Len(game): 51, Training Time: 3.434s, Prediction Time: 5.708s
    label PolicyGradient, Reward 93: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 5.794s
    label PolicyGradient, Reward 94: 77.000, Len(game): 77, Training Time: 3.434s, Prediction Time: 5.844s
    label PolicyGradient, Reward 95: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 5.873s
    label PolicyGradient, Reward 96: 40.000, Len(game): 40, Training Time: 3.434s, Prediction Time: 5.899s
    label PolicyGradient, Reward 97: 45.000, Len(game): 45, Training Time: 3.434s, Prediction Time: 5.930s
    label PolicyGradient, Reward 98: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 5.941s
    label PolicyGradient, Reward 99: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 5.950s
    label Controller-based, Reward 0: 112.000, Len(game): 112, Training Time: 0.002s, Prediction Time: 0.001s
    label Controller-based, Reward 1: 79.000, Len(game): 79, Training Time: 0.003s, Prediction Time: 0.001s
    label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.001s
    label Controller-based, Reward 3: 69.000, Len(game): 69, Training Time: 0.021s, Prediction Time: 0.003s
    no training
    label Controller-based, Reward 4: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.011s
    no training
    label Controller-based, Reward 5: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.020s
    no training
    label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.029s
    no training
    label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.036s
    no training
    label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.046s
    no training
    label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.054s
    no training
    label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.063s
    no training
    label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.071s
    no training
    label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.079s
    no training
    label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.085s
    no training
    label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.095s
    no training
    label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.103s
    no training
    label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.112s
    no training
    label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.120s
    no training
    label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.129s
    no training
    label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.137s
    no training
    label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.146s
    no training
    label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.153s
    no training
    label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.162s
    no training
    label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.171s
    no training
    label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.179s
    no training
    label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.187s
    no training
    label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.195s
    no training
    label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.203s
    no training
    label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.212s
    no training
    label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.220s
    no training
    label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.228s
    no training
    label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.237s
    no training
    label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.245s
    no training
    label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.253s
    no training
    label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.261s
    no training
    label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.270s
    no training
    label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.278s
    no training
    label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.286s
    no training
    label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.295s
    no training
    label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.303s
    no training
    label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.312s
    no training
    label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.320s
    no training
    label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.328s
    no training
    label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.336s
    no training
    label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.344s
    no training
    label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.352s
    no training
    label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.360s
    no training
    label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.369s
    no training
    label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.377s
    no training
    label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.385s
    no training
    label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.393s
    no training
    label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.401s
    no training
    label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.409s
    no training
    label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.418s
    no training
    label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.425s
    no training
    label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.433s
    no training
    label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.442s
    no training
    label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.450s
    no training
    label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.458s
    no training
    label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.466s
    no training
    label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.475s
    no training
    label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.483s
    no training
    label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.491s
    no training
    label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.499s
    no training
    label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.507s
    no training
    label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.516s
    no training
    label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.524s
    no training
    label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.532s
    no training
    label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.541s
    no training
    label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.549s
    no training
    label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.557s
    no training
    label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.566s
    no training
    label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.574s
    no training
    label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.582s
    no training
    label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.591s
    no training
    label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.598s
    no training
    label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.607s
    no training
    label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.615s
    no training
    label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.623s
    no training
    label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.632s
    no training
    label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.640s
    no training
    label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.648s
    no training
    label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.656s
    no training
    label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.664s
    no training
    label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.672s
    no training
    label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.681s
    no training
    label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.689s
    no training
    label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.697s
    no training
    label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.705s
    no training
    label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.714s
    no training
    label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.723s
    no training
    label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.732s
    no training
    label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.740s
    no training
    label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.749s
    no training
    label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.758s
    no training
    label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.766s
    no training
    label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.775s
    no training
    label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.783s
    no training
    label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.790s
    no training
    label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.799s
    label KACAgent, Reward 0: 20.000, Len(game): 20, Training Time: 0.004s, Prediction Time: 0.001s
    label KACAgent, Reward 1: 40.000, Len(game): 40, Training Time: 0.009s, Prediction Time: 0.007s
    label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.010s
    label KACAgent, Reward 3: 26.000, Len(game): 26, Training Time: 0.022s, Prediction Time: 0.016s
    label KACAgent, Reward 4: 113.000, Len(game): 113, Training Time: 0.037s, Prediction Time: 0.038s
    label KACAgent, Reward 5: 97.000, Len(game): 97, Training Time: 0.060s, Prediction Time: 0.061s
    label KACAgent, Reward 6: 101.000, Len(game): 101, Training Time: 0.097s, Prediction Time: 0.093s
    label KACAgent, Reward 7: 388.000, Len(game): 388, Training Time: 0.207s, Prediction Time: 0.219s
    label KACAgent, Reward 8: 284.000, Len(game): 284, Training Time: 0.385s, Prediction Time: 0.376s
    label KACAgent, Reward 9: 71.000, Len(game): 71, Training Time: 0.574s, Prediction Time: 0.489s
    label KACAgent, Reward 10: 130.000, Len(game): 130, Training Time: 0.809s, Prediction Time: 0.644s
    label KACAgent, Reward 11: 118.000, Len(game): 118, Training Time: 1.099s, Prediction Time: 0.830s
    label KACAgent, Reward 12: 262.000, Len(game): 262, Training Time: 1.471s, Prediction Time: 1.125s
    label KACAgent, Reward 13: 197.000, Len(game): 197, Training Time: 1.927s, Prediction Time: 1.463s
    label KACAgent, Reward 14: 217.000, Len(game): 217, Training Time: 2.488s, Prediction Time: 1.863s
    label KACAgent, Reward 15: 219.000, Len(game): 219, Training Time: 3.157s, Prediction Time: 2.340s
    label KACAgent, Reward 16: 299.000, Len(game): 299, Training Time: 3.157s, Prediction Time: 2.966s
    label KACAgent, Reward 17: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 3.123s
    label KACAgent, Reward 18: 637.000, Len(game): 637, Training Time: 3.157s, Prediction Time: 3.638s
    label KACAgent, Reward 19: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 3.793s
    label KACAgent, Reward 20: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 4.595s
    label KACAgent, Reward 21: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 4.727s
    label KACAgent, Reward 22: 335.000, Len(game): 335, Training Time: 3.157s, Prediction Time: 4.997s
    label KACAgent, Reward 23: 557.000, Len(game): 557, Training Time: 3.157s, Prediction Time: 5.442s
    label KACAgent, Reward 24: 414.000, Len(game): 414, Training Time: 3.157s, Prediction Time: 5.776s
    label KACAgent, Reward 25: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 5.932s
    label KACAgent, Reward 26: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 6.739s
    label KACAgent, Reward 27: 254.000, Len(game): 254, Training Time: 3.157s, Prediction Time: 6.943s
    label KACAgent, Reward 28: 204.000, Len(game): 204, Training Time: 3.157s, Prediction Time: 7.108s
    label KACAgent, Reward 29: 223.000, Len(game): 223, Training Time: 3.157s, Prediction Time: 7.285s
    label KACAgent, Reward 30: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 7.475s
    label KACAgent, Reward 31: 167.000, Len(game): 167, Training Time: 3.157s, Prediction Time: 7.612s
    label KACAgent, Reward 32: 316.000, Len(game): 316, Training Time: 3.157s, Prediction Time: 7.864s
    label KACAgent, Reward 33: 453.000, Len(game): 453, Training Time: 3.157s, Prediction Time: 8.228s
    label KACAgent, Reward 34: 146.000, Len(game): 146, Training Time: 3.157s, Prediction Time: 8.344s
    label KACAgent, Reward 35: 199.000, Len(game): 199, Training Time: 3.157s, Prediction Time: 8.503s
    label KACAgent, Reward 36: 193.000, Len(game): 193, Training Time: 3.157s, Prediction Time: 8.658s
    label KACAgent, Reward 37: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 9.461s
    label KACAgent, Reward 38: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 10.260s
    label KACAgent, Reward 39: 500.000, Len(game): 500, Training Time: 3.157s, Prediction Time: 10.659s
    label KACAgent, Reward 40: 211.000, Len(game): 211, Training Time: 3.157s, Prediction Time: 10.827s
    label KACAgent, Reward 41: 216.000, Len(game): 216, Training Time: 3.157s, Prediction Time: 11.004s
    label KACAgent, Reward 42: 225.000, Len(game): 225, Training Time: 3.157s, Prediction Time: 11.185s
    label KACAgent, Reward 43: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 11.476s
    label KACAgent, Reward 44: 152.000, Len(game): 152, Training Time: 3.157s, Prediction Time: 11.597s
    label KACAgent, Reward 45: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 11.703s
    label KACAgent, Reward 46: 367.000, Len(game): 367, Training Time: 3.157s, Prediction Time: 11.999s
    label KACAgent, Reward 47: 232.000, Len(game): 232, Training Time: 3.157s, Prediction Time: 12.187s
    label KACAgent, Reward 48: 252.000, Len(game): 252, Training Time: 3.157s, Prediction Time: 12.390s
    label KACAgent, Reward 49: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 12.576s
    label KACAgent, Reward 50: 311.000, Len(game): 311, Training Time: 3.157s, Prediction Time: 12.826s
    label KACAgent, Reward 51: 174.000, Len(game): 174, Training Time: 3.157s, Prediction Time: 12.968s
    label KACAgent, Reward 52: 251.000, Len(game): 251, Training Time: 3.157s, Prediction Time: 13.171s
    label KACAgent, Reward 53: 212.000, Len(game): 212, Training Time: 3.157s, Prediction Time: 13.341s
    label KACAgent, Reward 54: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 14.142s
    label KACAgent, Reward 55: 343.000, Len(game): 343, Training Time: 3.157s, Prediction Time: 14.415s
    label KACAgent, Reward 56: 476.000, Len(game): 476, Training Time: 3.157s, Prediction Time: 14.795s
    label KACAgent, Reward 57: 195.000, Len(game): 195, Training Time: 3.157s, Prediction Time: 14.949s
    label KACAgent, Reward 58: 177.000, Len(game): 177, Training Time: 3.157s, Prediction Time: 15.091s
    label KACAgent, Reward 59: 207.000, Len(game): 207, Training Time: 3.157s, Prediction Time: 15.256s
    label KACAgent, Reward 60: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 15.364s
    label KACAgent, Reward 61: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 15.511s
    label KACAgent, Reward 62: 201.000, Len(game): 201, Training Time: 3.157s, Prediction Time: 15.672s
    label KACAgent, Reward 63: 344.000, Len(game): 344, Training Time: 3.157s, Prediction Time: 15.947s
    label KACAgent, Reward 64: 197.000, Len(game): 197, Training Time: 3.157s, Prediction Time: 16.103s
    label KACAgent, Reward 65: 265.000, Len(game): 265, Training Time: 3.157s, Prediction Time: 16.320s
    label KACAgent, Reward 66: 168.000, Len(game): 168, Training Time: 3.157s, Prediction Time: 16.455s
    label KACAgent, Reward 67: 166.000, Len(game): 166, Training Time: 3.157s, Prediction Time: 16.587s
    label KACAgent, Reward 68: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 16.742s
    label KACAgent, Reward 69: 257.000, Len(game): 257, Training Time: 3.157s, Prediction Time: 16.951s
    label KACAgent, Reward 70: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 17.127s
    label KACAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 17.254s
    label KACAgent, Reward 72: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 18.057s
    label KACAgent, Reward 73: 315.000, Len(game): 315, Training Time: 3.157s, Prediction Time: 18.309s
    label KACAgent, Reward 74: 229.000, Len(game): 229, Training Time: 3.157s, Prediction Time: 18.493s
    label KACAgent, Reward 75: 830.000, Len(game): 830, Training Time: 3.157s, Prediction Time: 19.158s
    label KACAgent, Reward 76: 206.000, Len(game): 206, Training Time: 3.157s, Prediction Time: 19.324s
    label KACAgent, Reward 77: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 19.610s
    label KACAgent, Reward 78: 184.000, Len(game): 184, Training Time: 3.157s, Prediction Time: 19.765s
    label KACAgent, Reward 79: 186.000, Len(game): 186, Training Time: 3.157s, Prediction Time: 19.912s
    label KACAgent, Reward 80: 387.000, Len(game): 387, Training Time: 3.157s, Prediction Time: 20.222s
    label KACAgent, Reward 81: 189.000, Len(game): 189, Training Time: 3.157s, Prediction Time: 20.371s
    label KACAgent, Reward 82: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 20.520s
    label KACAgent, Reward 83: 328.000, Len(game): 328, Training Time: 3.157s, Prediction Time: 20.782s
    label KACAgent, Reward 84: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 21.065s
    label KACAgent, Reward 85: 264.000, Len(game): 264, Training Time: 3.157s, Prediction Time: 21.276s
    label KACAgent, Reward 86: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 21.453s
    label KACAgent, Reward 87: 180.000, Len(game): 180, Training Time: 3.157s, Prediction Time: 21.596s
    label KACAgent, Reward 88: 310.000, Len(game): 310, Training Time: 3.157s, Prediction Time: 21.846s
    label KACAgent, Reward 89: 365.000, Len(game): 365, Training Time: 3.157s, Prediction Time: 22.138s
    label KACAgent, Reward 90: 248.000, Len(game): 248, Training Time: 3.157s, Prediction Time: 22.336s
    label KACAgent, Reward 91: 688.000, Len(game): 688, Training Time: 3.157s, Prediction Time: 22.885s
    label KACAgent, Reward 92: 162.000, Len(game): 162, Training Time: 3.157s, Prediction Time: 23.014s
    label KACAgent, Reward 93: 173.000, Len(game): 173, Training Time: 3.157s, Prediction Time: 23.153s
    label KACAgent, Reward 94: 165.000, Len(game): 165, Training Time: 3.157s, Prediction Time: 23.284s
    label KACAgent, Reward 95: 226.000, Len(game): 226, Training Time: 3.157s, Prediction Time: 23.464s
    label KACAgent, Reward 96: 200.000, Len(game): 200, Training Time: 3.157s, Prediction Time: 23.624s
    label KACAgent, Reward 97: 334.000, Len(game): 334, Training Time: 3.157s, Prediction Time: 23.894s
    label KACAgent, Reward 98: 310.000, Len(game): 310, Training Time: 3.157s, Prediction Time: 24.143s
    label KACAgent, Reward 99: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 24.436s
    label DQNAgent, Reward 0: 33.000, Len(game): 33, Training Time: 0.000s, Prediction Time: 0.000s
    label DQNAgent, Reward 1: 17.000, Len(game): 17, Training Time: 0.000s, Prediction Time: 0.000s
    label DQNAgent, Reward 2: 36.000, Len(game): 36, Training Time: 0.026s, Prediction Time: 0.001s
    label DQNAgent, Reward 3: 11.000, Len(game): 11, Training Time: 0.038s, Prediction Time: 0.002s
    label DQNAgent, Reward 4: 30.000, Len(game): 30, Training Time: 0.076s, Prediction Time: 0.003s
    label DQNAgent, Reward 5: 28.000, Len(game): 28, Training Time: 0.107s, Prediction Time: 0.004s
    label DQNAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.128s, Prediction Time: 0.005s
    label DQNAgent, Reward 7: 20.000, Len(game): 20, Training Time: 0.154s, Prediction Time: 0.006s
    label DQNAgent, Reward 8: 34.000, Len(game): 34, Training Time: 0.192s, Prediction Time: 0.006s
    label DQNAgent, Reward 9: 17.000, Len(game): 17, Training Time: 0.210s, Prediction Time: 0.007s
    label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.225s, Prediction Time: 0.008s
    label DQNAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.241s, Prediction Time: 0.008s
    label DQNAgent, Reward 12: 17.000, Len(game): 17, Training Time: 0.260s, Prediction Time: 0.009s
    label DQNAgent, Reward 13: 31.000, Len(game): 31, Training Time: 0.294s, Prediction Time: 0.010s
    label DQNAgent, Reward 14: 11.000, Len(game): 11, Training Time: 0.307s, Prediction Time: 0.010s
    label DQNAgent, Reward 15: 10.000, Len(game): 10, Training Time: 0.319s, Prediction Time: 0.010s
    label DQNAgent, Reward 16: 21.000, Len(game): 21, Training Time: 0.343s, Prediction Time: 0.010s
    label DQNAgent, Reward 17: 27.000, Len(game): 27, Training Time: 0.373s, Prediction Time: 0.011s
    label DQNAgent, Reward 18: 25.000, Len(game): 25, Training Time: 0.401s, Prediction Time: 0.012s
    label DQNAgent, Reward 19: 14.000, Len(game): 14, Training Time: 0.417s, Prediction Time: 0.012s
    label DQNAgent, Reward 20: 15.000, Len(game): 15, Training Time: 0.434s, Prediction Time: 0.012s
    label DQNAgent, Reward 21: 17.000, Len(game): 17, Training Time: 0.452s, Prediction Time: 0.013s
    label DQNAgent, Reward 22: 14.000, Len(game): 14, Training Time: 0.468s, Prediction Time: 0.014s
    label DQNAgent, Reward 23: 29.000, Len(game): 29, Training Time: 0.502s, Prediction Time: 0.015s
    label DQNAgent, Reward 24: 28.000, Len(game): 28, Training Time: 0.534s, Prediction Time: 0.016s
    label DQNAgent, Reward 25: 78.000, Len(game): 78, Training Time: 0.630s, Prediction Time: 0.018s
    label DQNAgent, Reward 26: 21.000, Len(game): 21, Training Time: 0.654s, Prediction Time: 0.018s
    label DQNAgent, Reward 27: 23.000, Len(game): 23, Training Time: 0.681s, Prediction Time: 0.019s
    label DQNAgent, Reward 28: 41.000, Len(game): 41, Training Time: 0.727s, Prediction Time: 0.020s
    label DQNAgent, Reward 29: 40.000, Len(game): 40, Training Time: 0.773s, Prediction Time: 0.021s
    label DQNAgent, Reward 30: 54.000, Len(game): 54, Training Time: 0.835s, Prediction Time: 0.022s
    label DQNAgent, Reward 31: 65.000, Len(game): 65, Training Time: 0.908s, Prediction Time: 0.024s
    label DQNAgent, Reward 32: 86.000, Len(game): 86, Training Time: 1.010s, Prediction Time: 0.026s
    label DQNAgent, Reward 33: 27.000, Len(game): 27, Training Time: 1.039s, Prediction Time: 0.027s
    label DQNAgent, Reward 34: 166.000, Len(game): 166, Training Time: 1.230s, Prediction Time: 0.032s
    label DQNAgent, Reward 35: 48.000, Len(game): 48, Training Time: 1.287s, Prediction Time: 0.034s
    label DQNAgent, Reward 36: 135.000, Len(game): 135, Training Time: 1.457s, Prediction Time: 0.040s
    label DQNAgent, Reward 37: 118.000, Len(game): 118, Training Time: 1.607s, Prediction Time: 0.045s
    label DQNAgent, Reward 38: 159.000, Len(game): 159, Training Time: 1.790s, Prediction Time: 0.053s
    label DQNAgent, Reward 39: 222.000, Len(game): 222, Training Time: 2.040s, Prediction Time: 0.061s
    label DQNAgent, Reward 40: 219.000, Len(game): 219, Training Time: 2.294s, Prediction Time: 0.070s
    label DQNAgent, Reward 41: 180.000, Len(game): 180, Training Time: 2.503s, Prediction Time: 0.077s
    label DQNAgent, Reward 42: 178.000, Len(game): 178, Training Time: 2.706s, Prediction Time: 0.084s
    label DQNAgent, Reward 43: 176.000, Len(game): 176, Training Time: 2.900s, Prediction Time: 0.092s
    label DQNAgent, Reward 44: 247.000, Len(game): 247, Training Time: 3.171s, Prediction Time: 0.102s
    label DQNAgent, Reward 45: 195.000, Len(game): 195, Training Time: 3.171s, Prediction Time: 0.111s
    label DQNAgent, Reward 46: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.118s
    label DQNAgent, Reward 47: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.128s
    label DQNAgent, Reward 48: 232.000, Len(game): 232, Training Time: 3.171s, Prediction Time: 0.139s
    label DQNAgent, Reward 49: 236.000, Len(game): 236, Training Time: 3.171s, Prediction Time: 0.149s
    label DQNAgent, Reward 50: 216.000, Len(game): 216, Training Time: 3.171s, Prediction Time: 0.158s
    label DQNAgent, Reward 51: 178.000, Len(game): 178, Training Time: 3.171s, Prediction Time: 0.166s
    label DQNAgent, Reward 52: 188.000, Len(game): 188, Training Time: 3.171s, Prediction Time: 0.175s
    label DQNAgent, Reward 53: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.183s
    label DQNAgent, Reward 54: 239.000, Len(game): 239, Training Time: 3.171s, Prediction Time: 0.193s
    label DQNAgent, Reward 55: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.204s
    label DQNAgent, Reward 56: 160.000, Len(game): 160, Training Time: 3.171s, Prediction Time: 0.212s
    label DQNAgent, Reward 57: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.219s
    label DQNAgent, Reward 58: 196.000, Len(game): 196, Training Time: 3.171s, Prediction Time: 0.227s
    label DQNAgent, Reward 59: 171.000, Len(game): 171, Training Time: 3.171s, Prediction Time: 0.234s
    label DQNAgent, Reward 60: 168.000, Len(game): 168, Training Time: 3.171s, Prediction Time: 0.241s
    label DQNAgent, Reward 61: 223.000, Len(game): 223, Training Time: 3.171s, Prediction Time: 0.250s
    label DQNAgent, Reward 62: 215.000, Len(game): 215, Training Time: 3.171s, Prediction Time: 0.260s
    label DQNAgent, Reward 63: 182.000, Len(game): 182, Training Time: 3.171s, Prediction Time: 0.267s
    label DQNAgent, Reward 64: 171.000, Len(game): 171, Training Time: 3.171s, Prediction Time: 0.274s
    label DQNAgent, Reward 65: 268.000, Len(game): 268, Training Time: 3.171s, Prediction Time: 0.285s
    label DQNAgent, Reward 66: 244.000, Len(game): 244, Training Time: 3.171s, Prediction Time: 0.295s
    label DQNAgent, Reward 67: 162.000, Len(game): 162, Training Time: 3.171s, Prediction Time: 0.301s
    label DQNAgent, Reward 68: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.308s
    label DQNAgent, Reward 69: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.318s
    label DQNAgent, Reward 70: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.325s
    label DQNAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.332s
    label DQNAgent, Reward 72: 300.000, Len(game): 300, Training Time: 3.171s, Prediction Time: 0.344s
    label DQNAgent, Reward 73: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.353s
    label DQNAgent, Reward 74: 172.000, Len(game): 172, Training Time: 3.171s, Prediction Time: 0.360s
    label DQNAgent, Reward 75: 260.000, Len(game): 260, Training Time: 3.171s, Prediction Time: 0.371s
    label DQNAgent, Reward 76: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.379s
    label DQNAgent, Reward 77: 251.000, Len(game): 251, Training Time: 3.171s, Prediction Time: 0.389s
    label DQNAgent, Reward 78: 186.000, Len(game): 186, Training Time: 3.171s, Prediction Time: 0.397s
    label DQNAgent, Reward 79: 243.000, Len(game): 243, Training Time: 3.171s, Prediction Time: 0.405s
    label DQNAgent, Reward 80: 226.000, Len(game): 226, Training Time: 3.171s, Prediction Time: 0.415s
    label DQNAgent, Reward 81: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.426s
    label DQNAgent, Reward 82: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.433s
    label DQNAgent, Reward 83: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.443s
    label DQNAgent, Reward 84: 211.000, Len(game): 211, Training Time: 3.171s, Prediction Time: 0.451s
    label DQNAgent, Reward 85: 255.000, Len(game): 255, Training Time: 3.171s, Prediction Time: 0.461s
    label DQNAgent, Reward 86: 283.000, Len(game): 283, Training Time: 3.171s, Prediction Time: 0.473s
    label DQNAgent, Reward 87: 189.000, Len(game): 189, Training Time: 3.171s, Prediction Time: 0.480s
    label DQNAgent, Reward 88: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.489s
    label DQNAgent, Reward 89: 282.000, Len(game): 282, Training Time: 3.171s, Prediction Time: 0.501s
    label DQNAgent, Reward 90: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.509s
    label DQNAgent, Reward 91: 164.000, Len(game): 164, Training Time: 3.171s, Prediction Time: 0.517s
    label DQNAgent, Reward 92: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.525s
    label DQNAgent, Reward 93: 191.000, Len(game): 191, Training Time: 3.171s, Prediction Time: 0.534s
    label DQNAgent, Reward 94: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.543s
    label DQNAgent, Reward 95: 229.000, Len(game): 229, Training Time: 3.171s, Prediction Time: 0.552s
    label DQNAgent, Reward 96: 199.000, Len(game): 199, Training Time: 3.171s, Prediction Time: 0.560s
    label DQNAgent, Reward 97: 190.000, Len(game): 190, Training Time: 3.171s, Prediction Time: 0.568s
    label DQNAgent, Reward 98: 289.000, Len(game): 289, Training Time: 3.171s, Prediction Time: 0.579s
    label DQNAgent, Reward 99: 212.000, Len(game): 212, Training Time: 3.171s, Prediction Time: 0.588s
    Computed global error Bellman mean:  1.1717717636598608e-07  iter:  3
    label KQLearningHJBCP, Reward 0: 22.000, Len(game): 22, Training Time: 0.009s, Prediction Time: 0.000s
    Computed global error Bellman mean:  1.1552543799808745e-07  iter:  6
    label KQLearningHJBCP, Reward 1: 78.000, Len(game): 78, Training Time: 0.059s, Prediction Time: 0.011s
    Computed global error Bellman mean:  3.0488343749310424e-07  iter:  8
    label KQLearningHJBCP, Reward 2: 71.000, Len(game): 71, Training Time: 0.203s, Prediction Time: 0.023s
    Computed global error Bellman mean:  2.2164816109864766e-07  iter:  5
    label KQLearningHJBCP, Reward 3: 82.000, Len(game): 82, Training Time: 0.392s, Prediction Time: 0.042s
    Computed global error Bellman mean:  2.685937866324866e-07  iter:  5
    label KQLearningHJBCP, Reward 4: 76.000, Len(game): 76, Training Time: 0.717s, Prediction Time: 0.063s
    Computed global error Bellman mean:  2.2103691660173973e-07  iter:  5
    label KQLearningHJBCP, Reward 5: 83.000, Len(game): 83, Training Time: 1.269s, Prediction Time: 0.088s
    Computed global error Bellman mean:  2.3048877059193143e-07  iter:  6
    label KQLearningHJBCP, Reward 6: 195.000, Len(game): 195, Training Time: 2.550s, Prediction Time: 0.156s
    Computed global error Bellman mean:  0.10596339659857741  iter:  10
    label KQLearningHJBCP, Reward 7: 217.000, Len(game): 217, Training Time: 5.986s, Prediction Time: 0.239s
    label KQLearningHJBCP, Reward 8: 149.000, Len(game): 149, Training Time: 5.986s, Prediction Time: 0.312s
    label KQLearningHJBCP, Reward 9: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 0.358s
    label KQLearningHJBCP, Reward 10: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.400s
    label KQLearningHJBCP, Reward 11: 96.000, Len(game): 96, Training Time: 5.986s, Prediction Time: 0.448s
    label KQLearningHJBCP, Reward 12: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 0.510s
    label KQLearningHJBCP, Reward 13: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 0.566s
    label KQLearningHJBCP, Reward 14: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.612s
    label KQLearningHJBCP, Reward 15: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 0.662s
    label KQLearningHJBCP, Reward 16: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 0.710s
    label KQLearningHJBCP, Reward 17: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 0.769s
    label KQLearningHJBCP, Reward 18: 154.000, Len(game): 154, Training Time: 5.986s, Prediction Time: 0.847s
    label KQLearningHJBCP, Reward 19: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 0.911s
    label KQLearningHJBCP, Reward 20: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 0.959s
    label KQLearningHJBCP, Reward 21: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 1.014s
    label KQLearningHJBCP, Reward 22: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 1.059s
    label KQLearningHJBCP, Reward 23: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 1.123s
    label KQLearningHJBCP, Reward 24: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 1.174s
    label KQLearningHJBCP, Reward 25: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.228s
    label KQLearningHJBCP, Reward 26: 68.000, Len(game): 68, Training Time: 5.986s, Prediction Time: 1.261s
    label KQLearningHJBCP, Reward 27: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 1.315s
    label KQLearningHJBCP, Reward 28: 144.000, Len(game): 144, Training Time: 5.986s, Prediction Time: 1.386s
    label KQLearningHJBCP, Reward 29: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 1.432s
    label KQLearningHJBCP, Reward 30: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 1.481s
    label KQLearningHJBCP, Reward 31: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 1.541s
    label KQLearningHJBCP, Reward 32: 136.000, Len(game): 136, Training Time: 5.986s, Prediction Time: 1.609s
    label KQLearningHJBCP, Reward 33: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.659s
    label KQLearningHJBCP, Reward 34: 126.000, Len(game): 126, Training Time: 5.986s, Prediction Time: 1.721s
    label KQLearningHJBCP, Reward 35: 78.000, Len(game): 78, Training Time: 5.986s, Prediction Time: 1.761s
    label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.813s
    label KQLearningHJBCP, Reward 37: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.863s
    label KQLearningHJBCP, Reward 38: 176.000, Len(game): 176, Training Time: 5.986s, Prediction Time: 1.950s
    label KQLearningHJBCP, Reward 39: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 2.003s
    label KQLearningHJBCP, Reward 40: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 2.053s
    label KQLearningHJBCP, Reward 41: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.116s
    label KQLearningHJBCP, Reward 42: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.191s
    label KQLearningHJBCP, Reward 43: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 2.240s
    label KQLearningHJBCP, Reward 44: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 2.293s
    label KQLearningHJBCP, Reward 45: 164.000, Len(game): 164, Training Time: 5.986s, Prediction Time: 2.375s
    label KQLearningHJBCP, Reward 46: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.439s
    label KQLearningHJBCP, Reward 47: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.486s
    label KQLearningHJBCP, Reward 48: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 2.532s
    label KQLearningHJBCP, Reward 49: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 2.585s
    label KQLearningHJBCP, Reward 50: 119.000, Len(game): 119, Training Time: 5.986s, Prediction Time: 2.645s
    label KQLearningHJBCP, Reward 51: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 2.689s
    label KQLearningHJBCP, Reward 52: 117.000, Len(game): 117, Training Time: 5.986s, Prediction Time: 2.747s
    label KQLearningHJBCP, Reward 53: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.794s
    label KQLearningHJBCP, Reward 54: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 2.843s
    label KQLearningHJBCP, Reward 55: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.917s
    label KQLearningHJBCP, Reward 56: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 2.968s
    label KQLearningHJBCP, Reward 57: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.012s
    label KQLearningHJBCP, Reward 58: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.056s
    label KQLearningHJBCP, Reward 59: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 3.111s
    label KQLearningHJBCP, Reward 60: 166.000, Len(game): 166, Training Time: 5.986s, Prediction Time: 3.193s
    label KQLearningHJBCP, Reward 61: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.246s
    label KQLearningHJBCP, Reward 62: 124.000, Len(game): 124, Training Time: 5.986s, Prediction Time: 3.310s
    label KQLearningHJBCP, Reward 63: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 3.359s
    label KQLearningHJBCP, Reward 64: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 3.406s
    label KQLearningHJBCP, Reward 65: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 3.465s
    label KQLearningHJBCP, Reward 66: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.517s
    label KQLearningHJBCP, Reward 67: 163.000, Len(game): 163, Training Time: 5.986s, Prediction Time: 3.596s
    label KQLearningHJBCP, Reward 68: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 3.645s
    label KQLearningHJBCP, Reward 69: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 3.702s
    label KQLearningHJBCP, Reward 70: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 3.757s
    label KQLearningHJBCP, Reward 71: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.807s
    label KQLearningHJBCP, Reward 72: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.857s
    label KQLearningHJBCP, Reward 73: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 3.905s
    label KQLearningHJBCP, Reward 74: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 3.955s
    label KQLearningHJBCP, Reward 75: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.010s
    label KQLearningHJBCP, Reward 76: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.054s
    label KQLearningHJBCP, Reward 77: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 4.100s
    label KQLearningHJBCP, Reward 78: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.145s
    label KQLearningHJBCP, Reward 79: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 4.206s
    label KQLearningHJBCP, Reward 80: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 4.259s
    label KQLearningHJBCP, Reward 81: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 4.316s
    label KQLearningHJBCP, Reward 82: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 4.370s
    label KQLearningHJBCP, Reward 83: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.421s
    label KQLearningHJBCP, Reward 84: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.470s
    label KQLearningHJBCP, Reward 85: 120.000, Len(game): 120, Training Time: 5.986s, Prediction Time: 4.532s
    label KQLearningHJBCP, Reward 86: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.587s
    label KQLearningHJBCP, Reward 87: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 4.644s
    label KQLearningHJBCP, Reward 88: 83.000, Len(game): 83, Training Time: 5.986s, Prediction Time: 4.685s
    label KQLearningHJBCP, Reward 89: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 4.732s
    label KQLearningHJBCP, Reward 90: 107.000, Len(game): 107, Training Time: 5.986s, Prediction Time: 4.785s
    label KQLearningHJBCP, Reward 91: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.834s
    label KQLearningHJBCP, Reward 92: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.883s
    label KQLearningHJBCP, Reward 93: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 4.931s
    label KQLearningHJBCP, Reward 94: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 4.984s
    label KQLearningHJBCP, Reward 95: 115.000, Len(game): 115, Training Time: 5.986s, Prediction Time: 5.041s
    label KQLearningHJBCP, Reward 96: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 5.094s
    label KQLearningHJBCP, Reward 97: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 5.149s
    label KQLearningHJBCP, Reward 98: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 5.202s
    label KQLearningHJBCP, Reward 99: 81.000, Len(game): 81, Training Time: 5.986s, Prediction Time: 5.244s
    Computed global error Bellman mean:  1.0343403334293693  iter:  0
    label KQLearning, Reward 0: 20.000, Len(game): 20, Training Time: 0.003s, Prediction Time: 0.000s
    Computed global error Bellman mean:  0.11291406057087777  iter:  5
    label KQLearning, Reward 1: 191.000, Len(game): 191, Training Time: 0.102s, Prediction Time: 0.028s
    Computed global error Bellman mean:  0.019494447560319525  iter:  5
    label KQLearning, Reward 2: 32.000, Len(game): 32, Training Time: 0.206s, Prediction Time: 0.036s
    Computed global error Bellman mean:  0.05866904492151179  iter:  5
    label KQLearning, Reward 3: 123.000, Len(game): 123, Training Time: 0.498s, Prediction Time: 0.069s
    Computed global error Bellman mean:  0.007963787895601543  iter:  5
    label KQLearning, Reward 4: 162.000, Len(game): 162, Training Time: 1.013s, Prediction Time: 0.128s
    Computed global error Bellman mean:  0.7506316675407901  iter:  5
    label KQLearning, Reward 5: 109.000, Len(game): 109, Training Time: 1.777s, Prediction Time: 0.168s
    Computed global error Bellman mean:  0.10648151504465554  iter:  5
    label KQLearning, Reward 6: 264.000, Len(game): 264, Training Time: 3.193s, Prediction Time: 0.276s
    label KQLearning, Reward 7: 191.000, Len(game): 191, Training Time: 3.193s, Prediction Time: 0.377s
    label KQLearning, Reward 8: 233.000, Len(game): 233, Training Time: 3.193s, Prediction Time: 0.499s
    label KQLearning, Reward 9: 247.000, Len(game): 247, Training Time: 3.193s, Prediction Time: 0.629s
    label KQLearning, Reward 10: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 0.738s
    label KQLearning, Reward 11: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.262s
    label KQLearning, Reward 12: 155.000, Len(game): 155, Training Time: 3.193s, Prediction Time: 1.343s
    label KQLearning, Reward 13: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.864s
    label KQLearning, Reward 14: 266.000, Len(game): 266, Training Time: 3.193s, Prediction Time: 2.003s
    label KQLearning, Reward 15: 224.000, Len(game): 224, Training Time: 3.193s, Prediction Time: 2.120s
    label KQLearning, Reward 16: 237.000, Len(game): 237, Training Time: 3.193s, Prediction Time: 2.243s
    label KQLearning, Reward 17: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 2.277s
    label KQLearning, Reward 18: 255.000, Len(game): 255, Training Time: 3.193s, Prediction Time: 2.410s
    label KQLearning, Reward 19: 78.000, Len(game): 78, Training Time: 3.193s, Prediction Time: 2.450s
    label KQLearning, Reward 20: 229.000, Len(game): 229, Training Time: 3.193s, Prediction Time: 2.569s
    label KQLearning, Reward 21: 228.000, Len(game): 228, Training Time: 3.193s, Prediction Time: 2.688s
    label KQLearning, Reward 22: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 2.806s
    label KQLearning, Reward 23: 311.000, Len(game): 311, Training Time: 3.193s, Prediction Time: 2.967s
    label KQLearning, Reward 24: 265.000, Len(game): 265, Training Time: 3.193s, Prediction Time: 3.105s
    label KQLearning, Reward 25: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 3.213s
    label KQLearning, Reward 26: 67.000, Len(game): 67, Training Time: 3.193s, Prediction Time: 3.247s
    label KQLearning, Reward 27: 235.000, Len(game): 235, Training Time: 3.193s, Prediction Time: 3.368s
    label KQLearning, Reward 28: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 3.467s
    label KQLearning, Reward 29: 215.000, Len(game): 215, Training Time: 3.193s, Prediction Time: 3.581s
    label KQLearning, Reward 30: 256.000, Len(game): 256, Training Time: 3.193s, Prediction Time: 3.717s
    label KQLearning, Reward 31: 259.000, Len(game): 259, Training Time: 3.193s, Prediction Time: 3.852s
    label KQLearning, Reward 32: 241.000, Len(game): 241, Training Time: 3.193s, Prediction Time: 3.978s
    label KQLearning, Reward 33: 271.000, Len(game): 271, Training Time: 3.193s, Prediction Time: 4.122s
    label KQLearning, Reward 34: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 4.227s
    label KQLearning, Reward 35: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 4.345s
    label KQLearning, Reward 36: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 4.468s
    label KQLearning, Reward 37: 189.000, Len(game): 189, Training Time: 3.193s, Prediction Time: 4.566s
    label KQLearning, Reward 38: 72.000, Len(game): 72, Training Time: 3.193s, Prediction Time: 4.604s
    label KQLearning, Reward 39: 201.000, Len(game): 201, Training Time: 3.193s, Prediction Time: 4.709s
    label KQLearning, Reward 40: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 5.232s
    label KQLearning, Reward 41: 187.000, Len(game): 187, Training Time: 3.193s, Prediction Time: 5.330s
    label KQLearning, Reward 42: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 5.432s
    label KQLearning, Reward 43: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 5.559s
    label KQLearning, Reward 44: 226.000, Len(game): 226, Training Time: 3.193s, Prediction Time: 5.676s
    label KQLearning, Reward 45: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 5.786s
    label KQLearning, Reward 46: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 5.921s
    label KQLearning, Reward 47: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 6.039s
    label KQLearning, Reward 48: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 6.566s
    label KQLearning, Reward 49: 203.000, Len(game): 203, Training Time: 3.193s, Prediction Time: 6.673s
    label KQLearning, Reward 50: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 6.802s
    label KQLearning, Reward 51: 275.000, Len(game): 275, Training Time: 3.193s, Prediction Time: 6.949s
    label KQLearning, Reward 52: 257.000, Len(game): 257, Training Time: 3.193s, Prediction Time: 7.083s
    label KQLearning, Reward 53: 77.000, Len(game): 77, Training Time: 3.193s, Prediction Time: 7.124s
    label KQLearning, Reward 54: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 7.256s
    label KQLearning, Reward 55: 214.000, Len(game): 214, Training Time: 3.193s, Prediction Time: 7.368s
    label KQLearning, Reward 56: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 7.454s
    label KQLearning, Reward 57: 231.000, Len(game): 231, Training Time: 3.193s, Prediction Time: 7.574s
    label KQLearning, Reward 58: 178.000, Len(game): 178, Training Time: 3.193s, Prediction Time: 7.665s
    label KQLearning, Reward 59: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 7.794s
    label KQLearning, Reward 60: 161.000, Len(game): 161, Training Time: 3.193s, Prediction Time: 7.879s
    label KQLearning, Reward 61: 93.000, Len(game): 93, Training Time: 3.193s, Prediction Time: 7.928s
    label KQLearning, Reward 62: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 8.034s
    label KQLearning, Reward 63: 199.000, Len(game): 199, Training Time: 3.193s, Prediction Time: 8.139s
    label KQLearning, Reward 64: 258.000, Len(game): 258, Training Time: 3.193s, Prediction Time: 8.277s
    label KQLearning, Reward 65: 168.000, Len(game): 168, Training Time: 3.193s, Prediction Time: 8.365s
    label KQLearning, Reward 66: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 8.477s
    label KQLearning, Reward 67: 71.000, Len(game): 71, Training Time: 3.193s, Prediction Time: 8.513s
    label KQLearning, Reward 68: 70.000, Len(game): 70, Training Time: 3.193s, Prediction Time: 8.550s
    label KQLearning, Reward 69: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 9.076s
    label KQLearning, Reward 70: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.190s
    label KQLearning, Reward 71: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 9.327s
    label KQLearning, Reward 72: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 9.449s
    label KQLearning, Reward 73: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 9.484s
    label KQLearning, Reward 74: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.597s
    label KQLearning, Reward 75: 238.000, Len(game): 238, Training Time: 3.193s, Prediction Time: 9.720s
    label KQLearning, Reward 76: 243.000, Len(game): 243, Training Time: 3.193s, Prediction Time: 9.848s
    label KQLearning, Reward 77: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.964s
    label KQLearning, Reward 78: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 10.080s
    label KQLearning, Reward 79: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 10.213s
    label KQLearning, Reward 80: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 10.322s
    label KQLearning, Reward 81: 100.000, Len(game): 100, Training Time: 3.193s, Prediction Time: 10.373s
    label KQLearning, Reward 82: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 10.492s
    label KQLearning, Reward 83: 153.000, Len(game): 153, Training Time: 3.193s, Prediction Time: 10.573s
    label KQLearning, Reward 84: 333.000, Len(game): 333, Training Time: 3.193s, Prediction Time: 10.749s
    label KQLearning, Reward 85: 246.000, Len(game): 246, Training Time: 3.193s, Prediction Time: 10.880s
    label KQLearning, Reward 86: 307.000, Len(game): 307, Training Time: 3.193s, Prediction Time: 11.040s
    label KQLearning, Reward 87: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 11.169s
    label KQLearning, Reward 88: 269.000, Len(game): 269, Training Time: 3.193s, Prediction Time: 11.309s
    label KQLearning, Reward 89: 206.000, Len(game): 206, Training Time: 3.193s, Prediction Time: 11.417s
    label KQLearning, Reward 90: 173.000, Len(game): 173, Training Time: 3.193s, Prediction Time: 11.507s
    label KQLearning, Reward 91: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 11.593s
    label KQLearning, Reward 92: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 11.701s
    label KQLearning, Reward 93: 248.000, Len(game): 248, Training Time: 3.193s, Prediction Time: 11.829s
    label KQLearning, Reward 94: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 11.946s
    label KQLearning, Reward 95: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 12.049s
    label KQLearning, Reward 96: 226.000, Len(game): 226, Training Time: 3.193s, Prediction Time: 12.169s
    label KQLearning, Reward 97: 232.000, Len(game): 232, Training Time: 3.193s, Prediction Time: 12.288s
    label KQLearning, Reward 98: 230.000, Len(game): 230, Training Time: 3.193s, Prediction Time: 12.407s
    label KQLearning, Reward 99: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 12.936s
    0
    label PPOAgent, Reward 0: 12.000, Len(game): 12, Training Time: 0.003s, Prediction Time: 0.003s
    label PPOAgent, Reward 1: 23.000, Len(game): 23, Training Time: 0.007s, Prediction Time: 0.007s
    label PPOAgent, Reward 2: 12.000, Len(game): 12, Training Time: 0.010s, Prediction Time: 0.010s
    label PPOAgent, Reward 3: 14.000, Len(game): 14, Training Time: 0.013s, Prediction Time: 0.013s
    label PPOAgent, Reward 4: 24.000, Len(game): 24, Training Time: 0.018s, Prediction Time: 0.018s
    label PPOAgent, Reward 5: 63.000, Len(game): 63, Training Time: 0.030s, Prediction Time: 0.030s
    label PPOAgent, Reward 6: 35.000, Len(game): 35, Training Time: 0.037s, Prediction Time: 0.037s
    label PPOAgent, Reward 7: 21.000, Len(game): 21, Training Time: 0.040s, Prediction Time: 0.040s
    label PPOAgent, Reward 8: 14.000, Len(game): 14, Training Time: 0.043s, Prediction Time: 0.043s
    label PPOAgent, Reward 9: 11.000, Len(game): 11, Training Time: 0.045s, Prediction Time: 0.045s
    label PPOAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.048s, Prediction Time: 0.048s
    label PPOAgent, Reward 11: 15.000, Len(game): 15, Training Time: 0.050s, Prediction Time: 0.050s
    label PPOAgent, Reward 12: 15.000, Len(game): 15, Training Time: 0.053s, Prediction Time: 0.053s
    label PPOAgent, Reward 13: 16.000, Len(game): 16, Training Time: 0.057s, Prediction Time: 0.057s
    label PPOAgent, Reward 14: 12.000, Len(game): 12, Training Time: 0.059s, Prediction Time: 0.059s
    label PPOAgent, Reward 15: 14.000, Len(game): 14, Training Time: 0.062s, Prediction Time: 0.062s
    label PPOAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.064s, Prediction Time: 0.064s
    label PPOAgent, Reward 17: 21.000, Len(game): 21, Training Time: 0.068s, Prediction Time: 0.068s
    label PPOAgent, Reward 18: 15.000, Len(game): 15, Training Time: 0.071s, Prediction Time: 0.071s
    label PPOAgent, Reward 19: 31.000, Len(game): 31, Training Time: 0.077s, Prediction Time: 0.077s
    label PPOAgent, Reward 20: 11.000, Len(game): 11, Training Time: 0.078s, Prediction Time: 0.078s
    label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.081s, Prediction Time: 0.081s
    label PPOAgent, Reward 22: 10.000, Len(game): 10, Training Time: 0.082s, Prediction Time: 0.082s
    label PPOAgent, Reward 23: 43.000, Len(game): 43, Training Time: 0.091s, Prediction Time: 0.091s
    label PPOAgent, Reward 24: 22.000, Len(game): 22, Training Time: 0.095s, Prediction Time: 0.095s
    label PPOAgent, Reward 25: 27.000, Len(game): 27, Training Time: 0.100s, Prediction Time: 0.100s
    label PPOAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.103s, Prediction Time: 0.103s
    label PPOAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s
    label PPOAgent, Reward 28: 27.000, Len(game): 27, Training Time: 0.112s, Prediction Time: 0.112s
    label PPOAgent, Reward 29: 37.000, Len(game): 37, Training Time: 0.119s, Prediction Time: 0.119s
    label PPOAgent, Reward 30: 15.000, Len(game): 15, Training Time: 0.121s, Prediction Time: 0.121s
    label PPOAgent, Reward 31: 18.000, Len(game): 18, Training Time: 0.124s, Prediction Time: 0.124s
    label PPOAgent, Reward 32: 14.000, Len(game): 14, Training Time: 0.127s, Prediction Time: 0.127s
    label PPOAgent, Reward 33: 22.000, Len(game): 22, Training Time: 0.130s, Prediction Time: 0.130s
    label PPOAgent, Reward 34: 8.000, Len(game): 8, Training Time: 0.133s, Prediction Time: 0.133s
    label PPOAgent, Reward 35: 44.000, Len(game): 44, Training Time: 0.140s, Prediction Time: 0.140s
    label PPOAgent, Reward 36: 13.000, Len(game): 13, Training Time: 0.143s, Prediction Time: 0.143s
    label PPOAgent, Reward 37: 17.000, Len(game): 17, Training Time: 0.146s, Prediction Time: 0.146s
    label PPOAgent, Reward 38: 20.000, Len(game): 20, Training Time: 0.150s, Prediction Time: 0.150s
    label PPOAgent, Reward 39: 53.000, Len(game): 53, Training Time: 0.160s, Prediction Time: 0.160s
    label PPOAgent, Reward 40: 22.000, Len(game): 22, Training Time: 0.164s, Prediction Time: 0.164s
    label PPOAgent, Reward 41: 47.000, Len(game): 47, Training Time: 0.174s, Prediction Time: 0.174s
    label PPOAgent, Reward 42: 36.000, Len(game): 36, Training Time: 0.181s, Prediction Time: 0.181s
    label PPOAgent, Reward 43: 19.000, Len(game): 19, Training Time: 0.184s, Prediction Time: 0.184s
    label PPOAgent, Reward 44: 11.000, Len(game): 11, Training Time: 0.186s, Prediction Time: 0.186s
    label PPOAgent, Reward 45: 12.000, Len(game): 12, Training Time: 0.189s, Prediction Time: 0.189s
    label PPOAgent, Reward 46: 38.000, Len(game): 38, Training Time: 0.196s, Prediction Time: 0.196s
    label PPOAgent, Reward 47: 37.000, Len(game): 37, Training Time: 0.203s, Prediction Time: 0.203s
    label PPOAgent, Reward 48: 17.000, Len(game): 17, Training Time: 0.205s, Prediction Time: 0.205s
    label PPOAgent, Reward 49: 46.000, Len(game): 46, Training Time: 0.212s, Prediction Time: 0.212s
    label PPOAgent, Reward 50: 16.000, Len(game): 16, Training Time: 0.215s, Prediction Time: 0.215s
    label PPOAgent, Reward 51: 24.000, Len(game): 24, Training Time: 0.219s, Prediction Time: 0.219s
    label PPOAgent, Reward 52: 18.000, Len(game): 18, Training Time: 0.221s, Prediction Time: 0.221s
    label PPOAgent, Reward 53: 25.000, Len(game): 25, Training Time: 0.265s, Prediction Time: 0.265s
    label PPOAgent, Reward 54: 16.000, Len(game): 16, Training Time: 0.268s, Prediction Time: 0.268s
    label PPOAgent, Reward 55: 37.000, Len(game): 37, Training Time: 0.275s, Prediction Time: 0.275s
    label PPOAgent, Reward 56: 20.000, Len(game): 20, Training Time: 0.279s, Prediction Time: 0.279s
    label PPOAgent, Reward 57: 28.000, Len(game): 28, Training Time: 0.284s, Prediction Time: 0.284s
    label PPOAgent, Reward 58: 15.000, Len(game): 15, Training Time: 0.287s, Prediction Time: 0.287s
    label PPOAgent, Reward 59: 11.000, Len(game): 11, Training Time: 0.289s, Prediction Time: 0.289s
    label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.293s, Prediction Time: 0.293s
    label PPOAgent, Reward 61: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.295s
    label PPOAgent, Reward 62: 24.000, Len(game): 24, Training Time: 0.299s, Prediction Time: 0.299s
    label PPOAgent, Reward 63: 15.000, Len(game): 15, Training Time: 0.301s, Prediction Time: 0.301s
    label PPOAgent, Reward 64: 14.000, Len(game): 14, Training Time: 0.303s, Prediction Time: 0.303s
    label PPOAgent, Reward 65: 10.000, Len(game): 10, Training Time: 0.304s, Prediction Time: 0.304s
    label PPOAgent, Reward 66: 15.000, Len(game): 15, Training Time: 0.308s, Prediction Time: 0.308s
    label PPOAgent, Reward 67: 9.000, Len(game): 9, Training Time: 0.309s, Prediction Time: 0.309s
    label PPOAgent, Reward 68: 16.000, Len(game): 16, Training Time: 0.312s, Prediction Time: 0.312s
    label PPOAgent, Reward 69: 18.000, Len(game): 18, Training Time: 0.315s, Prediction Time: 0.315s
    label PPOAgent, Reward 70: 15.000, Len(game): 15, Training Time: 0.319s, Prediction Time: 0.319s
    label PPOAgent, Reward 71: 24.000, Len(game): 24, Training Time: 0.325s, Prediction Time: 0.325s
    label PPOAgent, Reward 72: 51.000, Len(game): 51, Training Time: 0.335s, Prediction Time: 0.335s
    label PPOAgent, Reward 73: 14.000, Len(game): 14, Training Time: 0.338s, Prediction Time: 0.338s
    label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.344s, Prediction Time: 0.344s
    label PPOAgent, Reward 75: 16.000, Len(game): 16, Training Time: 0.347s, Prediction Time: 0.347s
    label PPOAgent, Reward 76: 89.000, Len(game): 89, Training Time: 0.366s, Prediction Time: 0.366s
    label PPOAgent, Reward 77: 17.000, Len(game): 17, Training Time: 0.369s, Prediction Time: 0.369s
    label PPOAgent, Reward 78: 21.000, Len(game): 21, Training Time: 0.373s, Prediction Time: 0.373s
    label PPOAgent, Reward 79: 17.000, Len(game): 17, Training Time: 0.376s, Prediction Time: 0.376s
    label PPOAgent, Reward 80: 26.000, Len(game): 26, Training Time: 0.381s, Prediction Time: 0.381s
    label PPOAgent, Reward 81: 31.000, Len(game): 31, Training Time: 0.388s, Prediction Time: 0.388s
    label PPOAgent, Reward 82: 20.000, Len(game): 20, Training Time: 0.391s, Prediction Time: 0.391s
    label PPOAgent, Reward 83: 14.000, Len(game): 14, Training Time: 0.394s, Prediction Time: 0.394s
    label PPOAgent, Reward 84: 13.000, Len(game): 13, Training Time: 0.397s, Prediction Time: 0.397s
    label PPOAgent, Reward 85: 15.000, Len(game): 15, Training Time: 0.400s, Prediction Time: 0.400s
    label PPOAgent, Reward 86: 16.000, Len(game): 16, Training Time: 0.403s, Prediction Time: 0.403s
    label PPOAgent, Reward 87: 19.000, Len(game): 19, Training Time: 0.407s, Prediction Time: 0.407s
    label PPOAgent, Reward 88: 13.000, Len(game): 13, Training Time: 0.409s, Prediction Time: 0.409s
    label PPOAgent, Reward 89: 12.000, Len(game): 12, Training Time: 0.412s, Prediction Time: 0.412s
    label PPOAgent, Reward 90: 20.000, Len(game): 20, Training Time: 0.415s, Prediction Time: 0.415s
    label PPOAgent, Reward 91: 20.000, Len(game): 20, Training Time: 0.420s, Prediction Time: 0.420s
    label PPOAgent, Reward 92: 56.000, Len(game): 56, Training Time: 0.431s, Prediction Time: 0.431s
    label PPOAgent, Reward 93: 11.000, Len(game): 11, Training Time: 0.434s, Prediction Time: 0.434s
    label PPOAgent, Reward 94: 26.000, Len(game): 26, Training Time: 0.438s, Prediction Time: 0.438s
    label PPOAgent, Reward 95: 11.000, Len(game): 11, Training Time: 0.440s, Prediction Time: 0.440s
    label PPOAgent, Reward 96: 21.000, Len(game): 21, Training Time: 0.444s, Prediction Time: 0.444s
    label PPOAgent, Reward 97: 36.000, Len(game): 36, Training Time: 0.451s, Prediction Time: 0.451s
    label PPOAgent, Reward 98: 26.000, Len(game): 26, Training Time: 0.455s, Prediction Time: 0.455s
    label PPOAgent, Reward 99: 15.000, Len(game): 15, Training Time: 0.458s, Prediction Time: 0.458s
    label PolicyGradient, Reward 0: 10.000, Len(game): 10, Training Time: 0.004s, Prediction Time: 0.000s
    label PolicyGradient, Reward 1: 13.000, Len(game): 13, Training Time: 0.007s, Prediction Time: 0.003s
    label PolicyGradient, Reward 2: 13.000, Len(game): 13, Training Time: 0.011s, Prediction Time: 0.006s
    label PolicyGradient, Reward 3: 27.000, Len(game): 27, Training Time: 0.017s, Prediction Time: 0.011s
    label PolicyGradient, Reward 4: 10.000, Len(game): 10, Training Time: 0.023s, Prediction Time: 0.013s
    label PolicyGradient, Reward 5: 15.000, Len(game): 15, Training Time: 0.032s, Prediction Time: 0.016s
    label PolicyGradient, Reward 6: 12.000, Len(game): 12, Training Time: 0.040s, Prediction Time: 0.018s
    label PolicyGradient, Reward 7: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.021s
    label PolicyGradient, Reward 8: 17.000, Len(game): 17, Training Time: 0.056s, Prediction Time: 0.026s
    label PolicyGradient, Reward 9: 17.000, Len(game): 17, Training Time: 0.065s, Prediction Time: 0.031s
    label PolicyGradient, Reward 10: 15.000, Len(game): 15, Training Time: 0.075s, Prediction Time: 0.036s
    label PolicyGradient, Reward 11: 23.000, Len(game): 23, Training Time: 0.087s, Prediction Time: 0.043s
    label PolicyGradient, Reward 12: 59.000, Len(game): 59, Training Time: 0.103s, Prediction Time: 0.058s
    label PolicyGradient, Reward 13: 37.000, Len(game): 37, Training Time: 0.124s, Prediction Time: 0.070s
    label PolicyGradient, Reward 14: 21.000, Len(game): 21, Training Time: 0.146s, Prediction Time: 0.079s
    label PolicyGradient, Reward 15: 42.000, Len(game): 42, Training Time: 0.171s, Prediction Time: 0.095s
    label PolicyGradient, Reward 16: 15.000, Len(game): 15, Training Time: 0.197s, Prediction Time: 0.106s
    label PolicyGradient, Reward 17: 19.000, Len(game): 19, Training Time: 0.230s, Prediction Time: 0.118s
    label PolicyGradient, Reward 18: 63.000, Len(game): 63, Training Time: 0.274s, Prediction Time: 0.148s
    label PolicyGradient, Reward 19: 30.000, Len(game): 30, Training Time: 0.324s, Prediction Time: 0.169s
    label PolicyGradient, Reward 20: 74.000, Len(game): 74, Training Time: 0.392s, Prediction Time: 0.206s
    label PolicyGradient, Reward 21: 28.000, Len(game): 28, Training Time: 0.457s, Prediction Time: 0.235s
    label PolicyGradient, Reward 22: 96.000, Len(game): 96, Training Time: 0.547s, Prediction Time: 0.292s
    label PolicyGradient, Reward 23: 27.000, Len(game): 27, Training Time: 0.633s, Prediction Time: 0.330s
    label PolicyGradient, Reward 24: 16.000, Len(game): 16, Training Time: 0.731s, Prediction Time: 0.367s
    label PolicyGradient, Reward 25: 33.000, Len(game): 33, Training Time: 0.831s, Prediction Time: 0.414s
    label PolicyGradient, Reward 26: 13.000, Len(game): 13, Training Time: 0.942s, Prediction Time: 0.459s
    label PolicyGradient, Reward 27: 47.000, Len(game): 47, Training Time: 1.070s, Prediction Time: 0.521s
    label PolicyGradient, Reward 28: 92.000, Len(game): 92, Training Time: 1.212s, Prediction Time: 0.606s
    label PolicyGradient, Reward 29: 34.000, Len(game): 34, Training Time: 1.364s, Prediction Time: 0.670s
    label PolicyGradient, Reward 30: 128.000, Len(game): 128, Training Time: 1.567s, Prediction Time: 0.790s
    label PolicyGradient, Reward 31: 49.000, Len(game): 49, Training Time: 1.766s, Prediction Time: 0.890s
    label PolicyGradient, Reward 32: 124.000, Len(game): 124, Training Time: 2.011s, Prediction Time: 1.034s
    label PolicyGradient, Reward 33: 45.000, Len(game): 45, Training Time: 2.287s, Prediction Time: 1.158s
    label PolicyGradient, Reward 34: 124.000, Len(game): 124, Training Time: 2.597s, Prediction Time: 1.329s
    label PolicyGradient, Reward 35: 25.000, Len(game): 25, Training Time: 2.924s, Prediction Time: 1.462s
    label PolicyGradient, Reward 36: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 1.612s
    label PolicyGradient, Reward 37: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.772s
    label PolicyGradient, Reward 38: 48.000, Len(game): 48, Training Time: 3.234s, Prediction Time: 1.801s
    label PolicyGradient, Reward 39: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 1.832s
    label PolicyGradient, Reward 40: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 1.845s
    label PolicyGradient, Reward 41: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 1.861s
    label PolicyGradient, Reward 42: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.884s
    label PolicyGradient, Reward 43: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.907s
    label PolicyGradient, Reward 44: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.924s
    label PolicyGradient, Reward 45: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.938s
    label PolicyGradient, Reward 46: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.962s
    label PolicyGradient, Reward 47: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 1.981s
    label PolicyGradient, Reward 48: 32.000, Len(game): 32, Training Time: 3.234s, Prediction Time: 2.001s
    label PolicyGradient, Reward 49: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.020s
    label PolicyGradient, Reward 50: 50.000, Len(game): 50, Training Time: 3.234s, Prediction Time: 2.051s
    label PolicyGradient, Reward 51: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.072s
    label PolicyGradient, Reward 52: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.097s
    label PolicyGradient, Reward 53: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.117s
    label PolicyGradient, Reward 54: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.138s
    label PolicyGradient, Reward 55: 28.000, Len(game): 28, Training Time: 3.234s, Prediction Time: 2.156s
    label PolicyGradient, Reward 56: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.176s
    label PolicyGradient, Reward 57: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.197s
    label PolicyGradient, Reward 58: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.213s
    label PolicyGradient, Reward 59: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.228s
    label PolicyGradient, Reward 60: 29.000, Len(game): 29, Training Time: 3.234s, Prediction Time: 2.246s
    label PolicyGradient, Reward 61: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.266s
    label PolicyGradient, Reward 62: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.286s
    label PolicyGradient, Reward 63: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.311s
    label PolicyGradient, Reward 64: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.336s
    label PolicyGradient, Reward 65: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.359s
    label PolicyGradient, Reward 66: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 2.373s
    label PolicyGradient, Reward 67: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 2.383s
    label PolicyGradient, Reward 68: 16.000, Len(game): 16, Training Time: 3.234s, Prediction Time: 2.393s
    label PolicyGradient, Reward 69: 21.000, Len(game): 21, Training Time: 3.234s, Prediction Time: 2.406s
    label PolicyGradient, Reward 70: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.432s
    label PolicyGradient, Reward 71: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 2.445s
    label PolicyGradient, Reward 72: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.471s
    label PolicyGradient, Reward 73: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.490s
    label PolicyGradient, Reward 74: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.510s
    label PolicyGradient, Reward 75: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.530s
    label PolicyGradient, Reward 76: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 2.552s
    label PolicyGradient, Reward 77: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.569s
    label PolicyGradient, Reward 78: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.587s
    label PolicyGradient, Reward 79: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.603s
    label PolicyGradient, Reward 80: 59.000, Len(game): 59, Training Time: 3.234s, Prediction Time: 2.639s
    label PolicyGradient, Reward 81: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.664s
    label PolicyGradient, Reward 82: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.686s
    label PolicyGradient, Reward 83: 37.000, Len(game): 37, Training Time: 3.234s, Prediction Time: 2.708s
    label PolicyGradient, Reward 84: 44.000, Len(game): 44, Training Time: 3.234s, Prediction Time: 2.735s
    label PolicyGradient, Reward 85: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 2.757s
    label PolicyGradient, Reward 86: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.781s
    label PolicyGradient, Reward 87: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.805s
    label PolicyGradient, Reward 88: 20.000, Len(game): 20, Training Time: 3.234s, Prediction Time: 2.818s
    label PolicyGradient, Reward 89: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.840s
    label PolicyGradient, Reward 90: 40.000, Len(game): 40, Training Time: 3.234s, Prediction Time: 2.866s
    label PolicyGradient, Reward 91: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 2.896s
    label PolicyGradient, Reward 92: 27.000, Len(game): 27, Training Time: 3.234s, Prediction Time: 2.912s
    label PolicyGradient, Reward 93: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.938s
    label PolicyGradient, Reward 94: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.962s
    label PolicyGradient, Reward 95: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.978s
    label PolicyGradient, Reward 96: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.000s
    label PolicyGradient, Reward 97: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 3.014s
    label PolicyGradient, Reward 98: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 3.035s
    label PolicyGradient, Reward 99: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.057s
    label Controller-based, Reward 0: 138.000, Len(game): 138, Training Time: 0.003s, Prediction Time: 0.001s
    label Controller-based, Reward 1: 230.000, Len(game): 230, Training Time: 0.006s, Prediction Time: 0.003s
    label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.018s, Prediction Time: 0.003s
    label Controller-based, Reward 3: 148.000, Len(game): 148, Training Time: 0.028s, Prediction Time: 0.005s
    label Controller-based, Reward 4: 9.000, Len(game): 9, Training Time: 0.037s, Prediction Time: 0.005s
    label Controller-based, Reward 5: 263.000, Len(game): 263, Training Time: 0.049s, Prediction Time: 0.006s
    label Controller-based, Reward 6: 206.000, Len(game): 206, Training Time: 0.061s, Prediction Time: 0.008s
    label Controller-based, Reward 7: 249.000, Len(game): 249, Training Time: 0.073s, Prediction Time: 0.010s
    label Controller-based, Reward 8: 281.000, Len(game): 281, Training Time: 0.086s, Prediction Time: 0.012s
    label Controller-based, Reward 9: 153.000, Len(game): 153, Training Time: 0.097s, Prediction Time: 0.013s
    no training
    label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.097s, Prediction Time: 0.021s
    label Controller-based, Reward 11: 750.000, Len(game): 750, Training Time: 0.114s, Prediction Time: 0.028s
    no training
    label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.037s
    no training
    label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.045s
    no training
    label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.053s
    no training
    label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.061s
    no training
    label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.070s
    no training
    label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.078s
    no training
    label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.086s
    no training
    label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.095s
    no training
    label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.103s
    no training
    label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.113s
    no training
    label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.121s
    no training
    label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.128s
    no training
    label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.137s
    no training
    label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.146s
    no training
    label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.154s
    no training
    label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.161s
    no training
    label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.170s
    no training
    label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.179s
    no training
    label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.187s
    no training
    label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.195s
    no training
    label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.203s
    no training
    label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.210s
    no training
    label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.217s
    no training
    label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.225s
    no training
    label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.234s
    no training
    label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.242s
    no training
    label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.250s
    no training
    label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.259s
    no training
    label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.267s
    no training
    label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.275s
    no training
    label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.284s
    no training
    label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.292s
    no training
    label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.301s
    no training
    label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.308s
    no training
    label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.317s
    no training
    label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.326s
    no training
    label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.334s
    no training
    label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.342s
    no training
    label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.351s
    no training
    label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.358s
    no training
    label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.366s
    no training
    label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.374s
    no training
    label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.382s
    no training
    label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.391s
    no training
    label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.399s
    no training
    label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.407s
    no training
    label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.415s
    no training
    label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.423s
    no training
    label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.431s
    no training
    label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.439s
    no training
    label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.447s
    no training
    label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.455s
    no training
    label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.463s
    no training
    label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.471s
    no training
    label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.480s
    no training
    label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.488s
    no training
    label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.496s
    no training
    label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.505s
    no training
    label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.513s
    no training
    label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.522s
    no training
    label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.530s
    no training
    label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.538s
    no training
    label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.546s
    no training
    label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.555s
    no training
    label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.563s
    no training
    label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.572s
    no training
    label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.581s
    no training
    label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.589s
    no training
    label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.597s
    no training
    label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.605s
    no training
    label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.613s
    no training
    label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.621s
    no training
    label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.630s
    no training
    label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.638s
    no training
    label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.647s
    no training
    label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.655s
    no training
    label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.664s
    no training
    label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.671s
    no training
    label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.681s
    no training
    label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.689s
    no training
    label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.698s
    no training
    label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.707s
    no training
    label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.715s
    no training
    label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.722s
    no training
    label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.731s
    no training
    label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.740s
    no training
    label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.749s
    no training
    label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.756s
    label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s
    label KACAgent, Reward 1: 48.000, Len(game): 48, Training Time: 0.008s, Prediction Time: 0.009s
    label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.012s
    label KACAgent, Reward 3: 70.000, Len(game): 70, Training Time: 0.024s, Prediction Time: 0.026s
    label KACAgent, Reward 4: 60.000, Len(game): 60, Training Time: 0.038s, Prediction Time: 0.040s
    label KACAgent, Reward 5: 105.000, Len(game): 105, Training Time: 0.063s, Prediction Time: 0.064s
    label KACAgent, Reward 6: 136.000, Len(game): 136, Training Time: 0.106s, Prediction Time: 0.105s
    label KACAgent, Reward 7: 71.000, Len(game): 71, Training Time: 0.158s, Prediction Time: 0.139s
    label KACAgent, Reward 8: 97.000, Len(game): 97, Training Time: 0.226s, Prediction Time: 0.190s
    label KACAgent, Reward 9: 191.000, Len(game): 191, Training Time: 0.337s, Prediction Time: 0.283s
    label KACAgent, Reward 10: 78.000, Len(game): 78, Training Time: 0.465s, Prediction Time: 0.363s
    label KACAgent, Reward 11: 142.000, Len(game): 142, Training Time: 0.629s, Prediction Time: 0.475s
    label KACAgent, Reward 12: 149.000, Len(game): 149, Training Time: 0.833s, Prediction Time: 0.633s
    label KACAgent, Reward 13: 362.000, Len(game): 362, Training Time: 1.147s, Prediction Time: 0.915s
    label KACAgent, Reward 14: 209.000, Len(game): 209, Training Time: 1.565s, Prediction Time: 1.223s
    label KACAgent, Reward 15: 286.000, Len(game): 286, Training Time: 2.114s, Prediction Time: 1.648s
    label KACAgent, Reward 16: 297.000, Len(game): 297, Training Time: 2.809s, Prediction Time: 2.164s
    label KACAgent, Reward 17: 366.000, Len(game): 366, Training Time: 3.681s, Prediction Time: 2.861s
    label KACAgent, Reward 18: 273.000, Len(game): 273, Training Time: 3.681s, Prediction Time: 3.597s
    label KACAgent, Reward 19: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 3.916s
    label KACAgent, Reward 20: 268.000, Len(game): 268, Training Time: 3.681s, Prediction Time: 4.126s
    label KACAgent, Reward 21: 345.000, Len(game): 345, Training Time: 3.681s, Prediction Time: 4.396s
    label KACAgent, Reward 22: 311.000, Len(game): 311, Training Time: 3.681s, Prediction Time: 4.638s
    label KACAgent, Reward 23: 369.000, Len(game): 369, Training Time: 3.681s, Prediction Time: 4.926s
    label KACAgent, Reward 24: 357.000, Len(game): 357, Training Time: 3.681s, Prediction Time: 5.203s
    label KACAgent, Reward 25: 391.000, Len(game): 391, Training Time: 3.681s, Prediction Time: 5.510s
    label KACAgent, Reward 26: 340.000, Len(game): 340, Training Time: 3.681s, Prediction Time: 5.775s
    label KACAgent, Reward 27: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 6.037s
    label KACAgent, Reward 28: 354.000, Len(game): 354, Training Time: 3.681s, Prediction Time: 6.316s
    label KACAgent, Reward 29: 290.000, Len(game): 290, Training Time: 3.681s, Prediction Time: 6.541s
    label KACAgent, Reward 30: 112.000, Len(game): 112, Training Time: 3.681s, Prediction Time: 6.627s
    label KACAgent, Reward 31: 390.000, Len(game): 390, Training Time: 3.681s, Prediction Time: 6.930s
    label KACAgent, Reward 32: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.156s
    label KACAgent, Reward 33: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 7.383s
    label KACAgent, Reward 34: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.608s
    label KACAgent, Reward 35: 341.000, Len(game): 341, Training Time: 3.681s, Prediction Time: 7.880s
    label KACAgent, Reward 36: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 8.162s
    label KACAgent, Reward 37: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 8.394s
    label KACAgent, Reward 38: 434.000, Len(game): 434, Training Time: 3.681s, Prediction Time: 8.737s
    label KACAgent, Reward 39: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 8.961s
    label KACAgent, Reward 40: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 9.239s
    label KACAgent, Reward 41: 309.000, Len(game): 309, Training Time: 3.681s, Prediction Time: 9.480s
    label KACAgent, Reward 42: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 9.777s
    label KACAgent, Reward 43: 439.000, Len(game): 439, Training Time: 3.681s, Prediction Time: 10.119s
    label KACAgent, Reward 44: 414.000, Len(game): 414, Training Time: 3.681s, Prediction Time: 10.441s
    label KACAgent, Reward 45: 379.000, Len(game): 379, Training Time: 3.681s, Prediction Time: 10.737s
    label KACAgent, Reward 46: 283.000, Len(game): 283, Training Time: 3.681s, Prediction Time: 10.956s
    label KACAgent, Reward 47: 315.000, Len(game): 315, Training Time: 3.681s, Prediction Time: 11.203s
    label KACAgent, Reward 48: 266.000, Len(game): 266, Training Time: 3.681s, Prediction Time: 11.410s
    label KACAgent, Reward 49: 306.000, Len(game): 306, Training Time: 3.681s, Prediction Time: 11.649s
    label KACAgent, Reward 50: 378.000, Len(game): 378, Training Time: 3.681s, Prediction Time: 11.946s
    label KACAgent, Reward 51: 196.000, Len(game): 196, Training Time: 3.681s, Prediction Time: 12.098s
    label KACAgent, Reward 52: 508.000, Len(game): 508, Training Time: 3.681s, Prediction Time: 12.494s
    label KACAgent, Reward 53: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 12.735s
    label KACAgent, Reward 54: 393.000, Len(game): 393, Training Time: 3.681s, Prediction Time: 13.039s
    label KACAgent, Reward 55: 276.000, Len(game): 276, Training Time: 3.681s, Prediction Time: 13.255s
    label KACAgent, Reward 56: 405.000, Len(game): 405, Training Time: 3.681s, Prediction Time: 13.572s
    label KACAgent, Reward 57: 461.000, Len(game): 461, Training Time: 3.681s, Prediction Time: 13.929s
    label KACAgent, Reward 58: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 14.161s
    label KACAgent, Reward 59: 331.000, Len(game): 331, Training Time: 3.681s, Prediction Time: 14.420s
    label KACAgent, Reward 60: 314.000, Len(game): 314, Training Time: 3.681s, Prediction Time: 14.664s
    label KACAgent, Reward 61: 281.000, Len(game): 281, Training Time: 3.681s, Prediction Time: 14.883s
    label KACAgent, Reward 62: 300.000, Len(game): 300, Training Time: 3.681s, Prediction Time: 15.116s
    label KACAgent, Reward 63: 282.000, Len(game): 282, Training Time: 3.681s, Prediction Time: 15.335s
    label KACAgent, Reward 64: 304.000, Len(game): 304, Training Time: 3.681s, Prediction Time: 15.571s
    label KACAgent, Reward 65: 296.000, Len(game): 296, Training Time: 3.681s, Prediction Time: 15.801s
    label KACAgent, Reward 66: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 16.071s
    label KACAgent, Reward 67: 318.000, Len(game): 318, Training Time: 3.681s, Prediction Time: 16.318s
    label KACAgent, Reward 68: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 16.565s
    label KACAgent, Reward 69: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 16.810s
    label KACAgent, Reward 70: 186.000, Len(game): 186, Training Time: 3.681s, Prediction Time: 16.955s
    label KACAgent, Reward 71: 362.000, Len(game): 362, Training Time: 3.681s, Prediction Time: 17.237s
    label KACAgent, Reward 72: 433.000, Len(game): 433, Training Time: 3.681s, Prediction Time: 17.573s
    label KACAgent, Reward 73: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 17.795s
    label KACAgent, Reward 74: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 18.053s
    label KACAgent, Reward 75: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 18.322s
    label KACAgent, Reward 76: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 18.609s
    label KACAgent, Reward 77: 652.000, Len(game): 652, Training Time: 3.681s, Prediction Time: 19.121s
    label KACAgent, Reward 78: 376.000, Len(game): 376, Training Time: 3.681s, Prediction Time: 19.421s
    label KACAgent, Reward 79: 620.000, Len(game): 620, Training Time: 3.681s, Prediction Time: 19.907s
    label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 3.681s, Prediction Time: 20.175s
    label KACAgent, Reward 81: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 20.432s
    label KACAgent, Reward 82: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 20.694s
    label KACAgent, Reward 83: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 20.934s
    label KACAgent, Reward 84: 427.000, Len(game): 427, Training Time: 3.681s, Prediction Time: 21.268s
    label KACAgent, Reward 85: 298.000, Len(game): 298, Training Time: 3.681s, Prediction Time: 21.501s
    label KACAgent, Reward 86: 308.000, Len(game): 308, Training Time: 3.681s, Prediction Time: 21.749s
    label KACAgent, Reward 87: 263.000, Len(game): 263, Training Time: 3.681s, Prediction Time: 21.957s
    label KACAgent, Reward 88: 316.000, Len(game): 316, Training Time: 3.681s, Prediction Time: 22.208s
    label KACAgent, Reward 89: 373.000, Len(game): 373, Training Time: 3.681s, Prediction Time: 22.514s
    label KACAgent, Reward 90: 322.000, Len(game): 322, Training Time: 3.681s, Prediction Time: 22.769s
    label KACAgent, Reward 91: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 23.036s
    label KACAgent, Reward 92: 328.000, Len(game): 328, Training Time: 3.681s, Prediction Time: 23.293s
    label KACAgent, Reward 93: 472.000, Len(game): 472, Training Time: 3.681s, Prediction Time: 23.669s
    label KACAgent, Reward 94: 372.000, Len(game): 372, Training Time: 3.681s, Prediction Time: 23.969s
    label KACAgent, Reward 95: 293.000, Len(game): 293, Training Time: 3.681s, Prediction Time: 24.202s
    label KACAgent, Reward 96: 386.000, Len(game): 386, Training Time: 3.681s, Prediction Time: 24.509s
    label KACAgent, Reward 97: 323.000, Len(game): 323, Training Time: 3.681s, Prediction Time: 24.765s
    label KACAgent, Reward 98: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 25.065s
    label KACAgent, Reward 99: 334.000, Len(game): 334, Training Time: 3.681s, Prediction Time: 25.328s
    label DQNAgent, Reward 0: 31.000, Len(game): 31, Training Time: 0.001s, Prediction Time: 0.000s
    label DQNAgent, Reward 1: 37.000, Len(game): 37, Training Time: 0.007s, Prediction Time: 0.000s
    label DQNAgent, Reward 2: 29.000, Len(game): 29, Training Time: 0.043s, Prediction Time: 0.000s
    label DQNAgent, Reward 3: 17.000, Len(game): 17, Training Time: 0.061s, Prediction Time: 0.000s
    label DQNAgent, Reward 4: 16.000, Len(game): 16, Training Time: 0.078s, Prediction Time: 0.001s
    label DQNAgent, Reward 5: 11.000, Len(game): 11, Training Time: 0.091s, Prediction Time: 0.001s
    label DQNAgent, Reward 6: 23.000, Len(game): 23, Training Time: 0.118s, Prediction Time: 0.001s
    label DQNAgent, Reward 7: 12.000, Len(game): 12, Training Time: 0.132s, Prediction Time: 0.001s
    label DQNAgent, Reward 8: 19.000, Len(game): 19, Training Time: 0.155s, Prediction Time: 0.002s
    label DQNAgent, Reward 9: 13.000, Len(game): 13, Training Time: 0.172s, Prediction Time: 0.003s
    label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.191s, Prediction Time: 0.003s
    label DQNAgent, Reward 11: 21.000, Len(game): 21, Training Time: 0.213s, Prediction Time: 0.004s
    label DQNAgent, Reward 12: 9.000, Len(game): 9, Training Time: 0.223s, Prediction Time: 0.005s
    label DQNAgent, Reward 13: 11.000, Len(game): 11, Training Time: 0.235s, Prediction Time: 0.006s
    label DQNAgent, Reward 14: 18.000, Len(game): 18, Training Time: 0.256s, Prediction Time: 0.006s
    label DQNAgent, Reward 15: 9.000, Len(game): 9, Training Time: 0.266s, Prediction Time: 0.007s
    label DQNAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.281s, Prediction Time: 0.007s
    label DQNAgent, Reward 17: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.008s
    label DQNAgent, Reward 18: 13.000, Len(game): 13, Training Time: 0.310s, Prediction Time: 0.008s
    label DQNAgent, Reward 19: 9.000, Len(game): 9, Training Time: 0.320s, Prediction Time: 0.008s
    label DQNAgent, Reward 20: 10.000, Len(game): 10, Training Time: 0.331s, Prediction Time: 0.009s
    label DQNAgent, Reward 21: 26.000, Len(game): 26, Training Time: 0.359s, Prediction Time: 0.011s
    label DQNAgent, Reward 22: 16.000, Len(game): 16, Training Time: 0.378s, Prediction Time: 0.011s
    label DQNAgent, Reward 23: 28.000, Len(game): 28, Training Time: 0.409s, Prediction Time: 0.011s
    label DQNAgent, Reward 24: 11.000, Len(game): 11, Training Time: 0.422s, Prediction Time: 0.012s
    label DQNAgent, Reward 25: 18.000, Len(game): 18, Training Time: 0.442s, Prediction Time: 0.013s
    label DQNAgent, Reward 26: 27.000, Len(game): 27, Training Time: 0.475s, Prediction Time: 0.013s
    label DQNAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.492s, Prediction Time: 0.013s
    label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.505s, Prediction Time: 0.013s
    label DQNAgent, Reward 29: 11.000, Len(game): 11, Training Time: 0.516s, Prediction Time: 0.014s
    label DQNAgent, Reward 30: 19.000, Len(game): 19, Training Time: 0.540s, Prediction Time: 0.015s
    label DQNAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.562s, Prediction Time: 0.016s
    label DQNAgent, Reward 32: 18.000, Len(game): 18, Training Time: 0.582s, Prediction Time: 0.017s
    label DQNAgent, Reward 33: 12.000, Len(game): 12, Training Time: 0.594s, Prediction Time: 0.017s
    label DQNAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.610s, Prediction Time: 0.018s
    label DQNAgent, Reward 35: 33.000, Len(game): 33, Training Time: 0.646s, Prediction Time: 0.019s
    label DQNAgent, Reward 36: 34.000, Len(game): 34, Training Time: 0.685s, Prediction Time: 0.020s
    label DQNAgent, Reward 37: 25.000, Len(game): 25, Training Time: 0.712s, Prediction Time: 0.020s
    label DQNAgent, Reward 38: 11.000, Len(game): 11, Training Time: 0.724s, Prediction Time: 0.022s
    label DQNAgent, Reward 39: 20.000, Len(game): 20, Training Time: 0.746s, Prediction Time: 0.023s
    label DQNAgent, Reward 40: 13.000, Len(game): 13, Training Time: 0.760s, Prediction Time: 0.023s
    label DQNAgent, Reward 41: 38.000, Len(game): 38, Training Time: 0.802s, Prediction Time: 0.024s
    label DQNAgent, Reward 42: 20.000, Len(game): 20, Training Time: 0.825s, Prediction Time: 0.025s
    label DQNAgent, Reward 43: 14.000, Len(game): 14, Training Time: 0.840s, Prediction Time: 0.025s
    label DQNAgent, Reward 44: 28.000, Len(game): 28, Training Time: 0.871s, Prediction Time: 0.026s
    label DQNAgent, Reward 45: 24.000, Len(game): 24, Training Time: 0.899s, Prediction Time: 0.027s
    label DQNAgent, Reward 46: 33.000, Len(game): 33, Training Time: 0.936s, Prediction Time: 0.028s
    label DQNAgent, Reward 47: 19.000, Len(game): 19, Training Time: 0.957s, Prediction Time: 0.029s
    label DQNAgent, Reward 48: 23.000, Len(game): 23, Training Time: 0.981s, Prediction Time: 0.029s
    label DQNAgent, Reward 49: 36.000, Len(game): 36, Training Time: 1.020s, Prediction Time: 0.030s
    label DQNAgent, Reward 50: 111.000, Len(game): 111, Training Time: 1.144s, Prediction Time: 0.035s
    label DQNAgent, Reward 51: 176.000, Len(game): 176, Training Time: 1.343s, Prediction Time: 0.041s
    label DQNAgent, Reward 52: 202.000, Len(game): 202, Training Time: 1.569s, Prediction Time: 0.048s
    label DQNAgent, Reward 53: 151.000, Len(game): 151, Training Time: 1.736s, Prediction Time: 0.053s
    label DQNAgent, Reward 54: 163.000, Len(game): 163, Training Time: 1.936s, Prediction Time: 0.060s
    label DQNAgent, Reward 55: 178.000, Len(game): 178, Training Time: 2.147s, Prediction Time: 0.067s
    label DQNAgent, Reward 56: 443.000, Len(game): 443, Training Time: 2.657s, Prediction Time: 0.084s
    label DQNAgent, Reward 57: 234.000, Len(game): 234, Training Time: 2.931s, Prediction Time: 0.094s
    label DQNAgent, Reward 58: 179.000, Len(game): 179, Training Time: 3.131s, Prediction Time: 0.102s
    label DQNAgent, Reward 59: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.110s
    label DQNAgent, Reward 60: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.123s
    label DQNAgent, Reward 61: 221.000, Len(game): 221, Training Time: 3.131s, Prediction Time: 0.132s
    label DQNAgent, Reward 62: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.140s
    label DQNAgent, Reward 63: 186.000, Len(game): 186, Training Time: 3.131s, Prediction Time: 0.147s
    label DQNAgent, Reward 64: 285.000, Len(game): 285, Training Time: 3.131s, Prediction Time: 0.160s
    label DQNAgent, Reward 65: 342.000, Len(game): 342, Training Time: 3.131s, Prediction Time: 0.173s
    label DQNAgent, Reward 66: 205.000, Len(game): 205, Training Time: 3.131s, Prediction Time: 0.182s
    label DQNAgent, Reward 67: 150.000, Len(game): 150, Training Time: 3.131s, Prediction Time: 0.188s
    label DQNAgent, Reward 68: 215.000, Len(game): 215, Training Time: 3.131s, Prediction Time: 0.198s
    label DQNAgent, Reward 69: 163.000, Len(game): 163, Training Time: 3.131s, Prediction Time: 0.204s
    label DQNAgent, Reward 70: 252.000, Len(game): 252, Training Time: 3.131s, Prediction Time: 0.214s
    label DQNAgent, Reward 71: 316.000, Len(game): 316, Training Time: 3.131s, Prediction Time: 0.228s
    label DQNAgent, Reward 72: 218.000, Len(game): 218, Training Time: 3.131s, Prediction Time: 0.237s
    label DQNAgent, Reward 73: 225.000, Len(game): 225, Training Time: 3.131s, Prediction Time: 0.246s
    label DQNAgent, Reward 74: 305.000, Len(game): 305, Training Time: 3.131s, Prediction Time: 0.258s
    label DQNAgent, Reward 75: 254.000, Len(game): 254, Training Time: 3.131s, Prediction Time: 0.269s
    label DQNAgent, Reward 76: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.277s
    label DQNAgent, Reward 77: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.287s
    label DQNAgent, Reward 78: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.296s
    label DQNAgent, Reward 79: 334.000, Len(game): 334, Training Time: 3.131s, Prediction Time: 0.310s
    label DQNAgent, Reward 80: 236.000, Len(game): 236, Training Time: 3.131s, Prediction Time: 0.321s
    label DQNAgent, Reward 81: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.329s
    label DQNAgent, Reward 82: 166.000, Len(game): 166, Training Time: 3.131s, Prediction Time: 0.335s
    label DQNAgent, Reward 83: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.348s
    label DQNAgent, Reward 84: 140.000, Len(game): 140, Training Time: 3.131s, Prediction Time: 0.353s
    label DQNAgent, Reward 85: 346.000, Len(game): 346, Training Time: 3.131s, Prediction Time: 0.367s
    label DQNAgent, Reward 86: 246.000, Len(game): 246, Training Time: 3.131s, Prediction Time: 0.377s
    label DQNAgent, Reward 87: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.385s
    label DQNAgent, Reward 88: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.395s
    label DQNAgent, Reward 89: 223.000, Len(game): 223, Training Time: 3.131s, Prediction Time: 0.404s
    label DQNAgent, Reward 90: 188.000, Len(game): 188, Training Time: 3.131s, Prediction Time: 0.412s
    label DQNAgent, Reward 91: 173.000, Len(game): 173, Training Time: 3.131s, Prediction Time: 0.419s
    label DQNAgent, Reward 92: 167.000, Len(game): 167, Training Time: 3.131s, Prediction Time: 0.426s
    label DQNAgent, Reward 93: 242.000, Len(game): 242, Training Time: 3.131s, Prediction Time: 0.435s
    label DQNAgent, Reward 94: 310.000, Len(game): 310, Training Time: 3.131s, Prediction Time: 0.448s
    label DQNAgent, Reward 95: 320.000, Len(game): 320, Training Time: 3.131s, Prediction Time: 0.461s
    label DQNAgent, Reward 96: 145.000, Len(game): 145, Training Time: 3.131s, Prediction Time: 0.467s
    label DQNAgent, Reward 97: 156.000, Len(game): 156, Training Time: 3.131s, Prediction Time: 0.472s
    label DQNAgent, Reward 98: 178.000, Len(game): 178, Training Time: 3.131s, Prediction Time: 0.480s
    label DQNAgent, Reward 99: 217.000, Len(game): 217, Training Time: 3.131s, Prediction Time: 0.488s
    Computed global error Bellman mean:  6.83823226615183e-08  iter:  3
    label KQLearningHJBCP, Reward 0: 35.000, Len(game): 35, Training Time: 0.012s, Prediction Time: 0.000s
    Computed global error Bellman mean:  6.153183776724802e-08  iter:  4
    label KQLearningHJBCP, Reward 1: 40.000, Len(game): 40, Training Time: 0.036s, Prediction Time: 0.007s
    Computed global error Bellman mean:  6.570681726583113e-08  iter:  6
    label KQLearningHJBCP, Reward 2: 47.000, Len(game): 47, Training Time: 0.099s, Prediction Time: 0.015s
    no training
    label KQLearningHJBCP, Reward 3: 1000.000, Len(game): 1000, Training Time: 0.099s, Prediction Time: 0.211s
    Computed global error Bellman mean:  1.207237322271833e-07  iter:  9
    label KQLearningHJBCP, Reward 4: 529.000, Len(game): 529, Training Time: 2.215s, Prediction Time: 0.318s
    no training
    label KQLearningHJBCP, Reward 5: 1000.000, Len(game): 1000, Training Time: 2.215s, Prediction Time: 0.748s
    Computed global error Bellman mean:  1.9297541858819368e-07  iter:  8
    label KQLearningHJBCP, Reward 6: 779.000, Len(game): 779, Training Time: 10.511s, Prediction Time: 1.100s
    label KQLearningHJBCP, Reward 7: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 1.790s
    label KQLearningHJBCP, Reward 8: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 2.465s
    label KQLearningHJBCP, Reward 9: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.151s
    label KQLearningHJBCP, Reward 10: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.831s
    label KQLearningHJBCP, Reward 11: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 4.517s
    label KQLearningHJBCP, Reward 12: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.202s
    label KQLearningHJBCP, Reward 13: 163.000, Len(game): 163, Training Time: 10.511s, Prediction Time: 5.314s
    label KQLearningHJBCP, Reward 14: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.998s
    label KQLearningHJBCP, Reward 15: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 6.679s
    label KQLearningHJBCP, Reward 16: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 7.358s
    label KQLearningHJBCP, Reward 17: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.046s
    label KQLearningHJBCP, Reward 18: 173.000, Len(game): 173, Training Time: 10.511s, Prediction Time: 8.163s
    label KQLearningHJBCP, Reward 19: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.845s
    label KQLearningHJBCP, Reward 20: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 9.524s
    label KQLearningHJBCP, Reward 21: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.206s
    label KQLearningHJBCP, Reward 22: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.891s
    label KQLearningHJBCP, Reward 23: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 11.151s
    label KQLearningHJBCP, Reward 24: 459.000, Len(game): 459, Training Time: 10.511s, Prediction Time: 11.466s
    label KQLearningHJBCP, Reward 25: 264.000, Len(game): 264, Training Time: 10.511s, Prediction Time: 11.646s
    label KQLearningHJBCP, Reward 26: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 12.332s
    label KQLearningHJBCP, Reward 27: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 13.027s
    label KQLearningHJBCP, Reward 28: 515.000, Len(game): 515, Training Time: 10.511s, Prediction Time: 13.374s
    label KQLearningHJBCP, Reward 29: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.055s
    label KQLearningHJBCP, Reward 30: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.744s
    label KQLearningHJBCP, Reward 31: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 15.427s
    label KQLearningHJBCP, Reward 32: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.106s
    label KQLearningHJBCP, Reward 33: 240.000, Len(game): 240, Training Time: 10.511s, Prediction Time: 16.271s
    label KQLearningHJBCP, Reward 34: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.953s
    label KQLearningHJBCP, Reward 35: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 17.631s
    label KQLearningHJBCP, Reward 36: 273.000, Len(game): 273, Training Time: 10.511s, Prediction Time: 17.814s
    label KQLearningHJBCP, Reward 37: 688.000, Len(game): 688, Training Time: 10.511s, Prediction Time: 18.282s
    label KQLearningHJBCP, Reward 38: 861.000, Len(game): 861, Training Time: 10.511s, Prediction Time: 18.862s
    label KQLearningHJBCP, Reward 39: 242.000, Len(game): 242, Training Time: 10.511s, Prediction Time: 19.025s
    label KQLearningHJBCP, Reward 40: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 19.712s
    label KQLearningHJBCP, Reward 41: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 20.401s
    label KQLearningHJBCP, Reward 42: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.084s
    label KQLearningHJBCP, Reward 43: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.775s
    label KQLearningHJBCP, Reward 44: 985.000, Len(game): 985, Training Time: 10.511s, Prediction Time: 22.457s
    label KQLearningHJBCP, Reward 45: 530.000, Len(game): 530, Training Time: 10.511s, Prediction Time: 22.820s
    label KQLearningHJBCP, Reward 46: 228.000, Len(game): 228, Training Time: 10.511s, Prediction Time: 22.977s
    label KQLearningHJBCP, Reward 47: 405.000, Len(game): 405, Training Time: 10.511s, Prediction Time: 23.254s
    label KQLearningHJBCP, Reward 48: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 23.943s
    label KQLearningHJBCP, Reward 49: 366.000, Len(game): 366, Training Time: 10.511s, Prediction Time: 24.198s
    label KQLearningHJBCP, Reward 50: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 24.887s
    label KQLearningHJBCP, Reward 51: 208.000, Len(game): 208, Training Time: 10.511s, Prediction Time: 25.035s
    label KQLearningHJBCP, Reward 52: 497.000, Len(game): 497, Training Time: 10.511s, Prediction Time: 25.380s
    label KQLearningHJBCP, Reward 53: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.065s
    label KQLearningHJBCP, Reward 54: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.751s
    label KQLearningHJBCP, Reward 55: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 27.442s
    label KQLearningHJBCP, Reward 56: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.128s
    label KQLearningHJBCP, Reward 57: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.812s
    label KQLearningHJBCP, Reward 58: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 29.494s
    label KQLearningHJBCP, Reward 59: 288.000, Len(game): 288, Training Time: 10.511s, Prediction Time: 29.694s
    label KQLearningHJBCP, Reward 60: 244.000, Len(game): 244, Training Time: 10.511s, Prediction Time: 29.859s
    label KQLearningHJBCP, Reward 61: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 30.550s
    label KQLearningHJBCP, Reward 62: 224.000, Len(game): 224, Training Time: 10.511s, Prediction Time: 30.705s
    label KQLearningHJBCP, Reward 63: 259.000, Len(game): 259, Training Time: 10.511s, Prediction Time: 30.885s
    label KQLearningHJBCP, Reward 64: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 31.574s
    label KQLearningHJBCP, Reward 65: 285.000, Len(game): 285, Training Time: 10.511s, Prediction Time: 31.774s
    label KQLearningHJBCP, Reward 66: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 32.483s
    label KQLearningHJBCP, Reward 67: 268.000, Len(game): 268, Training Time: 10.511s, Prediction Time: 32.672s
    label KQLearningHJBCP, Reward 68: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 33.366s
    label KQLearningHJBCP, Reward 69: 237.000, Len(game): 237, Training Time: 10.511s, Prediction Time: 33.533s
    label KQLearningHJBCP, Reward 70: 252.000, Len(game): 252, Training Time: 10.511s, Prediction Time: 33.706s
    label KQLearningHJBCP, Reward 71: 751.000, Len(game): 751, Training Time: 10.511s, Prediction Time: 34.228s
    label KQLearningHJBCP, Reward 72: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 34.919s
    label KQLearningHJBCP, Reward 73: 170.000, Len(game): 170, Training Time: 10.511s, Prediction Time: 35.037s
    label KQLearningHJBCP, Reward 74: 336.000, Len(game): 336, Training Time: 10.511s, Prediction Time: 35.267s
    label KQLearningHJBCP, Reward 75: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 35.953s
    label KQLearningHJBCP, Reward 76: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 36.639s
    label KQLearningHJBCP, Reward 77: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 37.328s
    label KQLearningHJBCP, Reward 78: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.012s
    label KQLearningHJBCP, Reward 79: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.710s
    label KQLearningHJBCP, Reward 80: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 38.853s
    label KQLearningHJBCP, Reward 81: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 39.536s
    label KQLearningHJBCP, Reward 82: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 40.223s
    label KQLearningHJBCP, Reward 83: 757.000, Len(game): 757, Training Time: 10.511s, Prediction Time: 40.738s
    label KQLearningHJBCP, Reward 84: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 41.428s
    label KQLearningHJBCP, Reward 85: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 41.567s
    label KQLearningHJBCP, Reward 86: 281.000, Len(game): 281, Training Time: 10.511s, Prediction Time: 41.761s
    label KQLearningHJBCP, Reward 87: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 42.444s
    label KQLearningHJBCP, Reward 88: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.123s
    label KQLearningHJBCP, Reward 89: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.802s
    label KQLearningHJBCP, Reward 90: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 44.488s
    label KQLearningHJBCP, Reward 91: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.172s
    label KQLearningHJBCP, Reward 92: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.858s
    label KQLearningHJBCP, Reward 93: 177.000, Len(game): 177, Training Time: 10.511s, Prediction Time: 45.978s
    label KQLearningHJBCP, Reward 94: 563.000, Len(game): 563, Training Time: 10.511s, Prediction Time: 46.358s
    label KQLearningHJBCP, Reward 95: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 46.617s
    label KQLearningHJBCP, Reward 96: 704.000, Len(game): 704, Training Time: 10.511s, Prediction Time: 47.097s
    label KQLearningHJBCP, Reward 97: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 47.782s
    label KQLearningHJBCP, Reward 98: 539.000, Len(game): 539, Training Time: 10.511s, Prediction Time: 48.150s
    label KQLearningHJBCP, Reward 99: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 48.828s
    Computed global error Bellman mean:  3.9266451670430215e-08  iter:  2
    label KQLearning, Reward 0: 18.000, Len(game): 18, Training Time: 0.005s, Prediction Time: 0.000s
    Computed global error Bellman mean:  0.3964756234060676  iter:  5
    label KQLearning, Reward 1: 51.000, Len(game): 51, Training Time: 0.022s, Prediction Time: 0.008s
    Computed global error Bellman mean:  0.4553239146341465  iter:  5
    label KQLearning, Reward 2: 35.000, Len(game): 35, Training Time: 0.052s, Prediction Time: 0.014s
    Computed global error Bellman mean:  3.059006677079503e-07  iter:  4
    label KQLearning, Reward 3: 53.000, Len(game): 53, Training Time: 0.089s, Prediction Time: 0.024s
    Computed global error Bellman mean:  0.02300332025657364  iter:  5
    label KQLearning, Reward 4: 63.000, Len(game): 63, Training Time: 0.171s, Prediction Time: 0.036s
    Computed global error Bellman mean:  0.12679985761031448  iter:  5
    label KQLearning, Reward 5: 45.000, Len(game): 45, Training Time: 0.319s, Prediction Time: 0.048s
    Computed global error Bellman mean:  0.052994993189221073  iter:  5
    label KQLearning, Reward 6: 31.000, Len(game): 31, Training Time: 0.488s, Prediction Time: 0.058s
    Computed global error Bellman mean:  0.02126114540487673  iter:  5
    label KQLearning, Reward 7: 50.000, Len(game): 50, Training Time: 0.717s, Prediction Time: 0.072s
    Computed global error Bellman mean:  0.0009045258385653113  iter:  5
    label KQLearning, Reward 8: 59.000, Len(game): 59, Training Time: 1.065s, Prediction Time: 0.092s
    Computed global error Bellman mean:  0.028279770743522326  iter:  5
    label KQLearning, Reward 9: 84.000, Len(game): 84, Training Time: 1.535s, Prediction Time: 0.123s
    Computed global error Bellman mean:  0.0038382381461582412  iter:  5
    label KQLearning, Reward 10: 51.000, Len(game): 51, Training Time: 2.153s, Prediction Time: 0.143s
    Computed global error Bellman mean:  0.015333520935118947  iter:  5
    label KQLearning, Reward 11: 88.000, Len(game): 88, Training Time: 2.922s, Prediction Time: 0.177s
    Computed global error Bellman mean:  0.0067286687677481725  iter:  5
    label KQLearning, Reward 12: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 0.205s
    label KQLearning, Reward 13: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 0.244s
    label KQLearning, Reward 14: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 0.279s
    label KQLearning, Reward 15: 153.000, Len(game): 153, Training Time: 3.841s, Prediction Time: 0.351s
    label KQLearning, Reward 16: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.382s
    label KQLearning, Reward 17: 99.000, Len(game): 99, Training Time: 3.841s, Prediction Time: 0.427s
    label KQLearning, Reward 18: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 0.477s
    label KQLearning, Reward 19: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.507s
    label KQLearning, Reward 20: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 0.544s
    label KQLearning, Reward 21: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 0.576s
    label KQLearning, Reward 22: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 0.615s
    label KQLearning, Reward 23: 136.000, Len(game): 136, Training Time: 3.841s, Prediction Time: 0.678s
    label KQLearning, Reward 24: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 0.727s
    label KQLearning, Reward 25: 111.000, Len(game): 111, Training Time: 3.841s, Prediction Time: 0.777s
    label KQLearning, Reward 26: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 0.811s
    label KQLearning, Reward 27: 352.000, Len(game): 352, Training Time: 3.841s, Prediction Time: 0.974s
    label KQLearning, Reward 28: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.014s
    label KQLearning, Reward 29: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 1.051s
    label KQLearning, Reward 30: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 1.082s
    label KQLearning, Reward 31: 1000.000, Len(game): 1000, Training Time: 3.841s, Prediction Time: 1.543s
    label KQLearning, Reward 32: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.581s
    label KQLearning, Reward 33: 103.000, Len(game): 103, Training Time: 3.841s, Prediction Time: 1.628s
    label KQLearning, Reward 34: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.660s
    label KQLearning, Reward 35: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 1.695s
    label KQLearning, Reward 36: 88.000, Len(game): 88, Training Time: 3.841s, Prediction Time: 1.734s
    label KQLearning, Reward 37: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.772s
    label KQLearning, Reward 38: 60.000, Len(game): 60, Training Time: 3.841s, Prediction Time: 1.798s
    label KQLearning, Reward 39: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 1.827s
    label KQLearning, Reward 40: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.861s
    label KQLearning, Reward 41: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.894s
    label KQLearning, Reward 42: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.927s
    label KQLearning, Reward 43: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.965s
    label KQLearning, Reward 44: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 2.005s
    label KQLearning, Reward 45: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 2.039s
    label KQLearning, Reward 46: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 2.076s
    label KQLearning, Reward 47: 101.000, Len(game): 101, Training Time: 3.841s, Prediction Time: 2.122s
    label KQLearning, Reward 48: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 2.156s
    label KQLearning, Reward 49: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.196s
    label KQLearning, Reward 50: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.235s
    label KQLearning, Reward 51: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 2.265s
    label KQLearning, Reward 52: 187.000, Len(game): 187, Training Time: 3.841s, Prediction Time: 2.354s
    label KQLearning, Reward 53: 118.000, Len(game): 118, Training Time: 3.841s, Prediction Time: 2.408s
    label KQLearning, Reward 54: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 2.451s
    label KQLearning, Reward 55: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 2.480s
    label KQLearning, Reward 56: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 2.523s
    label KQLearning, Reward 57: 220.000, Len(game): 220, Training Time: 3.841s, Prediction Time: 2.625s
    label KQLearning, Reward 58: 78.000, Len(game): 78, Training Time: 3.841s, Prediction Time: 2.660s
    label KQLearning, Reward 59: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 2.698s
    label KQLearning, Reward 60: 113.000, Len(game): 113, Training Time: 3.841s, Prediction Time: 2.750s
    label KQLearning, Reward 61: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 2.796s
    label KQLearning, Reward 62: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.837s
    label KQLearning, Reward 63: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.871s
    label KQLearning, Reward 64: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.904s
    label KQLearning, Reward 65: 62.000, Len(game): 62, Training Time: 3.841s, Prediction Time: 2.932s
    label KQLearning, Reward 66: 68.000, Len(game): 68, Training Time: 3.841s, Prediction Time: 2.963s
    label KQLearning, Reward 67: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 3.006s
    label KQLearning, Reward 68: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 3.055s
    label KQLearning, Reward 69: 77.000, Len(game): 77, Training Time: 3.841s, Prediction Time: 3.090s
    label KQLearning, Reward 70: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 3.121s
    label KQLearning, Reward 71: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 3.151s
    label KQLearning, Reward 72: 151.000, Len(game): 151, Training Time: 3.841s, Prediction Time: 3.220s
    label KQLearning, Reward 73: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 3.269s
    label KQLearning, Reward 74: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 3.312s
    label KQLearning, Reward 75: 89.000, Len(game): 89, Training Time: 3.841s, Prediction Time: 3.353s
    label KQLearning, Reward 76: 110.000, Len(game): 110, Training Time: 3.841s, Prediction Time: 3.405s
    label KQLearning, Reward 77: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 3.440s
    label KQLearning, Reward 78: 59.000, Len(game): 59, Training Time: 3.841s, Prediction Time: 3.466s
    label KQLearning, Reward 79: 117.000, Len(game): 117, Training Time: 3.841s, Prediction Time: 3.519s
    label KQLearning, Reward 80: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 3.559s
    label KQLearning, Reward 81: 92.000, Len(game): 92, Training Time: 3.841s, Prediction Time: 3.600s
    label KQLearning, Reward 82: 108.000, Len(game): 108, Training Time: 3.841s, Prediction Time: 3.652s
    label KQLearning, Reward 83: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 3.689s
    label KQLearning, Reward 84: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 3.733s
    label KQLearning, Reward 85: 63.000, Len(game): 63, Training Time: 3.841s, Prediction Time: 3.761s
    label KQLearning, Reward 86: 80.000, Len(game): 80, Training Time: 3.841s, Prediction Time: 3.798s
    label KQLearning, Reward 87: 97.000, Len(game): 97, Training Time: 3.841s, Prediction Time: 3.842s
    label KQLearning, Reward 88: 114.000, Len(game): 114, Training Time: 3.841s, Prediction Time: 3.895s
    label KQLearning, Reward 89: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 3.935s
    label KQLearning, Reward 90: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 3.967s
    label KQLearning, Reward 91: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 4.003s
    label KQLearning, Reward 92: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 4.043s
    label KQLearning, Reward 93: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 4.077s
    label KQLearning, Reward 94: 107.000, Len(game): 107, Training Time: 3.841s, Prediction Time: 4.128s
    label KQLearning, Reward 95: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 4.166s
    label KQLearning, Reward 96: 104.000, Len(game): 104, Training Time: 3.841s, Prediction Time: 4.214s
    label KQLearning, Reward 97: 90.000, Len(game): 90, Training Time: 3.841s, Prediction Time: 4.256s
    label KQLearning, Reward 98: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 4.294s
    label KQLearning, Reward 99: 95.000, Len(game): 95, Training Time: 3.841s, Prediction Time: 4.338s
    1
    label PPOAgent, Reward 0: 21.000, Len(game): 21, Training Time: 0.004s, Prediction Time: 0.004s
    label PPOAgent, Reward 1: 53.000, Len(game): 53, Training Time: 0.014s, Prediction Time: 0.014s
    label PPOAgent, Reward 2: 11.000, Len(game): 11, Training Time: 0.016s, Prediction Time: 0.016s
    label PPOAgent, Reward 3: 36.000, Len(game): 36, Training Time: 0.023s, Prediction Time: 0.023s
    label PPOAgent, Reward 4: 14.000, Len(game): 14, Training Time: 0.025s, Prediction Time: 0.025s
    label PPOAgent, Reward 5: 20.000, Len(game): 20, Training Time: 0.029s, Prediction Time: 0.029s
    label PPOAgent, Reward 6: 22.000, Len(game): 22, Training Time: 0.033s, Prediction Time: 0.033s
    label PPOAgent, Reward 7: 57.000, Len(game): 57, Training Time: 0.044s, Prediction Time: 0.044s
    label PPOAgent, Reward 8: 17.000, Len(game): 17, Training Time: 0.047s, Prediction Time: 0.047s
    label PPOAgent, Reward 9: 19.000, Len(game): 19, Training Time: 0.050s, Prediction Time: 0.050s
    label PPOAgent, Reward 10: 9.000, Len(game): 9, Training Time: 0.053s, Prediction Time: 0.053s
    label PPOAgent, Reward 11: 16.000, Len(game): 16, Training Time: 0.055s, Prediction Time: 0.055s
    label PPOAgent, Reward 12: 19.000, Len(game): 19, Training Time: 0.058s, Prediction Time: 0.058s
    label PPOAgent, Reward 13: 12.000, Len(game): 12, Training Time: 0.061s, Prediction Time: 0.061s
    label PPOAgent, Reward 14: 39.000, Len(game): 39, Training Time: 0.069s, Prediction Time: 0.069s
    label PPOAgent, Reward 15: 16.000, Len(game): 16, Training Time: 0.071s, Prediction Time: 0.071s
    label PPOAgent, Reward 16: 19.000, Len(game): 19, Training Time: 0.075s, Prediction Time: 0.075s
    label PPOAgent, Reward 17: 17.000, Len(game): 17, Training Time: 0.078s, Prediction Time: 0.078s
    label PPOAgent, Reward 18: 31.000, Len(game): 31, Training Time: 0.084s, Prediction Time: 0.084s
    label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.087s, Prediction Time: 0.087s
    label PPOAgent, Reward 20: 13.000, Len(game): 13, Training Time: 0.090s, Prediction Time: 0.090s
    label PPOAgent, Reward 21: 18.000, Len(game): 18, Training Time: 0.094s, Prediction Time: 0.094s
    label PPOAgent, Reward 22: 20.000, Len(game): 20, Training Time: 0.097s, Prediction Time: 0.097s
    label PPOAgent, Reward 23: 31.000, Len(game): 31, Training Time: 0.103s, Prediction Time: 0.103s
    label PPOAgent, Reward 24: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s
    label PPOAgent, Reward 25: 41.000, Len(game): 41, Training Time: 0.114s, Prediction Time: 0.114s
    label PPOAgent, Reward 26: 12.000, Len(game): 12, Training Time: 0.116s, Prediction Time: 0.116s
    label PPOAgent, Reward 27: 29.000, Len(game): 29, Training Time: 0.121s, Prediction Time: 0.121s
    label PPOAgent, Reward 28: 31.000, Len(game): 31, Training Time: 0.128s, Prediction Time: 0.128s
    label PPOAgent, Reward 29: 27.000, Len(game): 27, Training Time: 0.133s, Prediction Time: 0.133s
    label PPOAgent, Reward 30: 18.000, Len(game): 18, Training Time: 0.136s, Prediction Time: 0.136s
    label PPOAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.137s, Prediction Time: 0.137s
    label PPOAgent, Reward 32: 33.000, Len(game): 33, Training Time: 0.142s, Prediction Time: 0.142s
    label PPOAgent, Reward 33: 41.000, Len(game): 41, Training Time: 0.150s, Prediction Time: 0.150s
    label PPOAgent, Reward 34: 22.000, Len(game): 22, Training Time: 0.154s, Prediction Time: 0.154s
    label PPOAgent, Reward 35: 13.000, Len(game): 13, Training Time: 0.157s, Prediction Time: 0.157s
    label PPOAgent, Reward 36: 19.000, Len(game): 19, Training Time: 0.160s, Prediction Time: 0.160s
    label PPOAgent, Reward 37: 15.000, Len(game): 15, Training Time: 0.162s, Prediction Time: 0.162s
    label PPOAgent, Reward 38: 30.000, Len(game): 30, Training Time: 0.168s, Prediction Time: 0.168s
    label PPOAgent, Reward 39: 49.000, Len(game): 49, Training Time: 0.176s, Prediction Time: 0.176s
    label PPOAgent, Reward 40: 21.000, Len(game): 21, Training Time: 0.181s, Prediction Time: 0.181s
    label PPOAgent, Reward 41: 21.000, Len(game): 21, Training Time: 0.186s, Prediction Time: 0.186s
    label PPOAgent, Reward 42: 49.000, Len(game): 49, Training Time: 0.194s, Prediction Time: 0.194s
    label PPOAgent, Reward 43: 23.000, Len(game): 23, Training Time: 0.199s, Prediction Time: 0.199s
    label PPOAgent, Reward 44: 48.000, Len(game): 48, Training Time: 0.208s, Prediction Time: 0.208s
    label PPOAgent, Reward 45: 54.000, Len(game): 54, Training Time: 0.217s, Prediction Time: 0.217s
    label PPOAgent, Reward 46: 55.000, Len(game): 55, Training Time: 0.268s, Prediction Time: 0.268s
    label PPOAgent, Reward 47: 22.000, Len(game): 22, Training Time: 0.273s, Prediction Time: 0.273s
    label PPOAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.280s, Prediction Time: 0.280s
    label PPOAgent, Reward 49: 21.000, Len(game): 21, Training Time: 0.285s, Prediction Time: 0.285s
    label PPOAgent, Reward 50: 27.000, Len(game): 27, Training Time: 0.290s, Prediction Time: 0.290s
    label PPOAgent, Reward 51: 38.000, Len(game): 38, Training Time: 0.298s, Prediction Time: 0.298s
    label PPOAgent, Reward 52: 41.000, Len(game): 41, Training Time: 0.306s, Prediction Time: 0.306s
    label PPOAgent, Reward 53: 11.000, Len(game): 11, Training Time: 0.308s, Prediction Time: 0.308s
    label PPOAgent, Reward 54: 17.000, Len(game): 17, Training Time: 0.311s, Prediction Time: 0.311s
    label PPOAgent, Reward 55: 32.000, Len(game): 32, Training Time: 0.317s, Prediction Time: 0.317s
    label PPOAgent, Reward 56: 45.000, Len(game): 45, Training Time: 0.328s, Prediction Time: 0.328s
    label PPOAgent, Reward 57: 27.000, Len(game): 27, Training Time: 0.332s, Prediction Time: 0.332s
    label PPOAgent, Reward 58: 21.000, Len(game): 21, Training Time: 0.338s, Prediction Time: 0.338s
    label PPOAgent, Reward 59: 16.000, Len(game): 16, Training Time: 0.341s, Prediction Time: 0.341s
    label PPOAgent, Reward 60: 14.000, Len(game): 14, Training Time: 0.344s, Prediction Time: 0.344s
    label PPOAgent, Reward 61: 25.000, Len(game): 25, Training Time: 0.348s, Prediction Time: 0.348s
    label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.352s, Prediction Time: 0.352s
    label PPOAgent, Reward 63: 16.000, Len(game): 16, Training Time: 0.355s, Prediction Time: 0.355s
    label PPOAgent, Reward 64: 24.000, Len(game): 24, Training Time: 0.360s, Prediction Time: 0.360s
    label PPOAgent, Reward 65: 25.000, Len(game): 25, Training Time: 0.365s, Prediction Time: 0.365s
    label PPOAgent, Reward 66: 12.000, Len(game): 12, Training Time: 0.367s, Prediction Time: 0.367s
    label PPOAgent, Reward 67: 20.000, Len(game): 20, Training Time: 0.370s, Prediction Time: 0.370s
    label PPOAgent, Reward 68: 28.000, Len(game): 28, Training Time: 0.375s, Prediction Time: 0.375s
    label PPOAgent, Reward 69: 41.000, Len(game): 41, Training Time: 0.383s, Prediction Time: 0.383s
    label PPOAgent, Reward 70: 23.000, Len(game): 23, Training Time: 0.388s, Prediction Time: 0.388s
    label PPOAgent, Reward 71: 18.000, Len(game): 18, Training Time: 0.391s, Prediction Time: 0.391s
    label PPOAgent, Reward 72: 18.000, Len(game): 18, Training Time: 0.395s, Prediction Time: 0.395s
    label PPOAgent, Reward 73: 28.000, Len(game): 28, Training Time: 0.400s, Prediction Time: 0.400s
    label PPOAgent, Reward 74: 21.000, Len(game): 21, Training Time: 0.404s, Prediction Time: 0.404s
    label PPOAgent, Reward 75: 23.000, Len(game): 23, Training Time: 0.408s, Prediction Time: 0.408s
    label PPOAgent, Reward 76: 25.000, Len(game): 25, Training Time: 0.413s, Prediction Time: 0.413s
    label PPOAgent, Reward 77: 23.000, Len(game): 23, Training Time: 0.418s, Prediction Time: 0.418s
    label PPOAgent, Reward 78: 14.000, Len(game): 14, Training Time: 0.423s, Prediction Time: 0.423s
    label PPOAgent, Reward 79: 16.000, Len(game): 16, Training Time: 0.426s, Prediction Time: 0.426s
    label PPOAgent, Reward 80: 22.000, Len(game): 22, Training Time: 0.433s, Prediction Time: 0.433s
    label PPOAgent, Reward 81: 20.000, Len(game): 20, Training Time: 0.439s, Prediction Time: 0.439s
    label PPOAgent, Reward 82: 43.000, Len(game): 43, Training Time: 0.450s, Prediction Time: 0.450s
    label PPOAgent, Reward 83: 48.000, Len(game): 48, Training Time: 0.460s, Prediction Time: 0.460s
    label PPOAgent, Reward 84: 36.000, Len(game): 36, Training Time: 0.467s, Prediction Time: 0.467s
    label PPOAgent, Reward 85: 42.000, Len(game): 42, Training Time: 0.475s, Prediction Time: 0.475s
    label PPOAgent, Reward 86: 18.000, Len(game): 18, Training Time: 0.478s, Prediction Time: 0.478s
    label PPOAgent, Reward 87: 21.000, Len(game): 21, Training Time: 0.481s, Prediction Time: 0.481s
    label PPOAgent, Reward 88: 35.000, Len(game): 35, Training Time: 0.490s, Prediction Time: 0.490s
    label PPOAgent, Reward 89: 51.000, Len(game): 51, Training Time: 0.499s, Prediction Time: 0.499s
    label PPOAgent, Reward 90: 10.000, Len(game): 10, Training Time: 0.501s, Prediction Time: 0.501s
    label PPOAgent, Reward 91: 42.000, Len(game): 42, Training Time: 0.509s, Prediction Time: 0.509s
    label PPOAgent, Reward 92: 67.000, Len(game): 67, Training Time: 0.560s, Prediction Time: 0.560s
    label PPOAgent, Reward 93: 14.000, Len(game): 14, Training Time: 0.562s, Prediction Time: 0.562s
    label PPOAgent, Reward 94: 21.000, Len(game): 21, Training Time: 0.567s, Prediction Time: 0.567s
    label PPOAgent, Reward 95: 73.000, Len(game): 73, Training Time: 0.580s, Prediction Time: 0.580s
    label PPOAgent, Reward 96: 53.000, Len(game): 53, Training Time: 0.590s, Prediction Time: 0.590s
    label PPOAgent, Reward 97: 34.000, Len(game): 34, Training Time: 0.597s, Prediction Time: 0.597s
    label PPOAgent, Reward 98: 33.000, Len(game): 33, Training Time: 0.602s, Prediction Time: 0.602s
    label PPOAgent, Reward 99: 20.000, Len(game): 20, Training Time: 0.606s, Prediction Time: 0.606s
    label PolicyGradient, Reward 0: 15.000, Len(game): 15, Training Time: 0.004s, Prediction Time: 0.001s
    label PolicyGradient, Reward 1: 16.000, Len(game): 16, Training Time: 0.007s, Prediction Time: 0.004s
    label PolicyGradient, Reward 2: 15.000, Len(game): 15, Training Time: 0.012s, Prediction Time: 0.008s
    label PolicyGradient, Reward 3: 45.000, Len(game): 45, Training Time: 0.021s, Prediction Time: 0.019s
    label PolicyGradient, Reward 4: 14.000, Len(game): 14, Training Time: 0.030s, Prediction Time: 0.022s
    label PolicyGradient, Reward 5: 41.000, Len(game): 41, Training Time: 0.040s, Prediction Time: 0.031s
    label PolicyGradient, Reward 6: 51.000, Len(game): 51, Training Time: 0.054s, Prediction Time: 0.043s
    label PolicyGradient, Reward 7: 42.000, Len(game): 42, Training Time: 0.069s, Prediction Time: 0.055s
    label PolicyGradient, Reward 8: 68.000, Len(game): 68, Training Time: 0.094s, Prediction Time: 0.074s
    label PolicyGradient, Reward 9: 30.000, Len(game): 30, Training Time: 0.118s, Prediction Time: 0.087s
    label PolicyGradient, Reward 10: 161.000, Len(game): 161, Training Time: 0.170s, Prediction Time: 0.136s
    label PolicyGradient, Reward 11: 77.000, Len(game): 77, Training Time: 0.238s, Prediction Time: 0.177s
    label PolicyGradient, Reward 12: 105.000, Len(game): 105, Training Time: 0.322s, Prediction Time: 0.237s
    label PolicyGradient, Reward 13: 65.000, Len(game): 65, Training Time: 0.429s, Prediction Time: 0.289s
    label PolicyGradient, Reward 14: 120.000, Len(game): 120, Training Time: 0.564s, Prediction Time: 0.372s
    label PolicyGradient, Reward 15: 48.000, Len(game): 48, Training Time: 0.706s, Prediction Time: 0.443s
    label PolicyGradient, Reward 16: 186.000, Len(game): 186, Training Time: 0.925s, Prediction Time: 0.584s
    label PolicyGradient, Reward 17: 67.000, Len(game): 67, Training Time: 1.151s, Prediction Time: 0.697s
    label PolicyGradient, Reward 18: 234.000, Len(game): 234, Training Time: 1.481s, Prediction Time: 0.913s
    label PolicyGradient, Reward 19: 694.000, Len(game): 694, Training Time: 2.184s, Prediction Time: 1.466s
    label PolicyGradient, Reward 20: 234.000, Len(game): 234, Training Time: 2.962s, Prediction Time: 1.990s
    label PolicyGradient, Reward 21: 939.000, Len(game): 939, Training Time: 4.512s, Prediction Time: 3.131s
    label PolicyGradient, Reward 22: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 4.060s
    label PolicyGradient, Reward 23: 66.000, Len(game): 66, Training Time: 4.512s, Prediction Time: 4.128s
    label PolicyGradient, Reward 24: 78.000, Len(game): 78, Training Time: 4.512s, Prediction Time: 4.199s
    label PolicyGradient, Reward 25: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 4.303s
    label PolicyGradient, Reward 26: 143.000, Len(game): 143, Training Time: 4.512s, Prediction Time: 4.428s
    label PolicyGradient, Reward 27: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 4.494s
    label PolicyGradient, Reward 28: 160.000, Len(game): 160, Training Time: 4.512s, Prediction Time: 4.635s
    label PolicyGradient, Reward 29: 236.000, Len(game): 236, Training Time: 4.512s, Prediction Time: 4.847s
    label PolicyGradient, Reward 30: 142.000, Len(game): 142, Training Time: 4.512s, Prediction Time: 4.974s
    label PolicyGradient, Reward 31: 135.000, Len(game): 135, Training Time: 4.512s, Prediction Time: 5.097s
    label PolicyGradient, Reward 32: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 5.187s
    label PolicyGradient, Reward 33: 219.000, Len(game): 219, Training Time: 4.512s, Prediction Time: 5.381s
    label PolicyGradient, Reward 34: 129.000, Len(game): 129, Training Time: 4.512s, Prediction Time: 5.499s
    label PolicyGradient, Reward 35: 107.000, Len(game): 107, Training Time: 4.512s, Prediction Time: 5.597s
    label PolicyGradient, Reward 36: 155.000, Len(game): 155, Training Time: 4.512s, Prediction Time: 5.735s
    label PolicyGradient, Reward 37: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 5.804s
    label PolicyGradient, Reward 38: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 5.936s
    label PolicyGradient, Reward 39: 113.000, Len(game): 113, Training Time: 4.512s, Prediction Time: 6.037s
    label PolicyGradient, Reward 40: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 6.169s
    label PolicyGradient, Reward 41: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 6.274s
    label PolicyGradient, Reward 42: 118.000, Len(game): 118, Training Time: 4.512s, Prediction Time: 6.378s
    label PolicyGradient, Reward 43: 202.000, Len(game): 202, Training Time: 4.512s, Prediction Time: 6.557s
    label PolicyGradient, Reward 44: 62.000, Len(game): 62, Training Time: 4.512s, Prediction Time: 6.611s
    label PolicyGradient, Reward 45: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 6.716s
    label PolicyGradient, Reward 46: 179.000, Len(game): 179, Training Time: 4.512s, Prediction Time: 6.876s
    label PolicyGradient, Reward 47: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 7.006s
    label PolicyGradient, Reward 48: 165.000, Len(game): 165, Training Time: 4.512s, Prediction Time: 7.153s
    label PolicyGradient, Reward 49: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 7.299s
    label PolicyGradient, Reward 50: 123.000, Len(game): 123, Training Time: 4.512s, Prediction Time: 7.408s
    label PolicyGradient, Reward 51: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 7.482s
    label PolicyGradient, Reward 52: 93.000, Len(game): 93, Training Time: 4.512s, Prediction Time: 7.564s
    label PolicyGradient, Reward 53: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 7.672s
    label PolicyGradient, Reward 54: 89.000, Len(game): 89, Training Time: 4.512s, Prediction Time: 7.753s
    label PolicyGradient, Reward 55: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 7.845s
    label PolicyGradient, Reward 56: 109.000, Len(game): 109, Training Time: 4.512s, Prediction Time: 7.940s
    label PolicyGradient, Reward 57: 162.000, Len(game): 162, Training Time: 4.512s, Prediction Time: 8.084s
    label PolicyGradient, Reward 58: 91.000, Len(game): 91, Training Time: 4.512s, Prediction Time: 8.166s
    label PolicyGradient, Reward 59: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 8.249s
    label PolicyGradient, Reward 60: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 8.364s
    label PolicyGradient, Reward 61: 110.000, Len(game): 110, Training Time: 4.512s, Prediction Time: 8.460s
    label PolicyGradient, Reward 62: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 8.548s
    label PolicyGradient, Reward 63: 122.000, Len(game): 122, Training Time: 4.512s, Prediction Time: 8.662s
    label PolicyGradient, Reward 64: 157.000, Len(game): 157, Training Time: 4.512s, Prediction Time: 8.799s
    label PolicyGradient, Reward 65: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 8.874s
    label PolicyGradient, Reward 66: 111.000, Len(game): 111, Training Time: 4.512s, Prediction Time: 8.972s
    label PolicyGradient, Reward 67: 264.000, Len(game): 264, Training Time: 4.512s, Prediction Time: 9.210s
    label PolicyGradient, Reward 68: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.294s
    label PolicyGradient, Reward 69: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.381s
    label PolicyGradient, Reward 70: 64.000, Len(game): 64, Training Time: 4.512s, Prediction Time: 9.438s
    label PolicyGradient, Reward 71: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 9.565s
    label PolicyGradient, Reward 72: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 9.653s
    label PolicyGradient, Reward 73: 184.000, Len(game): 184, Training Time: 4.512s, Prediction Time: 9.814s
    label PolicyGradient, Reward 74: 141.000, Len(game): 141, Training Time: 4.512s, Prediction Time: 9.936s
    label PolicyGradient, Reward 75: 125.000, Len(game): 125, Training Time: 4.512s, Prediction Time: 10.046s
    label PolicyGradient, Reward 76: 108.000, Len(game): 108, Training Time: 4.512s, Prediction Time: 10.142s
    label PolicyGradient, Reward 77: 147.000, Len(game): 147, Training Time: 4.512s, Prediction Time: 10.273s
    label PolicyGradient, Reward 78: 114.000, Len(game): 114, Training Time: 4.512s, Prediction Time: 10.376s
    label PolicyGradient, Reward 79: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 10.460s
    label PolicyGradient, Reward 80: 100.000, Len(game): 100, Training Time: 4.512s, Prediction Time: 10.551s
    label PolicyGradient, Reward 81: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 10.681s
    label PolicyGradient, Reward 82: 124.000, Len(game): 124, Training Time: 4.512s, Prediction Time: 10.789s
    label PolicyGradient, Reward 83: 128.000, Len(game): 128, Training Time: 4.512s, Prediction Time: 10.901s
    label PolicyGradient, Reward 84: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 11.017s
    label PolicyGradient, Reward 85: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.091s
    label PolicyGradient, Reward 86: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.165s
    label PolicyGradient, Reward 87: 58.000, Len(game): 58, Training Time: 4.512s, Prediction Time: 11.215s
    label PolicyGradient, Reward 88: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 11.329s
    label PolicyGradient, Reward 89: 96.000, Len(game): 96, Training Time: 4.512s, Prediction Time: 11.411s
    label PolicyGradient, Reward 90: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 11.497s
    label PolicyGradient, Reward 91: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 11.645s
    label PolicyGradient, Reward 92: 106.000, Len(game): 106, Training Time: 4.512s, Prediction Time: 11.738s
    label PolicyGradient, Reward 93: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 11.866s
    label PolicyGradient, Reward 94: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 11.939s
    label PolicyGradient, Reward 95: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 12.053s
    label PolicyGradient, Reward 96: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 12.137s
    label PolicyGradient, Reward 97: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 12.268s
    label PolicyGradient, Reward 98: 132.000, Len(game): 132, Training Time: 4.512s, Prediction Time: 12.387s
    label PolicyGradient, Reward 99: 177.000, Len(game): 177, Training Time: 4.512s, Prediction Time: 12.543s
    label Controller-based, Reward 0: 127.000, Len(game): 127, Training Time: 0.002s, Prediction Time: 0.001s
    label Controller-based, Reward 1: 75.000, Len(game): 75, Training Time: 0.003s, Prediction Time: 0.002s
    label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.002s
    label Controller-based, Reward 3: 199.000, Len(game): 199, Training Time: 0.022s, Prediction Time: 0.004s
    label Controller-based, Reward 4: 286.000, Len(game): 286, Training Time: 0.033s, Prediction Time: 0.007s
    label Controller-based, Reward 5: 364.000, Len(game): 364, Training Time: 0.046s, Prediction Time: 0.010s
    no training
    label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.019s
    no training
    label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.028s
    no training
    label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.036s
    no training
    label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.045s
    no training
    label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.053s
    no training
    label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.063s
    no training
    label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.071s
    no training
    label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.079s
    no training
    label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.087s
    no training
    label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.095s
    no training
    label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.103s
    no training
    label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.111s
    no training
    label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.119s
    no training
    label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.128s
    no training
    label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.136s
    no training
    label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.144s
    no training
    label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.152s
    no training
    label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.160s
    no training
    label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.168s
    no training
    label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.177s
    no training
    label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.185s
    no training
    label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.193s
    no training
    label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.201s
    no training
    label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.209s
    no training
    label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.217s
    no training
    label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.226s
    no training
    label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.233s
    no training
    label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.241s
    no training
    label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.250s
    no training
    label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.258s
    no training
    label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.265s
    no training
    label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.273s
    no training
    label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.280s
    no training
    label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.288s
    no training
    label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.296s
    no training
    label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.305s
    no training
    label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.313s
    no training
    label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.321s
    no training
    label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.329s
    no training
    label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.338s
    no training
    label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.346s
    no training
    label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.354s
    no training
    label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.362s
    no training
    label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.371s
    no training
    label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.379s
    no training
    label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.387s
    no training
    label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.396s
    no training
    label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.403s
    no training
    label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.412s
    no training
    label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.420s
    no training
    label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.428s
    no training
    label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.436s
    no training
    label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.443s
    no training
    label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.451s
    no training
    label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.459s
    no training
    label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.466s
    no training
    label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.474s
    no training
    label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.483s
    no training
    label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.491s
    no training
    label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.500s
    no training
    label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.507s
    no training
    label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.516s
    no training
    label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.524s
    no training
    label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.533s
    no training
    label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.541s
    no training
    label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.548s
    no training
    label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.556s
    no training
    label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.564s
    no training
    label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.572s
    no training
    label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.578s
    no training
    label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.587s
    no training
    label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.596s
    no training
    label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.604s
    no training
    label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.612s
    no training
    label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.621s
    no training
    label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.629s
    no training
    label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.638s
    no training
    label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.645s
    no training
    label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.653s
    no training
    label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.662s
    no training
    label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.671s
    no training
    label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.679s
    no training
    label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.687s
    no training
    label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.696s
    no training
    label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.704s
    no training
    label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.712s
    no training
    label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.720s
    no training
    label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.729s
    no training
    label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.736s
    no training
    label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.745s
    no training
    label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.753s
    no training
    label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.761s
    no training
    label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.770s
    no training
    label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.777s
    label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s
    label KACAgent, Reward 1: 39.000, Len(game): 39, Training Time: 0.009s, Prediction Time: 0.007s
    label KACAgent, Reward 2: 65.000, Len(game): 65, Training Time: 0.024s, Prediction Time: 0.019s
    label KACAgent, Reward 3: 91.000, Len(game): 91, Training Time: 0.040s, Prediction Time: 0.038s
    label KACAgent, Reward 4: 80.000, Len(game): 80, Training Time: 0.063s, Prediction Time: 0.056s
    label KACAgent, Reward 5: 117.000, Len(game): 117, Training Time: 0.102s, Prediction Time: 0.091s
    label KACAgent, Reward 6: 69.000, Len(game): 69, Training Time: 0.148s, Prediction Time: 0.123s
    label KACAgent, Reward 7: 164.000, Len(game): 164, Training Time: 0.217s, Prediction Time: 0.189s
    label KACAgent, Reward 8: 103.000, Len(game): 103, Training Time: 0.309s, Prediction Time: 0.255s
    label KACAgent, Reward 9: 278.000, Len(game): 278, Training Time: 0.478s, Prediction Time: 0.404s
    label KACAgent, Reward 10: 114.000, Len(game): 114, Training Time: 0.670s, Prediction Time: 0.527s
    label KACAgent, Reward 11: 984.000, Len(game): 984, Training Time: 1.226s, Prediction Time: 1.141s
    label KACAgent, Reward 12: 412.000, Len(game): 412, Training Time: 2.074s, Prediction Time: 1.789s
    label KACAgent, Reward 13: 111.000, Len(game): 111, Training Time: 2.990s, Prediction Time: 2.340s
    label KACAgent, Reward 14: 202.000, Len(game): 202, Training Time: 4.040s, Prediction Time: 3.047s
    label KACAgent, Reward 15: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 3.935s
    label KACAgent, Reward 16: 250.000, Len(game): 250, Training Time: 4.040s, Prediction Time: 4.140s
    label KACAgent, Reward 17: 205.000, Len(game): 205, Training Time: 4.040s, Prediction Time: 4.307s
    label KACAgent, Reward 18: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 4.577s
    label KACAgent, Reward 19: 345.000, Len(game): 345, Training Time: 4.040s, Prediction Time: 4.856s
    label KACAgent, Reward 20: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 5.084s
    label KACAgent, Reward 21: 299.000, Len(game): 299, Training Time: 4.040s, Prediction Time: 5.330s
    label KACAgent, Reward 22: 473.000, Len(game): 473, Training Time: 4.040s, Prediction Time: 5.721s
    label KACAgent, Reward 23: 280.000, Len(game): 280, Training Time: 4.040s, Prediction Time: 5.955s
    label KACAgent, Reward 24: 304.000, Len(game): 304, Training Time: 4.040s, Prediction Time: 6.200s
    label KACAgent, Reward 25: 195.000, Len(game): 195, Training Time: 4.040s, Prediction Time: 6.359s
    label KACAgent, Reward 26: 240.000, Len(game): 240, Training Time: 4.040s, Prediction Time: 6.558s
    label KACAgent, Reward 27: 168.000, Len(game): 168, Training Time: 4.040s, Prediction Time: 6.694s
    label KACAgent, Reward 28: 177.000, Len(game): 177, Training Time: 4.040s, Prediction Time: 6.835s
    label KACAgent, Reward 29: 282.000, Len(game): 282, Training Time: 4.040s, Prediction Time: 7.061s
    label KACAgent, Reward 30: 300.000, Len(game): 300, Training Time: 4.040s, Prediction Time: 7.304s
    label KACAgent, Reward 31: 243.000, Len(game): 243, Training Time: 4.040s, Prediction Time: 7.497s
    label KACAgent, Reward 32: 467.000, Len(game): 467, Training Time: 4.040s, Prediction Time: 7.880s
    label KACAgent, Reward 33: 158.000, Len(game): 158, Training Time: 4.040s, Prediction Time: 8.013s
    label KACAgent, Reward 34: 301.000, Len(game): 301, Training Time: 4.040s, Prediction Time: 8.261s
    label KACAgent, Reward 35: 154.000, Len(game): 154, Training Time: 4.040s, Prediction Time: 8.385s
    label KACAgent, Reward 36: 262.000, Len(game): 262, Training Time: 4.040s, Prediction Time: 8.597s
    label KACAgent, Reward 37: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 8.826s
    label KACAgent, Reward 38: 586.000, Len(game): 586, Training Time: 4.040s, Prediction Time: 9.296s
    label KACAgent, Reward 39: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 9.549s
    label KACAgent, Reward 40: 275.000, Len(game): 275, Training Time: 4.040s, Prediction Time: 9.774s
    label KACAgent, Reward 41: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 9.927s
    label KACAgent, Reward 42: 223.000, Len(game): 223, Training Time: 4.040s, Prediction Time: 10.107s
    label KACAgent, Reward 43: 323.000, Len(game): 323, Training Time: 4.040s, Prediction Time: 10.365s
    label KACAgent, Reward 44: 205.000, Len(game): 205, Training Time: 4.040s, Prediction Time: 10.530s
    label KACAgent, Reward 45: 242.000, Len(game): 242, Training Time: 4.040s, Prediction Time: 10.724s
    label KACAgent, Reward 46: 164.000, Len(game): 164, Training Time: 4.040s, Prediction Time: 10.854s
    label KACAgent, Reward 47: 314.000, Len(game): 314, Training Time: 4.040s, Prediction Time: 11.104s
    label KACAgent, Reward 48: 320.000, Len(game): 320, Training Time: 4.040s, Prediction Time: 11.360s
    label KACAgent, Reward 49: 145.000, Len(game): 145, Training Time: 4.040s, Prediction Time: 11.477s
    label KACAgent, Reward 50: 325.000, Len(game): 325, Training Time: 4.040s, Prediction Time: 11.736s
    label KACAgent, Reward 51: 219.000, Len(game): 219, Training Time: 4.040s, Prediction Time: 11.912s
    label KACAgent, Reward 52: 178.000, Len(game): 178, Training Time: 4.040s, Prediction Time: 12.058s
    label KACAgent, Reward 53: 232.000, Len(game): 232, Training Time: 4.040s, Prediction Time: 12.250s
    label KACAgent, Reward 54: 333.000, Len(game): 333, Training Time: 4.040s, Prediction Time: 12.521s
    label KACAgent, Reward 55: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 12.726s
    label KACAgent, Reward 56: 302.000, Len(game): 302, Training Time: 4.040s, Prediction Time: 12.970s
    label KACAgent, Reward 57: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 13.216s
    label KACAgent, Reward 58: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 13.494s
    label KACAgent, Reward 59: 216.000, Len(game): 216, Training Time: 4.040s, Prediction Time: 13.669s
    label KACAgent, Reward 60: 419.000, Len(game): 419, Training Time: 4.040s, Prediction Time: 14.008s
    label KACAgent, Reward 61: 264.000, Len(game): 264, Training Time: 4.040s, Prediction Time: 14.219s
    label KACAgent, Reward 62: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 14.415s
    label KACAgent, Reward 63: 185.000, Len(game): 185, Training Time: 4.040s, Prediction Time: 14.564s
    label KACAgent, Reward 64: 252.000, Len(game): 252, Training Time: 4.040s, Prediction Time: 14.768s
    label KACAgent, Reward 65: 324.000, Len(game): 324, Training Time: 4.040s, Prediction Time: 15.034s
    label KACAgent, Reward 66: 192.000, Len(game): 192, Training Time: 4.040s, Prediction Time: 15.188s
    label KACAgent, Reward 67: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 15.342s
    label KACAgent, Reward 68: 342.000, Len(game): 342, Training Time: 4.040s, Prediction Time: 15.617s
    label KACAgent, Reward 69: 309.000, Len(game): 309, Training Time: 4.040s, Prediction Time: 15.868s
    label KACAgent, Reward 70: 327.000, Len(game): 327, Training Time: 4.040s, Prediction Time: 16.134s
    label KACAgent, Reward 71: 215.000, Len(game): 215, Training Time: 4.040s, Prediction Time: 16.310s
    label KACAgent, Reward 72: 277.000, Len(game): 277, Training Time: 4.040s, Prediction Time: 16.536s
    label KACAgent, Reward 73: 188.000, Len(game): 188, Training Time: 4.040s, Prediction Time: 16.687s
    label KACAgent, Reward 74: 211.000, Len(game): 211, Training Time: 4.040s, Prediction Time: 16.858s
    label KACAgent, Reward 75: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 17.113s
    label KACAgent, Reward 76: 193.000, Len(game): 193, Training Time: 4.040s, Prediction Time: 17.268s
    label KACAgent, Reward 77: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 17.420s
    label KACAgent, Reward 78: 356.000, Len(game): 356, Training Time: 4.040s, Prediction Time: 17.708s
    label KACAgent, Reward 79: 348.000, Len(game): 348, Training Time: 4.040s, Prediction Time: 17.989s
    label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 4.040s, Prediction Time: 18.260s
    label KACAgent, Reward 81: 194.000, Len(game): 194, Training Time: 4.040s, Prediction Time: 18.416s
    label KACAgent, Reward 82: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 18.568s
    label KACAgent, Reward 83: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 18.838s
    label KACAgent, Reward 84: 136.000, Len(game): 136, Training Time: 4.040s, Prediction Time: 18.949s
    label KACAgent, Reward 85: 276.000, Len(game): 276, Training Time: 4.040s, Prediction Time: 19.174s
    label KACAgent, Reward 86: 207.000, Len(game): 207, Training Time: 4.040s, Prediction Time: 19.342s
    label KACAgent, Reward 87: 306.000, Len(game): 306, Training Time: 4.040s, Prediction Time: 19.588s
    label KACAgent, Reward 88: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 19.830s
    label KACAgent, Reward 89: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 20.057s
    label KACAgent, Reward 90: 311.000, Len(game): 311, Training Time: 4.040s, Prediction Time: 20.307s
    label KACAgent, Reward 91: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 20.504s
    label KACAgent, Reward 92: 221.000, Len(game): 221, Training Time: 4.040s, Prediction Time: 20.680s
    label KACAgent, Reward 93: 190.000, Len(game): 190, Training Time: 4.040s, Prediction Time: 20.832s
    label KACAgent, Reward 94: 266.000, Len(game): 266, Training Time: 4.040s, Prediction Time: 21.046s
    label KACAgent, Reward 95: 290.000, Len(game): 290, Training Time: 4.040s, Prediction Time: 21.284s
    label KACAgent, Reward 96: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 21.521s
    label KACAgent, Reward 97: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 21.731s
    label KACAgent, Reward 98: 238.000, Len(game): 238, Training Time: 4.040s, Prediction Time: 21.924s
    label KACAgent, Reward 99: 132.000, Len(game): 132, Training Time: 4.040s, Prediction Time: 22.030s
    label DQNAgent, Reward 0: 22.000, Len(game): 22, Training Time: 0.000s, Prediction Time: 0.000s
    label DQNAgent, Reward 1: 14.000, Len(game): 14, Training Time: 0.000s, Prediction Time: 0.000s
    label DQNAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.000s, Prediction Time: 0.000s
    label DQNAgent, Reward 3: 23.000, Len(game): 23, Training Time: 0.019s, Prediction Time: 0.001s
    label DQNAgent, Reward 4: 22.000, Len(game): 22, Training Time: 0.043s, Prediction Time: 0.002s
    label DQNAgent, Reward 5: 17.000, Len(game): 17, Training Time: 0.062s, Prediction Time: 0.002s
    label DQNAgent, Reward 6: 14.000, Len(game): 14, Training Time: 0.077s, Prediction Time: 0.002s
    label DQNAgent, Reward 7: 11.000, Len(game): 11, Training Time: 0.088s, Prediction Time: 0.003s
    label DQNAgent, Reward 8: 12.000, Len(game): 12, Training Time: 0.100s, Prediction Time: 0.004s
    label DQNAgent, Reward 9: 18.000, Len(game): 18, Training Time: 0.120s, Prediction Time: 0.004s
    label DQNAgent, Reward 10: 24.000, Len(game): 24, Training Time: 0.149s, Prediction Time: 0.004s
    label DQNAgent, Reward 11: 30.000, Len(game): 30, Training Time: 0.183s, Prediction Time: 0.004s
    label DQNAgent, Reward 12: 24.000, Len(game): 24, Training Time: 0.209s, Prediction Time: 0.004s
    label DQNAgent, Reward 13: 14.000, Len(game): 14, Training Time: 0.223s, Prediction Time: 0.004s
    label DQNAgent, Reward 14: 23.000, Len(game): 23, Training Time: 0.252s, Prediction Time: 0.005s
    label DQNAgent, Reward 15: 13.000, Len(game): 13, Training Time: 0.270s, Prediction Time: 0.006s
    label DQNAgent, Reward 16: 12.000, Len(game): 12, Training Time: 0.287s, Prediction Time: 0.007s
    label DQNAgent, Reward 17: 16.000, Len(game): 16, Training Time: 0.308s, Prediction Time: 0.007s
    label DQNAgent, Reward 18: 22.000, Len(game): 22, Training Time: 0.339s, Prediction Time: 0.007s
    label DQNAgent, Reward 19: 38.000, Len(game): 38, Training Time: 0.389s, Prediction Time: 0.008s
    label DQNAgent, Reward 20: 16.000, Len(game): 16, Training Time: 0.408s, Prediction Time: 0.008s
    label DQNAgent, Reward 21: 25.000, Len(game): 25, Training Time: 0.435s, Prediction Time: 0.009s
    label DQNAgent, Reward 22: 13.000, Len(game): 13, Training Time: 0.448s, Prediction Time: 0.010s
    label DQNAgent, Reward 23: 22.000, Len(game): 22, Training Time: 0.473s, Prediction Time: 0.011s
    label DQNAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.490s, Prediction Time: 0.011s
    label DQNAgent, Reward 25: 28.000, Len(game): 28, Training Time: 0.521s, Prediction Time: 0.012s
    label DQNAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.542s, Prediction Time: 0.013s
    label DQNAgent, Reward 27: 24.000, Len(game): 24, Training Time: 0.569s, Prediction Time: 0.014s
    label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.582s, Prediction Time: 0.014s
    label DQNAgent, Reward 29: 12.000, Len(game): 12, Training Time: 0.596s, Prediction Time: 0.014s
    label DQNAgent, Reward 30: 17.000, Len(game): 17, Training Time: 0.619s, Prediction Time: 0.014s
    label DQNAgent, Reward 31: 41.000, Len(game): 41, Training Time: 0.675s, Prediction Time: 0.016s
    label DQNAgent, Reward 32: 23.000, Len(game): 23, Training Time: 0.707s, Prediction Time: 0.016s
    label DQNAgent, Reward 33: 10.000, Len(game): 10, Training Time: 0.718s, Prediction Time: 0.017s
    label DQNAgent, Reward 34: 9.000, Len(game): 9, Training Time: 0.729s, Prediction Time: 0.018s
    label DQNAgent, Reward 35: 11.000, Len(game): 11, Training Time: 0.745s, Prediction Time: 0.018s
    label DQNAgent, Reward 36: 10.000, Len(game): 10, Training Time: 0.759s, Prediction Time: 0.018s
    label DQNAgent, Reward 37: 14.000, Len(game): 14, Training Time: 0.776s, Prediction Time: 0.019s
    label DQNAgent, Reward 38: 14.000, Len(game): 14, Training Time: 0.792s, Prediction Time: 0.020s
    label DQNAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.805s, Prediction Time: 0.021s
    label DQNAgent, Reward 40: 14.000, Len(game): 14, Training Time: 0.822s, Prediction Time: 0.022s
    label DQNAgent, Reward 41: 13.000, Len(game): 13, Training Time: 0.837s, Prediction Time: 0.022s
    label DQNAgent, Reward 42: 12.000, Len(game): 12, Training Time: 0.851s, Prediction Time: 0.022s
    label DQNAgent, Reward 43: 33.000, Len(game): 33, Training Time: 0.890s, Prediction Time: 0.023s
    label DQNAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.902s, Prediction Time: 0.023s
    label DQNAgent, Reward 45: 16.000, Len(game): 16, Training Time: 0.919s, Prediction Time: 0.023s
    label DQNAgent, Reward 46: 13.000, Len(game): 13, Training Time: 0.934s, Prediction Time: 0.024s
    label DQNAgent, Reward 47: 18.000, Len(game): 18, Training Time: 0.955s, Prediction Time: 0.024s
    label DQNAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.996s, Prediction Time: 0.025s
    label DQNAgent, Reward 49: 15.000, Len(game): 15, Training Time: 1.012s, Prediction Time: 0.025s
    label DQNAgent, Reward 50: 16.000, Len(game): 16, Training Time: 1.030s, Prediction Time: 0.026s
    label DQNAgent, Reward 51: 28.000, Len(game): 28, Training Time: 1.060s, Prediction Time: 0.027s
    label DQNAgent, Reward 52: 15.000, Len(game): 15, Training Time: 1.075s, Prediction Time: 0.028s
    label DQNAgent, Reward 53: 26.000, Len(game): 26, Training Time: 1.103s, Prediction Time: 0.030s
    label DQNAgent, Reward 54: 32.000, Len(game): 32, Training Time: 1.136s, Prediction Time: 0.031s
    label DQNAgent, Reward 55: 24.000, Len(game): 24, Training Time: 1.161s, Prediction Time: 0.032s
    label DQNAgent, Reward 56: 61.000, Len(game): 61, Training Time: 1.226s, Prediction Time: 0.035s
    label DQNAgent, Reward 57: 12.000, Len(game): 12, Training Time: 1.242s, Prediction Time: 0.035s
    label DQNAgent, Reward 58: 59.000, Len(game): 59, Training Time: 1.316s, Prediction Time: 0.037s
    label DQNAgent, Reward 59: 199.000, Len(game): 199, Training Time: 1.537s, Prediction Time: 0.043s
    label DQNAgent, Reward 60: 117.000, Len(game): 117, Training Time: 1.667s, Prediction Time: 0.047s
    label DQNAgent, Reward 61: 139.000, Len(game): 139, Training Time: 1.831s, Prediction Time: 0.051s
    label DQNAgent, Reward 62: 91.000, Len(game): 91, Training Time: 1.929s, Prediction Time: 0.054s
    label DQNAgent, Reward 63: 124.000, Len(game): 124, Training Time: 2.076s, Prediction Time: 0.059s
    label DQNAgent, Reward 64: 110.000, Len(game): 110, Training Time: 2.212s, Prediction Time: 0.064s
    label DQNAgent, Reward 65: 143.000, Len(game): 143, Training Time: 2.511s, Prediction Time: 0.071s
    label DQNAgent, Reward 66: 84.000, Len(game): 84, Training Time: 2.620s, Prediction Time: 0.074s
    label DQNAgent, Reward 67: 120.000, Len(game): 120, Training Time: 2.761s, Prediction Time: 0.080s
    label DQNAgent, Reward 68: 77.000, Len(game): 77, Training Time: 2.844s, Prediction Time: 0.084s
    label DQNAgent, Reward 69: 118.000, Len(game): 118, Training Time: 2.972s, Prediction Time: 0.089s
    label DQNAgent, Reward 70: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.095s
    label DQNAgent, Reward 71: 166.000, Len(game): 166, Training Time: 3.101s, Prediction Time: 0.101s
    label DQNAgent, Reward 72: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.109s
    label DQNAgent, Reward 73: 230.000, Len(game): 230, Training Time: 3.101s, Prediction Time: 0.119s
    label DQNAgent, Reward 74: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.124s
    label DQNAgent, Reward 75: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.129s
    label DQNAgent, Reward 76: 124.000, Len(game): 124, Training Time: 3.101s, Prediction Time: 0.135s
    label DQNAgent, Reward 77: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.139s
    label DQNAgent, Reward 78: 130.000, Len(game): 130, Training Time: 3.101s, Prediction Time: 0.144s
    label DQNAgent, Reward 79: 119.000, Len(game): 119, Training Time: 3.101s, Prediction Time: 0.149s
    label DQNAgent, Reward 80: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.156s
    label DQNAgent, Reward 81: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.160s
    label DQNAgent, Reward 82: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.165s
    label DQNAgent, Reward 83: 136.000, Len(game): 136, Training Time: 3.101s, Prediction Time: 0.170s
    label DQNAgent, Reward 84: 142.000, Len(game): 142, Training Time: 3.101s, Prediction Time: 0.176s
    label DQNAgent, Reward 85: 128.000, Len(game): 128, Training Time: 3.101s, Prediction Time: 0.180s
    label DQNAgent, Reward 86: 152.000, Len(game): 152, Training Time: 3.101s, Prediction Time: 0.187s
    label DQNAgent, Reward 87: 121.000, Len(game): 121, Training Time: 3.101s, Prediction Time: 0.192s
    label DQNAgent, Reward 88: 137.000, Len(game): 137, Training Time: 3.101s, Prediction Time: 0.197s
    label DQNAgent, Reward 89: 181.000, Len(game): 181, Training Time: 3.101s, Prediction Time: 0.205s
    label DQNAgent, Reward 90: 122.000, Len(game): 122, Training Time: 3.101s, Prediction Time: 0.209s
    label DQNAgent, Reward 91: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.216s
    label DQNAgent, Reward 92: 148.000, Len(game): 148, Training Time: 3.101s, Prediction Time: 0.223s
    label DQNAgent, Reward 93: 118.000, Len(game): 118, Training Time: 3.101s, Prediction Time: 0.227s
    label DQNAgent, Reward 94: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.232s
    label DQNAgent, Reward 95: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.237s
    label DQNAgent, Reward 96: 179.000, Len(game): 179, Training Time: 3.101s, Prediction Time: 0.245s
    label DQNAgent, Reward 97: 200.000, Len(game): 200, Training Time: 3.101s, Prediction Time: 0.253s
    label DQNAgent, Reward 98: 117.000, Len(game): 117, Training Time: 3.101s, Prediction Time: 0.258s
    label DQNAgent, Reward 99: 144.000, Len(game): 144, Training Time: 3.101s, Prediction Time: 0.265s
    Computed global error Bellman mean:  2.429265355294616e-07  iter:  2
    label KQLearningHJBCP, Reward 0: 27.000, Len(game): 27, Training Time: 0.011s, Prediction Time: 0.001s
    Computed global error Bellman mean:  1.1135033661321919e-07  iter:  6
    label KQLearningHJBCP, Reward 1: 44.000, Len(game): 44, Training Time: 0.043s, Prediction Time: 0.007s
    Computed global error Bellman mean:  3.422438720370735e-07  iter:  8
    label KQLearningHJBCP, Reward 2: 54.000, Len(game): 54, Training Time: 0.122s, Prediction Time: 0.017s
    Computed global error Bellman mean:  3.9577951882171255e-07  iter:  4
    label KQLearningHJBCP, Reward 3: 58.000, Len(game): 58, Training Time: 0.203s, Prediction Time: 0.029s
    Computed global error Bellman mean:  3.432339072491923e-07  iter:  6
    label KQLearningHJBCP, Reward 4: 58.000, Len(game): 58, Training Time: 0.415s, Prediction Time: 0.043s
    Computed global error Bellman mean:  5.280399794638235e-07  iter:  5
    label KQLearningHJBCP, Reward 5: 63.000, Len(game): 63, Training Time: 0.666s, Prediction Time: 0.060s
    Computed global error Bellman mean:  1.3332333149421225e-06  iter:  10
    label KQLearningHJBCP, Reward 6: 65.000, Len(game): 65, Training Time: 1.528s, Prediction Time: 0.080s
    Computed global error Bellman mean:  4.910292080454003e-07  iter:  6
    label KQLearningHJBCP, Reward 7: 48.000, Len(game): 48, Training Time: 2.194s, Prediction Time: 0.096s
    Computed global error Bellman mean:  4.686085707314039e-07  iter:  5
    label KQLearningHJBCP, Reward 8: 73.000, Len(game): 73, Training Time: 2.916s, Prediction Time: 0.121s
    Computed global error Bellman mean:  4.164509857890951e-07  iter:  6
    label KQLearningHJBCP, Reward 9: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.149s
    label KQLearningHJBCP, Reward 10: 33.000, Len(game): 33, Training Time: 4.082s, Prediction Time: 0.162s
    label KQLearningHJBCP, Reward 11: 104.000, Len(game): 104, Training Time: 4.082s, Prediction Time: 0.199s
    label KQLearningHJBCP, Reward 12: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.229s
    label KQLearningHJBCP, Reward 13: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 0.256s
    label KQLearningHJBCP, Reward 14: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.284s
    label KQLearningHJBCP, Reward 15: 54.000, Len(game): 54, Training Time: 4.082s, Prediction Time: 0.304s
    label KQLearningHJBCP, Reward 16: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 0.321s
    label KQLearningHJBCP, Reward 17: 93.000, Len(game): 93, Training Time: 4.082s, Prediction Time: 0.358s
    label KQLearningHJBCP, Reward 18: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.391s
    label KQLearningHJBCP, Reward 19: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.423s
    label KQLearningHJBCP, Reward 20: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 0.451s
    label KQLearningHJBCP, Reward 21: 53.000, Len(game): 53, Training Time: 4.082s, Prediction Time: 0.472s
    label KQLearningHJBCP, Reward 22: 52.000, Len(game): 52, Training Time: 4.082s, Prediction Time: 0.493s
    label KQLearningHJBCP, Reward 23: 49.000, Len(game): 49, Training Time: 4.082s, Prediction Time: 0.513s
    label KQLearningHJBCP, Reward 24: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 0.543s
    label KQLearningHJBCP, Reward 25: 113.000, Len(game): 113, Training Time: 4.082s, Prediction Time: 0.589s
    label KQLearningHJBCP, Reward 26: 110.000, Len(game): 110, Training Time: 4.082s, Prediction Time: 0.633s
    label KQLearningHJBCP, Reward 27: 43.000, Len(game): 43, Training Time: 4.082s, Prediction Time: 0.649s
    label KQLearningHJBCP, Reward 28: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 0.677s
    label KQLearningHJBCP, Reward 29: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 0.709s
    label KQLearningHJBCP, Reward 30: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 0.736s
    label KQLearningHJBCP, Reward 31: 61.000, Len(game): 61, Training Time: 4.082s, Prediction Time: 0.760s
    label KQLearningHJBCP, Reward 32: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.792s
    label KQLearningHJBCP, Reward 33: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.824s
    label KQLearningHJBCP, Reward 34: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 0.858s
    label KQLearningHJBCP, Reward 35: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 0.897s
    label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 4.082s, Prediction Time: 0.937s
    label KQLearningHJBCP, Reward 37: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 0.970s
    label KQLearningHJBCP, Reward 38: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 0.999s
    label KQLearningHJBCP, Reward 39: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 1.033s
    label KQLearningHJBCP, Reward 40: 75.000, Len(game): 75, Training Time: 4.082s, Prediction Time: 1.063s
    label KQLearningHJBCP, Reward 41: 56.000, Len(game): 56, Training Time: 4.082s, Prediction Time: 1.087s
    label KQLearningHJBCP, Reward 42: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.121s
    label KQLearningHJBCP, Reward 43: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.149s
    label KQLearningHJBCP, Reward 44: 88.000, Len(game): 88, Training Time: 4.082s, Prediction Time: 1.187s
    label KQLearningHJBCP, Reward 45: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.216s
    label KQLearningHJBCP, Reward 46: 46.000, Len(game): 46, Training Time: 4.082s, Prediction Time: 1.234s
    label KQLearningHJBCP, Reward 47: 106.000, Len(game): 106, Training Time: 4.082s, Prediction Time: 1.277s
    label KQLearningHJBCP, Reward 48: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 1.312s
    label KQLearningHJBCP, Reward 49: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.346s
    label KQLearningHJBCP, Reward 50: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.374s
    label KQLearningHJBCP, Reward 51: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.402s
    label KQLearningHJBCP, Reward 52: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.436s
    label KQLearningHJBCP, Reward 53: 94.000, Len(game): 94, Training Time: 4.082s, Prediction Time: 1.474s
    label KQLearningHJBCP, Reward 54: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.503s
    label KQLearningHJBCP, Reward 55: 86.000, Len(game): 86, Training Time: 4.082s, Prediction Time: 1.538s
    label KQLearningHJBCP, Reward 56: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.568s
    label KQLearningHJBCP, Reward 57: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.596s
    label KQLearningHJBCP, Reward 58: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 1.617s
    label KQLearningHJBCP, Reward 59: 45.000, Len(game): 45, Training Time: 4.082s, Prediction Time: 1.634s
    label KQLearningHJBCP, Reward 60: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.666s
    label KQLearningHJBCP, Reward 61: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.700s
    label KQLearningHJBCP, Reward 62: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.732s
    label KQLearningHJBCP, Reward 63: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.760s
    label KQLearningHJBCP, Reward 64: 101.000, Len(game): 101, Training Time: 4.082s, Prediction Time: 1.801s
    label KQLearningHJBCP, Reward 65: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 1.832s
    label KQLearningHJBCP, Reward 66: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 1.869s
    label KQLearningHJBCP, Reward 67: 107.000, Len(game): 107, Training Time: 4.082s, Prediction Time: 1.912s
    label KQLearningHJBCP, Reward 68: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.943s
    label KQLearningHJBCP, Reward 69: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 1.970s
    label KQLearningHJBCP, Reward 70: 64.000, Len(game): 64, Training Time: 4.082s, Prediction Time: 1.996s
    label KQLearningHJBCP, Reward 71: 69.000, Len(game): 69, Training Time: 4.082s, Prediction Time: 2.024s
    label KQLearningHJBCP, Reward 72: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.055s
    label KQLearningHJBCP, Reward 73: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.083s
    label KQLearningHJBCP, Reward 74: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 2.116s
    label KQLearningHJBCP, Reward 75: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.144s
    label KQLearningHJBCP, Reward 76: 94.000, Len(game): 94, Training Time: 4.082s, Prediction Time: 2.183s
    label KQLearningHJBCP, Reward 77: 50.000, Len(game): 50, Training Time: 4.082s, Prediction Time: 2.203s
    label KQLearningHJBCP, Reward 78: 79.000, Len(game): 79, Training Time: 4.082s, Prediction Time: 2.236s
    label KQLearningHJBCP, Reward 79: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.266s
    label KQLearningHJBCP, Reward 80: 96.000, Len(game): 96, Training Time: 4.082s, Prediction Time: 2.305s
    label KQLearningHJBCP, Reward 81: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.337s
    label KQLearningHJBCP, Reward 82: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 2.367s
    label KQLearningHJBCP, Reward 83: 42.000, Len(game): 42, Training Time: 4.082s, Prediction Time: 2.384s
    label KQLearningHJBCP, Reward 84: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.415s
    label KQLearningHJBCP, Reward 85: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 2.436s
    label KQLearningHJBCP, Reward 86: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.467s
    label KQLearningHJBCP, Reward 87: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.496s
    label KQLearningHJBCP, Reward 88: 73.000, Len(game): 73, Training Time: 4.082s, Prediction Time: 2.527s
    label KQLearningHJBCP, Reward 89: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.557s
    label KQLearningHJBCP, Reward 90: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.576s
    label KQLearningHJBCP, Reward 91: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.607s
    label KQLearningHJBCP, Reward 92: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.627s
    label KQLearningHJBCP, Reward 93: 58.000, Len(game): 58, Training Time: 4.082s, Prediction Time: 2.653s
    label KQLearningHJBCP, Reward 94: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.685s
    label KQLearningHJBCP, Reward 95: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.716s
    label KQLearningHJBCP, Reward 96: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 2.743s
    label KQLearningHJBCP, Reward 97: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.773s
    label KQLearningHJBCP, Reward 98: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.806s
    label KQLearningHJBCP, Reward 99: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.835s
    Computed global error Bellman mean:  2.2538068875251957e-08  iter:  2
    label KQLearning, Reward 0: 17.000, Len(game): 17, Training Time: 0.005s, Prediction Time: 0.001s
    Computed global error Bellman mean:  2.1038873556891403e-08  iter:  3
    label KQLearning, Reward 1: 19.000, Len(game): 19, Training Time: 0.011s, Prediction Time: 0.006s
    Computed global error Bellman mean:  3.3796859718234394e-08  iter:  4
    label KQLearning, Reward 2: 36.000, Len(game): 36, Training Time: 0.027s, Prediction Time: 0.013s
    Computed global error Bellman mean:  0.005292865730735803  iter:  5
    label KQLearning, Reward 3: 41.000, Len(game): 41, Training Time: 0.056s, Prediction Time: 0.021s
    Computed global error Bellman mean:  3.536763350335074e-08  iter:  5
    label KQLearning, Reward 4: 22.000, Len(game): 22, Training Time: 0.099s, Prediction Time: 0.026s
    Computed global error Bellman mean:  7.233596841768994e-08  iter:  5
    label KQLearning, Reward 5: 23.000, Len(game): 23, Training Time: 0.148s, Prediction Time: 0.031s
    Computed global error Bellman mean:  6.697590247687889e-05  iter:  5
    label KQLearning, Reward 6: 57.000, Len(game): 57, Training Time: 0.224s, Prediction Time: 0.044s
    Computed global error Bellman mean:  6.971595679015014e-08  iter:  5
    label KQLearning, Reward 7: 59.000, Len(game): 59, Training Time: 0.363s, Prediction Time: 0.059s
    Computed global error Bellman mean:  1.0071149827691969e-07  iter:  5
    label KQLearning, Reward 8: 97.000, Len(game): 97, Training Time: 0.654s, Prediction Time: 0.087s
    Computed global error Bellman mean:  9.729623505213322e-08  iter:  5
    label KQLearning, Reward 9: 71.000, Len(game): 71, Training Time: 1.035s, Prediction Time: 0.111s
    Computed global error Bellman mean:  1.228719115186594e-05  iter:  5
    label KQLearning, Reward 10: 143.000, Len(game): 143, Training Time: 1.667s, Prediction Time: 0.163s
    Computed global error Bellman mean:  0.22382836200452905  iter:  5
    label KQLearning, Reward 11: 198.000, Len(game): 198, Training Time: 2.817s, Prediction Time: 0.238s
    Computed global error Bellman mean:  0.09699429032149978  iter:  5
    label KQLearning, Reward 12: 305.000, Len(game): 305, Training Time: 4.855s, Prediction Time: 0.387s
    label KQLearning, Reward 13: 370.000, Len(game): 370, Training Time: 4.855s, Prediction Time: 0.593s
    label KQLearning, Reward 14: 533.000, Len(game): 533, Training Time: 4.855s, Prediction Time: 0.895s
    label KQLearning, Reward 15: 168.000, Len(game): 168, Training Time: 4.855s, Prediction Time: 0.989s
    label KQLearning, Reward 16: 150.000, Len(game): 150, Training Time: 4.855s, Prediction Time: 1.072s
    label KQLearning, Reward 17: 222.000, Len(game): 222, Training Time: 4.855s, Prediction Time: 1.197s
    label KQLearning, Reward 18: 253.000, Len(game): 253, Training Time: 4.855s, Prediction Time: 1.341s
    label KQLearning, Reward 19: 217.000, Len(game): 217, Training Time: 4.855s, Prediction Time: 1.463s
    label KQLearning, Reward 20: 221.000, Len(game): 221, Training Time: 4.855s, Prediction Time: 1.587s
    label KQLearning, Reward 21: 147.000, Len(game): 147, Training Time: 4.855s, Prediction Time: 1.671s
    label KQLearning, Reward 22: 170.000, Len(game): 170, Training Time: 4.855s, Prediction Time: 1.768s
    label KQLearning, Reward 23: 155.000, Len(game): 155, Training Time: 4.855s, Prediction Time: 1.854s
    label KQLearning, Reward 24: 295.000, Len(game): 295, Training Time: 4.855s, Prediction Time: 2.020s
    label KQLearning, Reward 25: 153.000, Len(game): 153, Training Time: 4.855s, Prediction Time: 2.107s
    label KQLearning, Reward 26: 309.000, Len(game): 309, Training Time: 4.855s, Prediction Time: 2.281s
    label KQLearning, Reward 27: 157.000, Len(game): 157, Training Time: 4.855s, Prediction Time: 2.370s
    label KQLearning, Reward 28: 263.000, Len(game): 263, Training Time: 4.855s, Prediction Time: 2.520s
    label KQLearning, Reward 29: 175.000, Len(game): 175, Training Time: 4.855s, Prediction Time: 2.622s
    label KQLearning, Reward 30: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 2.744s
    label KQLearning, Reward 31: 177.000, Len(game): 177, Training Time: 4.856s, Prediction Time: 2.846s
    label KQLearning, Reward 32: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 2.934s
    label KQLearning, Reward 33: 174.000, Len(game): 174, Training Time: 4.856s, Prediction Time: 3.034s
    label KQLearning, Reward 34: 198.000, Len(game): 198, Training Time: 4.856s, Prediction Time: 3.146s
    label KQLearning, Reward 35: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.247s
    label KQLearning, Reward 36: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 3.344s
    label KQLearning, Reward 37: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.444s
    label KQLearning, Reward 38: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.516s
    label KQLearning, Reward 39: 207.000, Len(game): 207, Training Time: 4.856s, Prediction Time: 3.635s
    label KQLearning, Reward 40: 209.000, Len(game): 209, Training Time: 4.856s, Prediction Time: 3.754s
    label KQLearning, Reward 41: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.829s
    label KQLearning, Reward 42: 134.000, Len(game): 134, Training Time: 4.856s, Prediction Time: 3.909s
    label KQLearning, Reward 43: 115.000, Len(game): 115, Training Time: 4.856s, Prediction Time: 3.977s
    label KQLearning, Reward 44: 150.000, Len(game): 150, Training Time: 4.856s, Prediction Time: 4.066s
    label KQLearning, Reward 45: 197.000, Len(game): 197, Training Time: 4.856s, Prediction Time: 4.181s
    label KQLearning, Reward 46: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 4.282s
    label KQLearning, Reward 47: 233.000, Len(game): 233, Training Time: 4.856s, Prediction Time: 4.414s
    label KQLearning, Reward 48: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 4.509s
    label KQLearning, Reward 49: 181.000, Len(game): 181, Training Time: 4.856s, Prediction Time: 4.614s
    label KQLearning, Reward 50: 192.000, Len(game): 192, Training Time: 4.856s, Prediction Time: 4.721s
    label KQLearning, Reward 51: 225.000, Len(game): 225, Training Time: 4.856s, Prediction Time: 4.853s
    label KQLearning, Reward 52: 654.000, Len(game): 654, Training Time: 4.856s, Prediction Time: 5.228s
    label KQLearning, Reward 53: 217.000, Len(game): 217, Training Time: 4.856s, Prediction Time: 5.363s
    label KQLearning, Reward 54: 240.000, Len(game): 240, Training Time: 4.856s, Prediction Time: 5.515s
    label KQLearning, Reward 55: 232.000, Len(game): 232, Training Time: 4.856s, Prediction Time: 5.658s
    label KQLearning, Reward 56: 314.000, Len(game): 314, Training Time: 4.856s, Prediction Time: 5.855s
    label KQLearning, Reward 57: 219.000, Len(game): 219, Training Time: 4.856s, Prediction Time: 5.984s
    label KQLearning, Reward 58: 365.000, Len(game): 365, Training Time: 4.856s, Prediction Time: 6.192s
    label KQLearning, Reward 59: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 6.283s
    label KQLearning, Reward 60: 223.000, Len(game): 223, Training Time: 4.856s, Prediction Time: 6.409s
    label KQLearning, Reward 61: 383.000, Len(game): 383, Training Time: 4.856s, Prediction Time: 6.631s
    label KQLearning, Reward 62: 259.000, Len(game): 259, Training Time: 4.856s, Prediction Time: 6.780s
    label KQLearning, Reward 63: 212.000, Len(game): 212, Training Time: 4.856s, Prediction Time: 6.903s
    label KQLearning, Reward 64: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 6.986s
    label KQLearning, Reward 65: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.076s
    label KQLearning, Reward 66: 181.000, Len(game): 181, Training Time: 4.856s, Prediction Time: 7.178s
    label KQLearning, Reward 67: 243.000, Len(game): 243, Training Time: 4.856s, Prediction Time: 7.318s
    label KQLearning, Reward 68: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 7.401s
    label KQLearning, Reward 69: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 7.490s
    label KQLearning, Reward 70: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 7.649s
    label KQLearning, Reward 71: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.738s
    label KQLearning, Reward 72: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 7.830s
    label KQLearning, Reward 73: 142.000, Len(game): 142, Training Time: 4.856s, Prediction Time: 7.910s
    label KQLearning, Reward 74: 298.000, Len(game): 298, Training Time: 4.856s, Prediction Time: 8.080s
    label KQLearning, Reward 75: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 8.180s
    label KQLearning, Reward 76: 194.000, Len(game): 194, Training Time: 4.856s, Prediction Time: 8.290s
    label KQLearning, Reward 77: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 8.373s
    label KQLearning, Reward 78: 165.000, Len(game): 165, Training Time: 4.856s, Prediction Time: 8.466s
    label KQLearning, Reward 79: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 8.553s
    label KQLearning, Reward 80: 202.000, Len(game): 202, Training Time: 4.856s, Prediction Time: 8.665s
    label KQLearning, Reward 81: 158.000, Len(game): 158, Training Time: 4.856s, Prediction Time: 8.756s
    label KQLearning, Reward 82: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 8.915s
    label KQLearning, Reward 83: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 9.039s
    label KQLearning, Reward 84: 200.000, Len(game): 200, Training Time: 4.856s, Prediction Time: 9.152s
    label KQLearning, Reward 85: 210.000, Len(game): 210, Training Time: 4.856s, Prediction Time: 9.270s
    label KQLearning, Reward 86: 171.000, Len(game): 171, Training Time: 4.856s, Prediction Time: 9.374s
    label KQLearning, Reward 87: 187.000, Len(game): 187, Training Time: 4.856s, Prediction Time: 9.482s
    label KQLearning, Reward 88: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 9.564s
    label KQLearning, Reward 89: 185.000, Len(game): 185, Training Time: 4.856s, Prediction Time: 9.669s
    label KQLearning, Reward 90: 133.000, Len(game): 133, Training Time: 4.856s, Prediction Time: 9.744s
    label KQLearning, Reward 91: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 9.834s
    label KQLearning, Reward 92: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 9.926s
    label KQLearning, Reward 93: 211.000, Len(game): 211, Training Time: 4.856s, Prediction Time: 10.046s
    label KQLearning, Reward 94: 164.000, Len(game): 164, Training Time: 4.856s, Prediction Time: 10.137s
    label KQLearning, Reward 95: 339.000, Len(game): 339, Training Time: 4.856s, Prediction Time: 10.328s
    label KQLearning, Reward 96: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 10.426s
    label KQLearning, Reward 97: 138.000, Len(game): 138, Training Time: 4.856s, Prediction Time: 10.504s
    label KQLearning, Reward 98: 151.000, Len(game): 151, Training Time: 4.856s, Prediction Time: 10.589s
    label KQLearning, Reward 99: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 10.684s
    2


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (4 minutes 8.002 seconds)


.. _sphx_glr_download_auto_ch8_ch8_cartpole.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: ch8_cartpole.ipynb <ch8_cartpole.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: ch8_cartpole.py <ch8_cartpole.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: ch8_cartpole.zip <ch8_cartpole.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_