.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "auto_ch8\ch8_lunarlander.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        :ref:`Go to the end <sphx_glr_download_auto_ch8_ch8_lunarlander.py>`
        to download the full example code.

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_auto_ch8_ch8_lunarlander.py:


==================================
8.3 Experiments - LunarLander
==================================
We use the Gymnasium library (the successor of OpenAI Gym) to instantiate the LunarLander-v3 environment and reproduce the figure from chapter 8_XXX.

We train the following agents:

- PPO 
- DQN 
- Controller-based
- Kernel Actor-Critic
- Kernel Q-Learning
- Kernel Q-Learning HJB
- Kernel Policy-Gradient

We show how you can override selected methods of each algorithm to tune it to the environment. For detailed documentation on KAgents, see the **codpy documentation**.

.. GENERATED FROM PYTHON SOURCE LINES 19-30

.. code-block:: Python

    # Importing necessary modules
    import sys

    from matplotlib import pyplot as plt
    import numpy as np

    import codpy.core as core
    import codpy.KQLearning as KQLearning
    import codpy.conditioning as conditioning
    from ignore_utils import * 


.. GENERATED FROM PYTHON SOURCE LINES 31-33

KQLearning
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 33-80

.. code-block:: Python

    class KQLearningLN(KQLearning.KQLearning):
        """
        Kernel Q-Learning agent for LunarLander that fits one kernel per game.
        """

        def train(
            self,
            game,
            max_training_game_size=None,
            format=True,
            replay_buffer=True,
            tol=1e-2,
            **kwargs
        ):
            """
            For LunarLander, we want to fit one kernel per game, so we override the train method.

            The latest game is formatted with ``self.format``, a kernel is fitted on
            that game alone and added to the critic. Then every stored kernel whose
            ``bellman_error`` is above ``tol`` (and that has not been flagged) is
            refitted on its own game.

            Parameters:
            - game: Raw game data, passed to ``self.format``.
            - max_training_game_size: Forwarded to ``self.format``.
            - format: Not used by this override; kept for signature compatibility.
            - replay_buffer: Not used by this override; kept for signature compatibility.
            - tol: Bellman-error threshold. It is used both to decide whether a kernel
              is refitted and as the minimal improvement a refit must bring.
            - kwargs: Forwarded to ``self.format``, ``self.optimal_states_values_function``
              and ``self.critic.add_kernel``.
            """
            game = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            # Here the kernel is fit on the latest game only.
            kernel = self.optimal_states_values_function(game, verbose=True, **kwargs)
            kernel.games = game
            self.critic.add_kernel(kernel, **kwargs)

            # Kernels are never removed: a kernel whose refit does not improve the
            # Bellman error by at least `tol` is flagged so that it is skipped later.
            # (The former pruning branch could never run because its list of kernels
            # to delete was never filled, so it has been dropped.)
            for i in list(self.critic.kernels.keys()):
                current = self.critic.kernels[i]
                error = current.bellman_error
                if error > tol and not hasattr(current, "flag_kill_me"):
                    refit = self.optimal_states_values_function(
                        current.games,
                        kernel=current,
                        verbose=True,
                        **kwargs,
                    )
                    refit.games = current.games
                    if refit.bellman_error >= error - tol:
                        # NOTE(review): the flag is set on the object returned by the
                        # refit. If that is a new object rather than `current` updated
                        # in place, the stored kernel is never flagged and will be
                        # refitted on every call -- confirm.
                        refit.flag_kill_me = "please"
                    else:
                        self.critic.kernels[i] = refit


.. GENERATED FROM PYTHON SOURCE LINES 81-83

PolicyGradient
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 83-155

.. code-block:: Python

    class PolicyGradientLN(KQLearning.PolicyGradient):
        """
        Policy-gradient agent for LunarLander.

        Overrides ``train`` and ``format`` of ``KQLearning.PolicyGradient``.
        """

        def train(self, game, max_training_game_size=None, **kwargs):
            """
            Update the actor from the replay memory, then store the latest game.

            Parameters:
            - game: Raw game data, passed to ``self.format``.
            - max_training_game_size: Forwarded to ``self.format``.
            - kwargs: Forwarded to ``self.format``, ``self.get_advantages``,
              ``self.update_probabilities`` and ``self.replay_buffer.push``.
            """
            buffer = self.replay_buffer
            # Stop training once the actor holds more points than the buffer capacity.
            if self.actor.is_valid() and self.actor.get_x().shape[0] > buffer.capacity:
                return

            state, action, next_state, reward, return_, done = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )

            # Use the whole replay memory when it is non-empty, else the latest game.
            source = (
                buffer.memory
                if len(buffer)
                else (state, action, next_state, reward, return_, done)
            )
            states, actions, next_states, rewards, returns, dones = source
            games = [states, actions, next_states, rewards, returns, dones]

            # Current policy on the training states; uniform when no actor is fitted yet.
            if self.actor.is_valid():
                last_policy = self.actor(states)
            else:
                uniform_prob = 1.0 / self.actions_dim
                last_policy = np.full(
                    [states.shape[0], self.actions_dim], uniform_prob
                )
            # Keep probabilities strictly inside (0, 1).
            eps = 1e-9
            last_policy = np.where(last_policy < eps, eps, last_policy)
            last_policy = np.where(last_policy > 1.0 - eps, 1.0 - eps, last_policy)

            # update probabilities
            # (skipped when a valid actor already holds at least `capacity` points)
            needs_update = (
                not self.actor.is_valid()
                or self.actor.get_x().shape[0] < buffer.capacity
            )
            if needs_update:
                advantages, self.value_function = self.get_advantages(
                    games, policy=last_policy, **kwargs
                )
                self.actor = self.update_probabilities(
                    advantages, games, last_policy=last_policy, clip=.1, **kwargs
                )

            # Record the sum of the rewards that were used for this training step.
            total_reward = rewards.sum()
            if hasattr(self, "scores"):
                self.scores.append(total_reward)
            else:
                self.scores = [total_reward]

            # Only the latest game is pushed to the replay buffer.
            buffer.push(
                state, action, next_state, reward, return_, done, worst_game=False, **kwargs
            )

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Convert raw game data into the arrays used by ``train``.

            Parameters:
            - sarsd: Iterable of five items (states, actions, next states, rewards, dones).
            - max_training_game_size: If not None, only the last
              ``max_training_game_size`` rows of each array are kept.
            - kwargs: Forwarded to ``self.compute_returns``.

            Returns the tuple (states, actions, next_states, rewards, returns, dones),
            with actions one-hot encoded by ``KQLearning.rl_hot_encoder``.
            """
            states, actions, next_states, rewards, dones = (
                core.get_matrix(item) for item in sarsd
            )
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            dones = core.get_matrix(dones, dtype=bool)

            if max_training_game_size is not None:
                # Keep the tail (most recent steps) of the game.
                tail = slice(-max_training_game_size, None)
                states, actions, next_states, rewards, dones = (
                    arr[tail] for arr in (states, actions, next_states, rewards, dones)
                )

            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            return states, actions, next_states, rewards, returns, dones


.. GENERATED FROM PYTHON SOURCE LINES 156-158

KActorCritic
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 158-233

.. code-block:: Python

    class KActorCriticLN(KQLearning.KActorCritic):
        """
        Defines the main KActorCritic class.

        This inherits from KQLearning.KActorCritic. You can then extend any method from the main class to fit your needs.

        """

        def train(self, game, max_training_game_size=None, **kwargs):
            """
            Update the actor from the replay memory, then store the latest game.

            Parameters:
            - game: Raw game data, passed to ``self.format``.
            - max_training_game_size: Forwarded to ``self.format``.
            - kwargs: Forwarded to ``self.format``, ``self.get_advantages``,
              ``self.update_probabilities`` and ``self.replay_buffer.push``.
            """
            memory_buffer = self.replay_buffer
            # Stop training once the actor holds more points than the buffer capacity.
            if (
                self.actor.is_valid()
                and self.actor.get_x().shape[0] > memory_buffer.capacity
            ):
                return

            state, action, next_state, reward, return_, done = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )

            # Use the whole replay memory when it is non-empty, else the latest game.
            if len(memory_buffer):
                training_data = memory_buffer.memory
            else:
                training_data = (state, action, next_state, reward, return_, done)
            states, actions, next_states, rewards, returns, dones = training_data
            games = [states, actions, next_states, rewards, returns, dones]

            # Current policy on the training states; uniform when no actor is fitted yet.
            if self.actor.is_valid():
                last_policy = self.actor(states)
            else:
                last_policy = np.full(
                    [states.shape[0], self.actions_dim], 1.0 / self.actions_dim
                )
            # Keep probabilities strictly inside (0, 1).
            floor = 1e-9
            ceiling = 1. - 1e-9
            last_policy = np.where(last_policy < floor, floor, last_policy)
            last_policy = np.where(last_policy > ceiling, ceiling, last_policy)

            # update probabilities
            # (skipped when a valid actor already holds at least `capacity` points)
            if (
                not self.actor.is_valid()
                or self.actor.get_x().shape[0] < memory_buffer.capacity
            ):
                advantages, self.value_function = self.get_advantages(
                    games, policy=last_policy, **kwargs
                )
                self.actor = self.update_probabilities(
                    advantages, games, last_policy=last_policy, clip=.1, **kwargs
                )

            # Record the sum of the rewards that were used for this training step.
            score = rewards.sum()
            if hasattr(self, "scores"):
                self.scores.append(score)
            else:
                self.scores = [score]

            # Only the latest game is pushed to the replay buffer.
            memory_buffer.push(
                state, action, next_state, reward, return_, done, worst_game=False, **kwargs
            )

        def format(self, sarsd, max_training_game_size=None, **kwargs):
            """
            Convert raw game data into the arrays used by ``train``.

            Parameters:
            - sarsd: Iterable of five items (states, actions, next states, rewards, dones).
            - max_training_game_size: If not None, only the last
              ``max_training_game_size`` rows of each array are kept.
            - kwargs: Forwarded to ``self.compute_returns``.

            Returns the tuple (states, actions, next_states, rewards, returns, dones),
            with actions one-hot encoded by ``KQLearning.rl_hot_encoder``.
            """
            states, actions, next_states, rewards, dones = (
                core.get_matrix(item) for item in sarsd
            )
            actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
            dones = core.get_matrix(dones, dtype=bool)

            if max_training_game_size is not None:
                # Keep the tail (most recent steps) of the game.
                last_steps = slice(-max_training_game_size, None)
                states = states[last_steps]
                actions = actions[last_steps]
                next_states = next_states[last_steps]
                rewards = rewards[last_steps]
                dones = dones[last_steps]

            returns = self.compute_returns(
                states, actions, next_states, rewards, dones, **kwargs
            )
            return states, actions, next_states, rewards, returns, dones


.. GENERATED FROM PYTHON SOURCE LINES 234-236

HJB
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 236-302

.. code-block:: Python

    class KQLearningHJBLN(KQLearning.KQLearningHJB):
        """
        HJB variant of the kernel Q-Learning agent for LunarLander.

        Overrides action selection, the conditioned-kernel factory and ``train``.
        """

        def __call__(self, state, **kwargs):
            """
            Pick an action for ``state`` with a decaying epsilon-greedy rule.

            ``self.eps_threshold`` is multiplied by 0.999 on every call. When a
            uniform draw exceeds it and the critic is valid, the action with the
            largest critic value is returned; otherwise a uniformly random action
            in ``[0, self.actions_dim)`` is returned.
            """
            self.eps_threshold *= 0.999
            # The random draw comes first so the random stream is consumed on every call.
            if np.random.random() > self.eps_threshold and self.critic.is_valid():
                z = self.all_states_actions(core.get_matrix(state).T)
                q_values = self.critic(z)
                # Tiny random perturbation before argmax, presumably to break ties.
                q_values += np.random.random(q_values.shape) * 1e-9
                return np.argmax(q_values)
            return np.random.randint(0, self.actions_dim)

        def get_conditioned_kernel(self, games, **kwargs):
            """
            Build the conditioned kernel with ``conditioning.ConditionerKernel`` as base class.
            """
            return KQLearning.get_conditioned_kernel(
                games, base_class=conditioning.ConditionerKernel, **kwargs
            )

        def train(
            self,
            game,
            max_training_game_size=None,
            format=True,
            replay_buffer=True,
            tol=1e-2,
            **kwargs
        ):
            """
            Fit one kernel on the latest game and refit stored kernels with a large Bellman error.

            Parameters:
            - game: Raw game data, passed to ``self.format``.
            - max_training_game_size: Forwarded to ``self.format``.
            - format: Not used by this override; kept for signature compatibility.
            - replay_buffer: Not used by this override; kept for signature compatibility.
            - tol: Bellman-error threshold. It is used both to decide whether a kernel
              is refitted and as the minimal improvement a refit must bring.
            - kwargs: Forwarded to ``self.format``, ``self.optimal_states_values_function``
              and ``self.critic.add_kernel``.
            """
            game = self.format(
                game, max_training_game_size=max_training_game_size, **kwargs
            )
            kernel = self.optimal_states_values_function(game, verbose=True, **kwargs)
            kernel.games = game
            self.critic.add_kernel(kernel, **kwargs)

            # Kernels are never removed: a kernel whose refit does not improve the
            # Bellman error by at least `tol` is flagged so that it is skipped later.
            # (The former pruning branch could never run because its list of kernels
            # to delete was never filled, so it has been dropped.)
            for i in list(self.critic.kernels.keys()):
                current = self.critic.kernels[i]
                error = current.bellman_error
                if error > tol and not hasattr(current, "flag_kill_me"):
                    refit = self.optimal_states_values_function(
                        current.games,
                        kernel=current,
                        verbose=True,
                        **kwargs,
                    )
                    refit.games = current.games
                    if refit.bellman_error >= error - tol:
                        # NOTE(review): the flag is set on the object returned by the
                        # refit. If that is a new object rather than `current` updated
                        # in place, the stored kernel is never flagged and will be
                        # refitted on every call -- confirm.
                        refit.flag_kill_me = "please"
                    else:
                        self.critic.kernels[i] = refit


.. GENERATED FROM PYTHON SOURCE LINES 303-305

KController
------------------------

.. GENERATED FROM PYTHON SOURCE LINES 305-507

.. code-block:: Python

    class heuristic_ControllerLN:
        """
        Defines the heuristic controller for LunarLander. We choose to use 12 parameters to be tweaked.

        The controller maps a state ``s`` to a discrete action in {0, 1, 2, 3} using
        the weight vector ``w`` (12 entries, read as ``w[0]`` ... ``w[11]``).

        NOTE(review): the indexing of ``s`` assumes the Gymnasium LunarLander
        observation layout (x, y, vx, vy, angle, angular velocity, left-leg contact,
        right-leg contact) and its discrete actions (0: nothing, 1: left engine,
        2: main engine, 3: right engine) -- confirm against the environment docs.
        """
        dim = 12

        def __init__(self, w=None, **kwargs):
            """
            Parameters:
            - w: Optional weights (array-like). Defaults to ``dim`` weights equal to 0.5.
              Any array-like is accepted and stored as a 1-D array, like ``set_thetas``.
            - kwargs: Ignored; accepted so callers can pass extra settings.
            """
            if w is None:
                self.w = np.ones([self.dim]) * 0.5
            else:
                # reshape(-1) keeps a view on an existing 1-D array (no copy) while
                # also accepting lists and (1, dim) rows.
                self.w = np.asarray(w).reshape(-1)

        def get_distribution(self):
            """
            Return a sampler for the weights: uniform on [0, 1] in each coordinate.

            The returned object is callable (``sampler(n)`` gives an ``(n, len(w))``
            array) and has a ``support`` method that clips values to [0, 1].
            """
            class uniform:
                def __init__(self, shape1):
                    self.shape1 = shape1

                def __call__(self, n):
                    return np.random.uniform(size=[n, self.shape1])

                def support(self, v):
                    out = np.clip(v, 0, 1)
                    return out

            return uniform(self.w.shape[0])

        def get_thetas(self):
            """Return the current weight vector."""
            return self.w

        def set_thetas(self, w):
            """Replace the weights by a flattened copy of ``w`` (any array-like)."""
            self.w = np.asarray(w).flatten()

        def __call__(self, s, **kwargs):
            """
            Compute the action for state ``s`` (indexable with at least 8 entries).

            Returns 0, 1, 2 or 3.
            """
            # Target angle from s[0] and s[2], limited to [-w[2], w[2]].
            angle_targ = s[0] * self.w[0] + s[2] * self.w[1]
            if angle_targ > self.w[2]:
                angle_targ = self.w[2]
            if angle_targ < -self.w[2]:
                angle_targ = -self.w[2]
            # Target for s[1], proportional to |s[0]|.
            hover_targ = self.w[3] * np.abs(s[0])

            # Proportional-derivative style corrections on s[4]/s[5] and s[1]/s[3].
            angle_todo = (angle_targ - s[4]) * self.w[4] - (s[5]) * self.w[5]
            hover_todo = (hover_targ - s[1]) * self.w[6] - (s[3]) * self.w[7]

            # When either of s[6], s[7] is set, the corrections are overridden.
            if s[6] or s[7]:
                angle_todo = self.w[8]
                hover_todo = -(s[3]) * self.w[9]

            # Map the two corrections to a discrete action using thresholds w[10], w[11].
            a = 0
            if hover_todo > np.abs(angle_todo) and hover_todo > self.w[10]:
                a = 2
            elif angle_todo < -self.w[11]:
                a = 3
            elif angle_todo > +self.w[11]:
                a = 1
            return a
    

    class KControllerLN(KQLearning.KController):
        """
        Defines the class for optimizing the controller.

        The class inherits from KQLearning.KController. You can then extend any method from the main class to fit your needs.

        Parameters:
        - state_dim: Dimension of the environment's state space.
        - actions_dim: Dimension of the environment's action space.
        """
        def __init__(self, state_dim, actions_dim, **kwargs):
            # The heuristic controller is the object whose parameters get optimized.
            controller = heuristic_ControllerLN(state_dim=state_dim, **kwargs)
            super().__init__(state_dim, actions_dim, controller, **kwargs)

        def get_function(self, **kwargs):
            """
            The optimizer will find the best parameters which maximize this function.

            This is where you would tweak the function to be maximized. Here it is the
            sum of the expectation estimator and of its ``distance`` term, both
            evaluated at the candidate parameters ``x``.
            """
            self.expectation_estimator = self.get_expectation_estimator(
                self.x, self.y, **kwargs
            )

            def function(x):
                expectation = self.expectation_estimator(x)
                distance = self.expectation_estimator.distance(x)
                return expectation + distance

            return function

        def format(self, sarsd, **kwargs):
            """
            This formats the game data to be used in the train method.

            Each of state, next state, reward and done is reduced to its mean over the
            game (axis 0). The "action" slot holds the controller's current parameters
            (``self.controller.get_thetas()``) instead of the actions that were played.
            """
            # The played actions are converted like the other items but not used:
            # the slot is filled with the controller parameters below. (A one-hot
            # encoding of them used to be computed and immediately overwritten.)
            state, _, next_state, reward, done = [core.get_matrix(e) for e in sarsd]

            action = core.get_matrix(self.controller.get_thetas()).T
            done = core.get_matrix(done, dtype=bool)
            return (
                core.get_matrix(state.mean(axis=0)).T,
                core.get_matrix(action.mean(axis=0)).T,
                core.get_matrix(next_state.mean(axis=0)).T,
                core.get_matrix(reward.mean(axis=0)).T,
                core.get_matrix(done.mean(axis=0)).T,
            )

    def main():
        """
        Run the LunarLander benchmark for the agents listed below and show the figures.
        """
        # Define agents here, which will be trained in the benchmark. If game_dictionary is empty, the benchmark will try to load data from the .pkl file
        game_dictionary = {
            "PPOAgent": PPOAgent,
            "Controller-based": KControllerLN,
            "KACAgent": KActorCriticLN,
            "PolicyGradient": PolicyGradientLN,
            "DQNAgent": DQNAgent,
            # "KQLearningHJBCP": KQLearningHJBLN, #bug to solve get_transition
            "KQLearning": KQLearningLN,
        }

        # Define your agent's parameters here. This dict will be passed in each agent's __init__() method.
        extras = {
            "KActor": {
                # "latent_shape":[100,50],
                "max_size": 1000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-6,
                "order": None,
            },
            "KCritic": {
                "max_size": 1000000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "HJBModel": {
                # "latent_shape":[100,50],
                "max_size": 100000,
                "n_batch": 1000000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
                "state_dim": 8,
            },
            "Rewards": {
                "max_size": 1000000,
                "n_batch": 100000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "NextStates": {
                "max_size": 1000000,
                "n_batch": 100000,
                "max_nystrom": 1000,
                "reg": 1e-9,
                "order": None,
            },
            "DQNAgent": {
                # 'reward_function': mc_reward_function,
                "episodes": 500,
                "policy_param": 64,
                "target_param": 64,
            },
            "ACAgent": {"reward_function": None},
            # Empty parameter dict, like the other entries (it used to be the set
            # literal ``{0}``, which is not a mapping).
            "QAgent": {
                # 'reward_function': mc_reward_function
            },
            "KController": {
                "reg": 1e-9,
                "order": 2,
            },
            "Conditionner": {
                "reg": 1e-4,
                "order": 3,
            },
            "max_game": 2000,
            "gamma": 0.99,
            "capacity": 10000,
            "max_training_game_size": 1000,
            # "max_kernel": 40
            # "seed": 42,
        }
        # With no "seed" entry in extras, numpy is seeded with None (non-reproducible run).
        seed = extras.get("seed", None)
        np.random.seed(seed)
        Benchmark()(
            game_dictionary,
            "LunarLander-v3",
            num_games=100,
            eps_threshold=0.1,
            num_repeats=3,
            max_time=50,
            axis="episodes",
            # file_name="results_LN_final.pkl",
            **extras,
        )
        plt.show()

    # Run the benchmark and display the figures when this example script is executed.
    main()


.. rst-class:: sphx-glr-horizontal


    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_lunarlander_001.png
         :alt: Cumulative Reward over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_lunarlander_001.png
         :class: sphx-glr-multi-img

    *

      .. image-sg:: /auto_ch8/images/sphx_glr_ch8_lunarlander_002.png
         :alt: Training Time per Game over 100 Games
         :srcset: /auto_ch8/images/sphx_glr_ch8_lunarlander_002.png
         :class: sphx-glr-multi-img


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    label PPOAgent, Reward 0: -155.007, Len(game): 110, Training Time: 0.028s, Prediction Time: 0.028s
    label PPOAgent, Reward 1: -49.228, Len(game): 68, Training Time: 0.044s, Prediction Time: 0.044s
    label PPOAgent, Reward 2: -63.131, Len(game): 73, Training Time: 0.062s, Prediction Time: 0.062s
    label PPOAgent, Reward 3: -85.729, Len(game): 70, Training Time: 0.078s, Prediction Time: 0.078s
    label PPOAgent, Reward 4: -125.021, Len(game): 80, Training Time: 0.098s, Prediction Time: 0.098s
    label PPOAgent, Reward 5: -164.398, Len(game): 119, Training Time: 0.127s, Prediction Time: 0.127s
    label PPOAgent, Reward 6: -99.909, Len(game): 80, Training Time: 0.146s, Prediction Time: 0.146s
    label PPOAgent, Reward 7: -276.220, Len(game): 104, Training Time: 0.172s, Prediction Time: 0.172s
    label PPOAgent, Reward 8: -248.290, Len(game): 105, Training Time: 0.197s, Prediction Time: 0.197s
    label PPOAgent, Reward 9: -218.675, Len(game): 107, Training Time: 0.223s, Prediction Time: 0.223s
    label PPOAgent, Reward 10: -237.605, Len(game): 123, Training Time: 0.253s, Prediction Time: 0.253s
    label PPOAgent, Reward 11: -429.480, Len(game): 110, Training Time: 0.279s, Prediction Time: 0.279s
    label PPOAgent, Reward 12: -161.509, Len(game): 113, Training Time: 0.346s, Prediction Time: 0.346s
    label PPOAgent, Reward 13: -61.647, Len(game): 60, Training Time: 0.360s, Prediction Time: 0.360s
    label PPOAgent, Reward 14: -319.218, Len(game): 81, Training Time: 0.380s, Prediction Time: 0.380s
    label PPOAgent, Reward 15: -120.076, Len(game): 82, Training Time: 0.401s, Prediction Time: 0.401s
    label PPOAgent, Reward 16: -7.705, Len(game): 2000, Training Time: 0.959s, Prediction Time: 0.959s
    label PPOAgent, Reward 17: -257.049, Len(game): 98, Training Time: 0.984s, Prediction Time: 0.984s
    label PPOAgent, Reward 18: -78.086, Len(game): 68, Training Time: 1.038s, Prediction Time: 1.038s
    label PPOAgent, Reward 19: -269.682, Len(game): 131, Training Time: 1.073s, Prediction Time: 1.073s
    label PPOAgent, Reward 20: -102.666, Len(game): 79, Training Time: 1.094s, Prediction Time: 1.094s
    label PPOAgent, Reward 21: -143.012, Len(game): 89, Training Time: 1.116s, Prediction Time: 1.116s
    label PPOAgent, Reward 22: -164.618, Len(game): 79, Training Time: 1.137s, Prediction Time: 1.137s
    label PPOAgent, Reward 23: -126.824, Len(game): 81, Training Time: 1.158s, Prediction Time: 1.158s
    label PPOAgent, Reward 24: -79.662, Len(game): 62, Training Time: 1.174s, Prediction Time: 1.174s
    label PPOAgent, Reward 25: -82.773, Len(game): 152, Training Time: 1.213s, Prediction Time: 1.213s
    label PPOAgent, Reward 26: -125.296, Len(game): 104, Training Time: 1.240s, Prediction Time: 1.240s
    label PPOAgent, Reward 27: -84.146, Len(game): 106, Training Time: 1.265s, Prediction Time: 1.265s
    label PPOAgent, Reward 28: -149.814, Len(game): 110, Training Time: 1.292s, Prediction Time: 1.292s
    label PPOAgent, Reward 29: 17.381, Len(game): 68, Training Time: 1.310s, Prediction Time: 1.310s
    label PPOAgent, Reward 30: -85.098, Len(game): 115, Training Time: 1.374s, Prediction Time: 1.374s
    label PPOAgent, Reward 31: -238.813, Len(game): 102, Training Time: 1.401s, Prediction Time: 1.401s
    label PPOAgent, Reward 32: -134.363, Len(game): 111, Training Time: 1.429s, Prediction Time: 1.429s
    label PPOAgent, Reward 33: -178.439, Len(game): 89, Training Time: 1.451s, Prediction Time: 1.451s
    label PPOAgent, Reward 34: -144.528, Len(game): 111, Training Time: 1.479s, Prediction Time: 1.479s
    label PPOAgent, Reward 35: -178.162, Len(game): 99, Training Time: 1.505s, Prediction Time: 1.505s
    label PPOAgent, Reward 36: -60.394, Len(game): 65, Training Time: 1.523s, Prediction Time: 1.523s
    label PPOAgent, Reward 37: -110.917, Len(game): 87, Training Time: 1.544s, Prediction Time: 1.544s
    label PPOAgent, Reward 38: -136.499, Len(game): 100, Training Time: 1.569s, Prediction Time: 1.569s
    label PPOAgent, Reward 39: -83.841, Len(game): 87, Training Time: 1.590s, Prediction Time: 1.590s
    label PPOAgent, Reward 40: -230.637, Len(game): 108, Training Time: 1.616s, Prediction Time: 1.616s
    label PPOAgent, Reward 41: -84.512, Len(game): 84, Training Time: 1.637s, Prediction Time: 1.637s
    label PPOAgent, Reward 42: -214.474, Len(game): 137, Training Time: 1.707s, Prediction Time: 1.707s
    label PPOAgent, Reward 43: -109.316, Len(game): 76, Training Time: 1.729s, Prediction Time: 1.729s
    label PPOAgent, Reward 44: -94.543, Len(game): 81, Training Time: 1.749s, Prediction Time: 1.749s
    label PPOAgent, Reward 45: -87.504, Len(game): 76, Training Time: 1.769s, Prediction Time: 1.769s
    label PPOAgent, Reward 46: -121.231, Len(game): 76, Training Time: 1.787s, Prediction Time: 1.787s
    label PPOAgent, Reward 47: -139.251, Len(game): 96, Training Time: 1.811s, Prediction Time: 1.811s
    label PPOAgent, Reward 48: -130.601, Len(game): 118, Training Time: 1.840s, Prediction Time: 1.840s
    label PPOAgent, Reward 49: -261.466, Len(game): 119, Training Time: 1.871s, Prediction Time: 1.871s
    label PPOAgent, Reward 50: -329.855, Len(game): 102, Training Time: 1.897s, Prediction Time: 1.897s
    label PPOAgent, Reward 51: -133.380, Len(game): 71, Training Time: 1.914s, Prediction Time: 1.914s
    label PPOAgent, Reward 52: -107.086, Len(game): 85, Training Time: 1.936s, Prediction Time: 1.936s
    label PPOAgent, Reward 53: -147.687, Len(game): 102, Training Time: 1.960s, Prediction Time: 1.960s
    label PPOAgent, Reward 54: -116.676, Len(game): 86, Training Time: 1.981s, Prediction Time: 1.981s
    label PPOAgent, Reward 55: -153.364, Len(game): 90, Training Time: 2.003s, Prediction Time: 2.003s
    label PPOAgent, Reward 56: -73.097, Len(game): 105, Training Time: 2.064s, Prediction Time: 2.064s
    label PPOAgent, Reward 57: -100.277, Len(game): 83, Training Time: 2.085s, Prediction Time: 2.085s
    label PPOAgent, Reward 58: -52.122, Len(game): 78, Training Time: 2.105s, Prediction Time: 2.105s
    label PPOAgent, Reward 59: -76.214, Len(game): 95, Training Time: 2.128s, Prediction Time: 2.128s
    label PPOAgent, Reward 60: -124.150, Len(game): 68, Training Time: 2.145s, Prediction Time: 2.145s
    label PPOAgent, Reward 61: -79.070, Len(game): 62, Training Time: 2.161s, Prediction Time: 2.161s
    label PPOAgent, Reward 62: -197.114, Len(game): 102, Training Time: 2.187s, Prediction Time: 2.187s
    label PPOAgent, Reward 63: -209.057, Len(game): 102, Training Time: 2.213s, Prediction Time: 2.213s
    label PPOAgent, Reward 64: -58.717, Len(game): 66, Training Time: 2.229s, Prediction Time: 2.229s
    label PPOAgent, Reward 65: -270.200, Len(game): 92, Training Time: 2.252s, Prediction Time: 2.252s
    label PPOAgent, Reward 66: -13.982, Len(game): 115, Training Time: 2.281s, Prediction Time: 2.281s
    label PPOAgent, Reward 67: -12.036, Len(game): 116, Training Time: 2.310s, Prediction Time: 2.310s
    label PPOAgent, Reward 68: -111.589, Len(game): 67, Training Time: 2.327s, Prediction Time: 2.327s
    label PPOAgent, Reward 69: -123.134, Len(game): 90, Training Time: 2.384s, Prediction Time: 2.384s
    label PPOAgent, Reward 70: -124.631, Len(game): 101, Training Time: 2.408s, Prediction Time: 2.408s
    label PPOAgent, Reward 71: -207.868, Len(game): 83, Training Time: 2.429s, Prediction Time: 2.429s
    label PPOAgent, Reward 72: -56.974, Len(game): 75, Training Time: 2.448s, Prediction Time: 2.448s
    label PPOAgent, Reward 73: -106.013, Len(game): 120, Training Time: 2.482s, Prediction Time: 2.482s
    label PPOAgent, Reward 74: -123.814, Len(game): 114, Training Time: 2.511s, Prediction Time: 2.511s
    label PPOAgent, Reward 75: -122.384, Len(game): 82, Training Time: 2.531s, Prediction Time: 2.531s
    label PPOAgent, Reward 76: -110.124, Len(game): 83, Training Time: 2.555s, Prediction Time: 2.555s
    label PPOAgent, Reward 77: -112.512, Len(game): 130, Training Time: 2.587s, Prediction Time: 2.587s
    label PPOAgent, Reward 78: -63.188, Len(game): 70, Training Time: 2.604s, Prediction Time: 2.604s
    label PPOAgent, Reward 79: 22.390, Len(game): 117, Training Time: 2.633s, Prediction Time: 2.633s
    label PPOAgent, Reward 80: -98.291, Len(game): 71, Training Time: 2.651s, Prediction Time: 2.651s
    label PPOAgent, Reward 81: -89.695, Len(game): 90, Training Time: 2.673s, Prediction Time: 2.673s
    label PPOAgent, Reward 82: -118.329, Len(game): 79, Training Time: 2.739s, Prediction Time: 2.739s
    label PPOAgent, Reward 83: -126.434, Len(game): 100, Training Time: 2.764s, Prediction Time: 2.764s
    label PPOAgent, Reward 84: -179.646, Len(game): 99, Training Time: 2.788s, Prediction Time: 2.788s
    label PPOAgent, Reward 85: -65.340, Len(game): 56, Training Time: 2.802s, Prediction Time: 2.802s
    label PPOAgent, Reward 86: -97.081, Len(game): 62, Training Time: 2.818s, Prediction Time: 2.818s
    label PPOAgent, Reward 87: -123.019, Len(game): 69, Training Time: 2.835s, Prediction Time: 2.835s
    label PPOAgent, Reward 88: -150.878, Len(game): 65, Training Time: 2.852s, Prediction Time: 2.852s
    label PPOAgent, Reward 89: -71.878, Len(game): 104, Training Time: 2.878s, Prediction Time: 2.878s
    label PPOAgent, Reward 90: -78.592, Len(game): 108, Training Time: 2.906s, Prediction Time: 2.906s
    label PPOAgent, Reward 91: -54.500, Len(game): 74, Training Time: 2.924s, Prediction Time: 2.924s
    label PPOAgent, Reward 92: -73.596, Len(game): 59, Training Time: 2.938s, Prediction Time: 2.938s
    label PPOAgent, Reward 93: -170.120, Len(game): 126, Training Time: 2.971s, Prediction Time: 2.971s
    label PPOAgent, Reward 94: -90.585, Len(game): 74, Training Time: 2.990s, Prediction Time: 2.990s
    label PPOAgent, Reward 95: -84.918, Len(game): 54, Training Time: 3.003s, Prediction Time: 3.003s
    label PPOAgent, Reward 96: -110.031, Len(game): 124, Training Time: 3.067s, Prediction Time: 3.067s
    label PPOAgent, Reward 97: -81.512, Len(game): 71, Training Time: 3.085s, Prediction Time: 3.085s
    label PPOAgent, Reward 98: -167.500, Len(game): 102, Training Time: 3.111s, Prediction Time: 3.111s
    label PPOAgent, Reward 99: -123.470, Len(game): 71, Training Time: 3.129s, Prediction Time: 3.129s
    label Controller-based, Reward 0: -66.389, Len(game): 62, Training Time: 0.002s, Prediction Time: 0.002s
    label Controller-based, Reward 1: -147.150, Len(game): 78, Training Time: 0.004s, Prediction Time: 0.004s
    label Controller-based, Reward 2: -29.537, Len(game): 91, Training Time: 0.035s, Prediction Time: 0.007s
    label Controller-based, Reward 3: -351.113, Len(game): 262, Training Time: 0.050s, Prediction Time: 0.014s
    label Controller-based, Reward 4: -238.660, Len(game): 190, Training Time: 0.062s, Prediction Time: 0.020s
    label Controller-based, Reward 5: -136.099, Len(game): 62, Training Time: 0.073s, Prediction Time: 0.022s
    label Controller-based, Reward 6: -130.023, Len(game): 73, Training Time: 0.083s, Prediction Time: 0.024s
    label Controller-based, Reward 7: -178.675, Len(game): 80, Training Time: 0.093s, Prediction Time: 0.027s
    label Controller-based, Reward 8: -35.860, Len(game): 75, Training Time: 0.104s, Prediction Time: 0.029s
    label Controller-based, Reward 9: -81.565, Len(game): 58, Training Time: 0.116s, Prediction Time: 0.030s
    label Controller-based, Reward 10: -38.254, Len(game): 72, Training Time: 0.126s, Prediction Time: 0.032s
    label Controller-based, Reward 11: -26.056, Len(game): 141, Training Time: 0.137s, Prediction Time: 0.036s
    label Controller-based, Reward 12: -107.701, Len(game): 93, Training Time: 0.148s, Prediction Time: 0.039s
    label Controller-based, Reward 13: -61.681, Len(game): 83, Training Time: 0.159s, Prediction Time: 0.041s
    label Controller-based, Reward 14: -73.744, Len(game): 54, Training Time: 0.170s, Prediction Time: 0.043s
    label Controller-based, Reward 15: -202.117, Len(game): 111, Training Time: 0.183s, Prediction Time: 0.046s
    label Controller-based, Reward 16: -42.078, Len(game): 83, Training Time: 0.194s, Prediction Time: 0.048s
    label Controller-based, Reward 17: -101.128, Len(game): 72, Training Time: 0.206s, Prediction Time: 0.050s
    label Controller-based, Reward 18: 198.978, Len(game): 358, Training Time: 0.222s, Prediction Time: 0.063s
    label Controller-based, Reward 19: -26.105, Len(game): 150, Training Time: 0.235s, Prediction Time: 0.068s
    label Controller-based, Reward 20: 178.616, Len(game): 297, Training Time: 0.249s, Prediction Time: 0.075s
    label Controller-based, Reward 21: 227.939, Len(game): 393, Training Time: 0.267s, Prediction Time: 0.089s
    label Controller-based, Reward 22: -149.331, Len(game): 88, Training Time: 0.279s, Prediction Time: 0.091s
    label Controller-based, Reward 23: -90.549, Len(game): 72, Training Time: 0.293s, Prediction Time: 0.092s
    label Controller-based, Reward 24: 283.269, Len(game): 293, Training Time: 0.310s, Prediction Time: 0.101s
    label Controller-based, Reward 25: -145.738, Len(game): 2000, Training Time: 0.341s, Prediction Time: 0.159s
    label Controller-based, Reward 26: 272.230, Len(game): 320, Training Time: 0.358s, Prediction Time: 0.170s
    label Controller-based, Reward 27: -7.610, Len(game): 213, Training Time: 0.372s, Prediction Time: 0.175s
    label Controller-based, Reward 28: -45.432, Len(game): 178, Training Time: 0.387s, Prediction Time: 0.180s
    label Controller-based, Reward 29: 264.286, Len(game): 307, Training Time: 0.404s, Prediction Time: 0.190s
    label Controller-based, Reward 30: 223.321, Len(game): 375, Training Time: 0.422s, Prediction Time: 0.202s
    label Controller-based, Reward 31: 236.921, Len(game): 332, Training Time: 0.440s, Prediction Time: 0.213s
    label Controller-based, Reward 32: 134.198, Len(game): 592, Training Time: 0.458s, Prediction Time: 0.228s
    label Controller-based, Reward 33: -192.755, Len(game): 181, Training Time: 0.472s, Prediction Time: 0.232s
    label Controller-based, Reward 34: 310.746, Len(game): 318, Training Time: 0.489s, Prediction Time: 0.243s
    label Controller-based, Reward 35: 172.609, Len(game): 453, Training Time: 0.508s, Prediction Time: 0.259s
    label Controller-based, Reward 36: 275.965, Len(game): 265, Training Time: 0.525s, Prediction Time: 0.267s
    label Controller-based, Reward 37: -195.630, Len(game): 190, Training Time: 0.540s, Prediction Time: 0.271s
    label Controller-based, Reward 38: 203.798, Len(game): 349, Training Time: 0.555s, Prediction Time: 0.279s
    label Controller-based, Reward 39: 302.250, Len(game): 284, Training Time: 0.570s, Prediction Time: 0.288s
    label Controller-based, Reward 40: -21.642, Len(game): 211, Training Time: 0.587s, Prediction Time: 0.294s
    label Controller-based, Reward 41: 234.800, Len(game): 298, Training Time: 0.603s, Prediction Time: 0.301s
    label Controller-based, Reward 42: 28.698, Len(game): 196, Training Time: 0.618s, Prediction Time: 0.305s
    label Controller-based, Reward 43: 206.002, Len(game): 319, Training Time: 0.633s, Prediction Time: 0.313s
    label Controller-based, Reward 44: 235.279, Len(game): 330, Training Time: 0.651s, Prediction Time: 0.324s
    label Controller-based, Reward 45: 65.881, Len(game): 202, Training Time: 0.666s, Prediction Time: 0.328s
    label Controller-based, Reward 46: 290.840, Len(game): 275, Training Time: 0.683s, Prediction Time: 0.336s
    label Controller-based, Reward 47: 250.375, Len(game): 661, Training Time: 0.702s, Prediction Time: 0.354s
    label Controller-based, Reward 48: 268.129, Len(game): 322, Training Time: 0.717s, Prediction Time: 0.361s
    label Controller-based, Reward 49: 187.047, Len(game): 518, Training Time: 0.737s, Prediction Time: 0.377s
    label Controller-based, Reward 50: 293.946, Len(game): 271, Training Time: 0.752s, Prediction Time: 0.384s
    label Controller-based, Reward 51: 202.701, Len(game): 325, Training Time: 0.769s, Prediction Time: 0.395s
    label Controller-based, Reward 52: 225.155, Len(game): 333, Training Time: 0.784s, Prediction Time: 0.403s
    label Controller-based, Reward 53: 242.825, Len(game): 303, Training Time: 0.801s, Prediction Time: 0.413s
    label Controller-based, Reward 54: 299.742, Len(game): 295, Training Time: 0.816s, Prediction Time: 0.419s
    label Controller-based, Reward 55: 248.438, Len(game): 337, Training Time: 0.832s, Prediction Time: 0.428s
    label Controller-based, Reward 56: 134.144, Len(game): 429, Training Time: 0.850s, Prediction Time: 0.442s
    label Controller-based, Reward 57: 272.800, Len(game): 260, Training Time: 0.865s, Prediction Time: 0.448s
    label Controller-based, Reward 58: 260.772, Len(game): 314, Training Time: 0.880s, Prediction Time: 0.456s
    label Controller-based, Reward 59: 232.116, Len(game): 471, Training Time: 0.897s, Prediction Time: 0.469s
    label Controller-based, Reward 60: -176.896, Len(game): 195, Training Time: 0.913s, Prediction Time: 0.474s
    label Controller-based, Reward 61: -193.130, Len(game): 190, Training Time: 0.927s, Prediction Time: 0.479s
    label Controller-based, Reward 62: 285.408, Len(game): 258, Training Time: 0.942s, Prediction Time: 0.486s
    label Controller-based, Reward 63: 229.752, Len(game): 306, Training Time: 0.958s, Prediction Time: 0.493s
    label Controller-based, Reward 64: 295.034, Len(game): 298, Training Time: 0.974s, Prediction Time: 0.503s
    label Controller-based, Reward 65: -165.219, Len(game): 480, Training Time: 0.991s, Prediction Time: 0.515s
    label Controller-based, Reward 66: -197.014, Len(game): 204, Training Time: 1.005s, Prediction Time: 0.520s
    label Controller-based, Reward 67: 209.169, Len(game): 277, Training Time: 1.023s, Prediction Time: 0.529s
    label Controller-based, Reward 68: 264.307, Len(game): 281, Training Time: 1.038s, Prediction Time: 0.535s
    label Controller-based, Reward 69: 252.392, Len(game): 307, Training Time: 1.054s, Prediction Time: 0.543s
    label Controller-based, Reward 70: -23.640, Len(game): 157, Training Time: 1.068s, Prediction Time: 0.546s
    label Controller-based, Reward 71: -197.094, Len(game): 186, Training Time: 1.083s, Prediction Time: 0.551s
    label Controller-based, Reward 72: -98.063, Len(game): 1120, Training Time: 1.107s, Prediction Time: 0.591s
    label Controller-based, Reward 73: 289.180, Len(game): 282, Training Time: 1.123s, Prediction Time: 0.598s
    label Controller-based, Reward 74: -189.898, Len(game): 186, Training Time: 1.138s, Prediction Time: 0.602s
    label Controller-based, Reward 75: 197.033, Len(game): 243, Training Time: 1.153s, Prediction Time: 0.608s
    label Controller-based, Reward 76: 285.819, Len(game): 321, Training Time: 1.169s, Prediction Time: 0.615s
    label Controller-based, Reward 77: 232.468, Len(game): 288, Training Time: 1.184s, Prediction Time: 0.622s
    label Controller-based, Reward 78: 244.307, Len(game): 278, Training Time: 1.199s, Prediction Time: 0.629s
    label Controller-based, Reward 79: 14.754, Len(game): 206, Training Time: 1.214s, Prediction Time: 0.634s
    label Controller-based, Reward 80: 237.025, Len(game): 399, Training Time: 1.232s, Prediction Time: 0.647s
    label Controller-based, Reward 81: 238.159, Len(game): 309, Training Time: 1.251s, Prediction Time: 0.656s
    label Controller-based, Reward 82: 195.445, Len(game): 356, Training Time: 1.267s, Prediction Time: 0.665s
    label Controller-based, Reward 83: 231.256, Len(game): 341, Training Time: 1.282s, Prediction Time: 0.673s
    label Controller-based, Reward 84: -184.248, Len(game): 379, Training Time: 1.299s, Prediction Time: 0.683s
    label Controller-based, Reward 85: 249.370, Len(game): 310, Training Time: 1.315s, Prediction Time: 0.690s
    label Controller-based, Reward 86: 274.466, Len(game): 324, Training Time: 1.330s, Prediction Time: 0.698s
    label Controller-based, Reward 87: 261.930, Len(game): 294, Training Time: 1.345s, Prediction Time: 0.705s
    label Controller-based, Reward 88: 212.232, Len(game): 381, Training Time: 1.362s, Prediction Time: 0.715s
    label Controller-based, Reward 89: -43.572, Len(game): 203, Training Time: 1.379s, Prediction Time: 0.719s
    label Controller-based, Reward 90: 231.690, Len(game): 528, Training Time: 1.397s, Prediction Time: 0.734s
    label Controller-based, Reward 91: 259.358, Len(game): 303, Training Time: 1.415s, Prediction Time: 0.743s
    label Controller-based, Reward 92: 21.422, Len(game): 214, Training Time: 1.430s, Prediction Time: 0.749s
    label Controller-based, Reward 93: -128.026, Len(game): 415, Training Time: 1.447s, Prediction Time: 0.760s
    label Controller-based, Reward 94: 249.865, Len(game): 277, Training Time: 1.464s, Prediction Time: 0.767s
    label Controller-based, Reward 95: 221.310, Len(game): 356, Training Time: 1.480s, Prediction Time: 0.776s
    label Controller-based, Reward 96: 228.415, Len(game): 290, Training Time: 1.498s, Prediction Time: 0.786s
    label Controller-based, Reward 97: 218.647, Len(game): 397, Training Time: 1.515s, Prediction Time: 0.795s
    label Controller-based, Reward 98: 190.291, Len(game): 410, Training Time: 1.533s, Prediction Time: 0.805s
    label Controller-based, Reward 99: 206.255, Len(game): 340, Training Time: 1.549s, Prediction Time: 0.814s
    label KACAgent, Reward 0: -328.568, Len(game): 64, Training Time: 0.007s, Prediction Time: 0.002s
    label KACAgent, Reward 1: -63.964, Len(game): 71, Training Time: 0.014s, Prediction Time: 0.014s
    label KACAgent, Reward 2: -207.799, Len(game): 106, Training Time: 0.028s, Prediction Time: 0.032s
    label KACAgent, Reward 3: -111.775, Len(game): 117, Training Time: 0.056s, Prediction Time: 0.059s
    label KACAgent, Reward 4: -128.210, Len(game): 83, Training Time: 0.099s, Prediction Time: 0.085s
    label KACAgent, Reward 5: -116.726, Len(game): 120, Training Time: 0.168s, Prediction Time: 0.129s
    label KACAgent, Reward 6: -144.208, Len(game): 74, Training Time: 0.266s, Prediction Time: 0.172s
    label KACAgent, Reward 7: -192.032, Len(game): 80, Training Time: 0.381s, Prediction Time: 0.232s
    label KACAgent, Reward 8: -80.821, Len(game): 79, Training Time: 0.524s, Prediction Time: 0.300s
    label KACAgent, Reward 9: -311.251, Len(game): 123, Training Time: 0.697s, Prediction Time: 0.401s
    label KACAgent, Reward 10: -105.327, Len(game): 144, Training Time: 0.926s, Prediction Time: 0.533s
    label KACAgent, Reward 11: -102.437, Len(game): 102, Training Time: 1.225s, Prediction Time: 0.653s
    label KACAgent, Reward 12: 13.917, Len(game): 109, Training Time: 1.568s, Prediction Time: 0.821s
    label KACAgent, Reward 13: -104.934, Len(game): 122, Training Time: 2.001s, Prediction Time: 1.026s
    label KACAgent, Reward 14: -253.741, Len(game): 119, Training Time: 2.491s, Prediction Time: 1.235s
    label KACAgent, Reward 15: -273.641, Len(game): 132, Training Time: 3.038s, Prediction Time: 1.495s
    label KACAgent, Reward 16: -360.088, Len(game): 130, Training Time: 3.604s, Prediction Time: 1.779s
    label KACAgent, Reward 17: -137.669, Len(game): 101, Training Time: 4.264s, Prediction Time: 2.056s
    label KACAgent, Reward 18: -95.321, Len(game): 115, Training Time: 5.039s, Prediction Time: 2.377s
    label KACAgent, Reward 19: -51.346, Len(game): 120, Training Time: 5.919s, Prediction Time: 2.768s
    label KACAgent, Reward 20: -20.718, Len(game): 89, Training Time: 6.770s, Prediction Time: 3.160s
    label KACAgent, Reward 21: -79.090, Len(game): 112, Training Time: 7.663s, Prediction Time: 3.595s
    label KACAgent, Reward 22: -51.047, Len(game): 176, Training Time: 8.694s, Prediction Time: 4.109s
    label KACAgent, Reward 23: -157.352, Len(game): 134, Training Time: 9.913s, Prediction Time: 4.633s
    label KACAgent, Reward 24: -84.571, Len(game): 155, Training Time: 11.302s, Prediction Time: 5.287s
    label KACAgent, Reward 25: -38.485, Len(game): 87, Training Time: 12.840s, Prediction Time: 5.865s
    label KACAgent, Reward 26: -50.646, Len(game): 142, Training Time: 14.349s, Prediction Time: 6.622s
    label KACAgent, Reward 27: -94.792, Len(game): 100, Training Time: 16.126s, Prediction Time: 7.352s
    label KACAgent, Reward 28: -88.876, Len(game): 92, Training Time: 18.067s, Prediction Time: 8.186s
    label KACAgent, Reward 29: -65.502, Len(game): 189, Training Time: 20.058s, Prediction Time: 9.204s
    label KACAgent, Reward 30: -59.935, Len(game): 135, Training Time: 22.282s, Prediction Time: 10.244s
    label KACAgent, Reward 31: -38.201, Len(game): 115, Training Time: 24.704s, Prediction Time: 11.381s
    label KACAgent, Reward 32: 35.936, Len(game): 108, Training Time: 27.232s, Prediction Time: 12.556s
    label KACAgent, Reward 33: -75.890, Len(game): 113, Training Time: 29.869s, Prediction Time: 13.816s
    label KACAgent, Reward 34: 7.389, Len(game): 155, Training Time: 32.601s, Prediction Time: 15.190s
    label KACAgent, Reward 35: -27.966, Len(game): 200, Training Time: 35.647s, Prediction Time: 16.746s
    label KACAgent, Reward 36: -104.508, Len(game): 144, Training Time: 38.920s, Prediction Time: 18.330s
    label KACAgent, Reward 37: 14.899, Len(game): 176, Training Time: 42.011s, Prediction Time: 19.865s
    label KACAgent, Reward 38: -10.088, Len(game): 146, Training Time: 45.404s, Prediction Time: 21.513s
    label KACAgent, Reward 39: 38.261, Len(game): 126, Training Time: 49.000s, Prediction Time: 23.256s
    label KACAgent, Reward 40: -88.638, Len(game): 152, Training Time: 52.735s, Prediction Time: 25.247s
    label KACAgent, Reward 41: 24.816, Len(game): 159, Training Time: 52.735s, Prediction Time: 27.218s
    label KACAgent, Reward 42: -18.355, Len(game): 104, Training Time: 52.735s, Prediction Time: 27.359s
    label KACAgent, Reward 43: -77.535, Len(game): 293, Training Time: 52.735s, Prediction Time: 27.757s
    label KACAgent, Reward 44: -81.236, Len(game): 248, Training Time: 52.735s, Prediction Time: 28.090s
    label KACAgent, Reward 45: 20.237, Len(game): 127, Training Time: 52.735s, Prediction Time: 28.260s
    label KACAgent, Reward 46: -26.196, Len(game): 288, Training Time: 52.735s, Prediction Time: 28.647s
    label KACAgent, Reward 47: -36.950, Len(game): 233, Training Time: 52.735s, Prediction Time: 28.963s
    label KACAgent, Reward 48: -223.085, Len(game): 218, Training Time: 52.735s, Prediction Time: 29.258s
    label KACAgent, Reward 49: -111.125, Len(game): 98, Training Time: 52.735s, Prediction Time: 29.389s
    label KACAgent, Reward 50: 42.071, Len(game): 2000, Training Time: 52.735s, Prediction Time: 32.119s
    label KACAgent, Reward 51: -53.059, Len(game): 115, Training Time: 52.735s, Prediction Time: 32.274s
    label KACAgent, Reward 52: -54.857, Len(game): 293, Training Time: 52.735s, Prediction Time: 32.671s
    label KACAgent, Reward 53: -0.778, Len(game): 128, Training Time: 52.735s, Prediction Time: 32.844s
    label KACAgent, Reward 54: -40.809, Len(game): 232, Training Time: 52.735s, Prediction Time: 33.157s
    label KACAgent, Reward 55: -65.557, Len(game): 194, Training Time: 52.735s, Prediction Time: 33.419s
    label KACAgent, Reward 56: -151.013, Len(game): 179, Training Time: 52.735s, Prediction Time: 33.662s
    label KACAgent, Reward 57: -138.108, Len(game): 105, Training Time: 52.735s, Prediction Time: 33.804s
    label KACAgent, Reward 58: -72.718, Len(game): 280, Training Time: 52.735s, Prediction Time: 34.181s
    label KACAgent, Reward 59: 40.494, Len(game): 142, Training Time: 52.735s, Prediction Time: 34.374s
    label KACAgent, Reward 60: -2.169, Len(game): 208, Training Time: 52.735s, Prediction Time: 34.653s
    label KACAgent, Reward 61: 6.666, Len(game): 148, Training Time: 52.735s, Prediction Time: 34.852s
    label KACAgent, Reward 62: 17.237, Len(game): 204, Training Time: 52.735s, Prediction Time: 35.128s
    label KACAgent, Reward 63: -123.301, Len(game): 168, Training Time: 52.735s, Prediction Time: 35.355s
    label KACAgent, Reward 64: -55.402, Len(game): 266, Training Time: 52.735s, Prediction Time: 35.716s
    label KACAgent, Reward 65: 10.282, Len(game): 132, Training Time: 52.735s, Prediction Time: 35.895s
    label KACAgent, Reward 66: -85.417, Len(game): 284, Training Time: 52.735s, Prediction Time: 36.278s
    label KACAgent, Reward 67: -405.309, Len(game): 170, Training Time: 52.735s, Prediction Time: 36.511s
    label KACAgent, Reward 68: 17.269, Len(game): 142, Training Time: 52.735s, Prediction Time: 36.703s
    label KACAgent, Reward 69: -57.186, Len(game): 169, Training Time: 52.735s, Prediction Time: 36.931s
    label KACAgent, Reward 70: -41.669, Len(game): 156, Training Time: 52.735s, Prediction Time: 37.142s
    label KACAgent, Reward 71: -59.518, Len(game): 123, Training Time: 52.735s, Prediction Time: 37.307s
    label KACAgent, Reward 72: -61.951, Len(game): 249, Training Time: 52.735s, Prediction Time: 37.644s
    label KACAgent, Reward 73: 37.137, Len(game): 170, Training Time: 52.735s, Prediction Time: 37.874s
    label KACAgent, Reward 74: 13.856, Len(game): 90, Training Time: 52.735s, Prediction Time: 37.995s
    label KACAgent, Reward 75: -36.667, Len(game): 280, Training Time: 52.735s, Prediction Time: 38.375s
    label KACAgent, Reward 76: 7.559, Len(game): 118, Training Time: 52.735s, Prediction Time: 38.535s
    label KACAgent, Reward 77: 34.614, Len(game): 168, Training Time: 52.735s, Prediction Time: 38.762s
    label KACAgent, Reward 78: -33.996, Len(game): 152, Training Time: 52.735s, Prediction Time: 38.968s
    label KACAgent, Reward 79: 43.459, Len(game): 143, Training Time: 52.735s, Prediction Time: 39.161s
    label KACAgent, Reward 80: -296.611, Len(game): 141, Training Time: 52.735s, Prediction Time: 39.354s
    label KACAgent, Reward 81: -34.731, Len(game): 335, Training Time: 52.735s, Prediction Time: 39.808s
    label KACAgent, Reward 82: -146.869, Len(game): 167, Training Time: 52.735s, Prediction Time: 40.036s
    label KACAgent, Reward 83: -41.911, Len(game): 271, Training Time: 52.735s, Prediction Time: 40.403s
    label KACAgent, Reward 84: -67.637, Len(game): 326, Training Time: 52.735s, Prediction Time: 40.844s
    label KACAgent, Reward 85: -28.237, Len(game): 151, Training Time: 52.735s, Prediction Time: 41.049s
    label KACAgent, Reward 86: 20.948, Len(game): 160, Training Time: 52.735s, Prediction Time: 41.265s
    label KACAgent, Reward 87: -64.373, Len(game): 113, Training Time: 52.735s, Prediction Time: 41.417s
    label KACAgent, Reward 88: -63.747, Len(game): 159, Training Time: 52.735s, Prediction Time: 41.633s
    label KACAgent, Reward 89: 3.050, Len(game): 271, Training Time: 52.735s, Prediction Time: 42.001s
    label KACAgent, Reward 90: 22.918, Len(game): 195, Training Time: 52.735s, Prediction Time: 42.264s
    label KACAgent, Reward 91: 59.352, Len(game): 138, Training Time: 52.735s, Prediction Time: 42.450s
    label KACAgent, Reward 92: -92.817, Len(game): 184, Training Time: 52.735s, Prediction Time: 42.699s
    label KACAgent, Reward 93: 11.008, Len(game): 178, Training Time: 52.735s, Prediction Time: 42.941s
    label KACAgent, Reward 94: -84.499, Len(game): 106, Training Time: 52.735s, Prediction Time: 43.084s
    label KACAgent, Reward 95: 24.324, Len(game): 138, Training Time: 52.735s, Prediction Time: 43.273s
    label KACAgent, Reward 96: 29.313, Len(game): 151, Training Time: 52.735s, Prediction Time: 43.477s
    label KACAgent, Reward 97: -161.641, Len(game): 143, Training Time: 52.735s, Prediction Time: 43.671s
    label KACAgent, Reward 98: -39.324, Len(game): 111, Training Time: 52.735s, Prediction Time: 43.822s
    label KACAgent, Reward 99: -24.729, Len(game): 178, Training Time: 52.735s, Prediction Time: 44.064s
    label PolicyGradient, Reward 0: -81.303, Len(game): 64, Training Time: 0.008s, Prediction Time: 0.001s
    label PolicyGradient, Reward 1: -102.050, Len(game): 67, Training Time: 0.015s, Prediction Time: 0.013s
    label PolicyGradient, Reward 2: -204.890, Len(game): 98, Training Time: 0.027s, Prediction Time: 0.029s
    label PolicyGradient, Reward 3: -161.854, Len(game): 99, Training Time: 0.119s, Prediction Time: 0.052s
    label PolicyGradient, Reward 4: -262.171, Len(game): 111, Training Time: 0.152s, Prediction Time: 0.086s
    label PolicyGradient, Reward 5: -99.184, Len(game): 63, Training Time: 0.207s, Prediction Time: 0.111s
    label PolicyGradient, Reward 6: -94.398, Len(game): 69, Training Time: 0.273s, Prediction Time: 0.148s
    label PolicyGradient, Reward 7: -170.477, Len(game): 129, Training Time: 0.362s, Prediction Time: 0.214s
    label PolicyGradient, Reward 8: -323.570, Len(game): 100, Training Time: 0.481s, Prediction Time: 0.277s
    label PolicyGradient, Reward 9: -203.517, Len(game): 108, Training Time: 0.626s, Prediction Time: 0.360s
    label PolicyGradient, Reward 10: -209.092, Len(game): 149, Training Time: 0.810s, Prediction Time: 0.475s
    label PolicyGradient, Reward 11: -54.381, Len(game): 71, Training Time: 1.045s, Prediction Time: 0.563s
    label PolicyGradient, Reward 12: -210.281, Len(game): 135, Training Time: 1.312s, Prediction Time: 0.728s
    label PolicyGradient, Reward 13: -137.631, Len(game): 108, Training Time: 1.653s, Prediction Time: 0.876s
    label PolicyGradient, Reward 14: -305.747, Len(game): 114, Training Time: 2.049s, Prediction Time: 1.046s
    label PolicyGradient, Reward 15: -234.554, Len(game): 111, Training Time: 2.479s, Prediction Time: 1.239s
    label PolicyGradient, Reward 16: -239.750, Len(game): 99, Training Time: 2.962s, Prediction Time: 1.455s
    label PolicyGradient, Reward 17: -172.324, Len(game): 129, Training Time: 3.494s, Prediction Time: 1.714s
    label PolicyGradient, Reward 18: -225.455, Len(game): 139, Training Time: 4.098s, Prediction Time: 2.004s
    label PolicyGradient, Reward 19: -187.867, Len(game): 104, Training Time: 4.800s, Prediction Time: 2.301s
    label PolicyGradient, Reward 20: -61.457, Len(game): 86, Training Time: 5.571s, Prediction Time: 2.625s
    label PolicyGradient, Reward 21: -96.494, Len(game): 145, Training Time: 6.389s, Prediction Time: 3.038s
    label PolicyGradient, Reward 22: -202.978, Len(game): 180, Training Time: 7.320s, Prediction Time: 3.492s
    label PolicyGradient, Reward 23: -130.890, Len(game): 100, Training Time: 8.385s, Prediction Time: 3.918s
    label PolicyGradient, Reward 24: -157.249, Len(game): 169, Training Time: 9.605s, Prediction Time: 4.513s
    label PolicyGradient, Reward 25: -122.789, Len(game): 110, Training Time: 10.988s, Prediction Time: 5.131s
    label PolicyGradient, Reward 26: -106.723, Len(game): 82, Training Time: 12.463s, Prediction Time: 5.787s
    label PolicyGradient, Reward 27: -118.584, Len(game): 139, Training Time: 14.062s, Prediction Time: 6.507s
    label PolicyGradient, Reward 28: -86.630, Len(game): 78, Training Time: 15.741s, Prediction Time: 7.213s
    label PolicyGradient, Reward 29: -33.499, Len(game): 98, Training Time: 17.483s, Prediction Time: 7.963s
    label PolicyGradient, Reward 30: -81.531, Len(game): 139, Training Time: 19.325s, Prediction Time: 8.796s
    label PolicyGradient, Reward 31: -86.179, Len(game): 131, Training Time: 21.536s, Prediction Time: 9.749s
    label PolicyGradient, Reward 32: -41.293, Len(game): 159, Training Time: 23.714s, Prediction Time: 10.848s
    label PolicyGradient, Reward 33: -58.410, Len(game): 78, Training Time: 26.094s, Prediction Time: 11.789s
    label PolicyGradient, Reward 34: -37.786, Len(game): 84, Training Time: 28.568s, Prediction Time: 12.906s
    label PolicyGradient, Reward 35: 2.557, Len(game): 101, Training Time: 31.185s, Prediction Time: 14.112s
    label PolicyGradient, Reward 36: -201.901, Len(game): 219, Training Time: 34.354s, Prediction Time: 15.479s
    label PolicyGradient, Reward 37: -119.981, Len(game): 211, Training Time: 37.941s, Prediction Time: 17.138s
    label PolicyGradient, Reward 38: 11.247, Len(game): 91, Training Time: 41.920s, Prediction Time: 18.788s
    label PolicyGradient, Reward 39: -20.067, Len(game): 248, Training Time: 46.147s, Prediction Time: 20.905s
    label PolicyGradient, Reward 40: -67.204, Len(game): 206, Training Time: 50.963s, Prediction Time: 23.085s
    label PolicyGradient, Reward 41: -126.823, Len(game): 185, Training Time: 50.963s, Prediction Time: 25.493s
    label PolicyGradient, Reward 42: -230.637, Len(game): 246, Training Time: 50.963s, Prediction Time: 25.838s
    label PolicyGradient, Reward 43: -18.282, Len(game): 140, Training Time: 50.963s, Prediction Time: 26.031s
    label PolicyGradient, Reward 44: -196.310, Len(game): 182, Training Time: 50.963s, Prediction Time: 26.283s
    label PolicyGradient, Reward 45: -190.296, Len(game): 200, Training Time: 50.963s, Prediction Time: 26.565s
    label PolicyGradient, Reward 46: -80.623, Len(game): 143, Training Time: 50.963s, Prediction Time: 26.761s
    label PolicyGradient, Reward 47: -99.060, Len(game): 303, Training Time: 50.963s, Prediction Time: 27.186s
    label PolicyGradient, Reward 48: -241.909, Len(game): 176, Training Time: 50.963s, Prediction Time: 27.438s
    label PolicyGradient, Reward 49: -371.817, Len(game): 257, Training Time: 50.963s, Prediction Time: 27.796s
    label PolicyGradient, Reward 50: -100.116, Len(game): 145, Training Time: 50.963s, Prediction Time: 27.998s
    label PolicyGradient, Reward 51: -0.238, Len(game): 96, Training Time: 50.963s, Prediction Time: 28.132s
    label PolicyGradient, Reward 52: -21.396, Len(game): 151, Training Time: 50.963s, Prediction Time: 28.341s
    label PolicyGradient, Reward 53: -248.328, Len(game): 208, Training Time: 50.963s, Prediction Time: 28.636s
    label PolicyGradient, Reward 54: -188.180, Len(game): 260, Training Time: 50.963s, Prediction Time: 28.995s
    label PolicyGradient, Reward 55: -78.366, Len(game): 119, Training Time: 50.963s, Prediction Time: 29.166s
    label PolicyGradient, Reward 56: -4.501, Len(game): 99, Training Time: 50.963s, Prediction Time: 29.305s
    label PolicyGradient, Reward 57: -245.866, Len(game): 159, Training Time: 50.963s, Prediction Time: 29.528s
    label PolicyGradient, Reward 58: -38.078, Len(game): 137, Training Time: 50.963s, Prediction Time: 29.721s
    label PolicyGradient, Reward 59: -40.093, Len(game): 90, Training Time: 50.963s, Prediction Time: 29.849s
    label PolicyGradient, Reward 60: -231.111, Len(game): 156, Training Time: 50.963s, Prediction Time: 30.069s
    label PolicyGradient, Reward 61: -106.972, Len(game): 204, Training Time: 50.963s, Prediction Time: 30.355s
    label PolicyGradient, Reward 62: -143.943, Len(game): 238, Training Time: 50.963s, Prediction Time: 30.690s
    label PolicyGradient, Reward 63: -148.996, Len(game): 213, Training Time: 50.963s, Prediction Time: 30.986s
    label PolicyGradient, Reward 64: -547.445, Len(game): 220, Training Time: 50.963s, Prediction Time: 31.299s
    label PolicyGradient, Reward 65: -104.275, Len(game): 142, Training Time: 50.963s, Prediction Time: 31.501s
    label PolicyGradient, Reward 66: -63.869, Len(game): 153, Training Time: 50.963s, Prediction Time: 31.715s
    label PolicyGradient, Reward 67: -149.268, Len(game): 117, Training Time: 50.963s, Prediction Time: 31.881s
    label PolicyGradient, Reward 68: -58.490, Len(game): 160, Training Time: 50.963s, Prediction Time: 32.108s
    label PolicyGradient, Reward 69: -85.277, Len(game): 135, Training Time: 50.963s, Prediction Time: 32.298s
    label PolicyGradient, Reward 70: -121.707, Len(game): 349, Training Time: 50.963s, Prediction Time: 32.791s
    label PolicyGradient, Reward 71: -18.086, Len(game): 99, Training Time: 50.963s, Prediction Time: 32.929s
    label PolicyGradient, Reward 72: -45.293, Len(game): 129, Training Time: 50.963s, Prediction Time: 33.110s
    label PolicyGradient, Reward 73: -4.228, Len(game): 152, Training Time: 50.963s, Prediction Time: 33.322s
    label PolicyGradient, Reward 74: -21.963, Len(game): 92, Training Time: 50.963s, Prediction Time: 33.455s
    label PolicyGradient, Reward 75: -341.886, Len(game): 359, Training Time: 50.963s, Prediction Time: 33.957s
    label PolicyGradient, Reward 76: -84.536, Len(game): 160, Training Time: 50.963s, Prediction Time: 34.181s
    label PolicyGradient, Reward 77: -45.906, Len(game): 135, Training Time: 50.963s, Prediction Time: 34.370s
    label PolicyGradient, Reward 78: -80.470, Len(game): 155, Training Time: 50.963s, Prediction Time: 34.590s
    label PolicyGradient, Reward 79: -78.294, Len(game): 131, Training Time: 50.963s, Prediction Time: 34.774s
    label PolicyGradient, Reward 80: -172.076, Len(game): 347, Training Time: 50.963s, Prediction Time: 35.261s
    label PolicyGradient, Reward 81: -74.395, Len(game): 298, Training Time: 50.963s, Prediction Time: 35.678s
    label PolicyGradient, Reward 82: -219.371, Len(game): 329, Training Time: 50.963s, Prediction Time: 36.145s
    label PolicyGradient, Reward 83: -15.508, Len(game): 162, Training Time: 50.963s, Prediction Time: 36.371s
    label PolicyGradient, Reward 84: -68.583, Len(game): 141, Training Time: 50.963s, Prediction Time: 36.566s
    label PolicyGradient, Reward 85: -82.117, Len(game): 231, Training Time: 50.963s, Prediction Time: 36.887s
    label PolicyGradient, Reward 86: -66.863, Len(game): 222, Training Time: 50.963s, Prediction Time: 37.199s
    label PolicyGradient, Reward 87: -311.287, Len(game): 205, Training Time: 50.963s, Prediction Time: 37.486s
    label PolicyGradient, Reward 88: -100.645, Len(game): 241, Training Time: 50.963s, Prediction Time: 37.820s
    label PolicyGradient, Reward 89: -46.934, Len(game): 167, Training Time: 50.963s, Prediction Time: 38.054s
    label PolicyGradient, Reward 90: -158.278, Len(game): 167, Training Time: 50.963s, Prediction Time: 38.285s
    label PolicyGradient, Reward 91: -46.275, Len(game): 124, Training Time: 50.963s, Prediction Time: 38.459s
    label PolicyGradient, Reward 92: -75.934, Len(game): 182, Training Time: 50.963s, Prediction Time: 38.714s
    label PolicyGradient, Reward 93: -191.096, Len(game): 206, Training Time: 50.963s, Prediction Time: 39.000s
    label PolicyGradient, Reward 94: 5.992, Len(game): 109, Training Time: 50.963s, Prediction Time: 39.152s
    label PolicyGradient, Reward 95: -113.291, Len(game): 198, Training Time: 50.963s, Prediction Time: 39.427s
    label PolicyGradient, Reward 96: -4.211, Len(game): 89, Training Time: 50.963s, Prediction Time: 39.549s
    label PolicyGradient, Reward 97: -123.054, Len(game): 233, Training Time: 50.963s, Prediction Time: 39.878s
    label PolicyGradient, Reward 98: -105.275, Len(game): 178, Training Time: 50.963s, Prediction Time: 40.126s
    label PolicyGradient, Reward 99: -35.657, Len(game): 99, Training Time: 50.963s, Prediction Time: 40.267s
    C:\Users\geoff\Desktop\Github\codpybook-rtd\docs\ch8\ignore_utils.py:621: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\torch\csrc\utils\tensor_new.cpp:257.)
      state = torch.tensor([state], dtype=torch.float32)
    label DQNAgent, Reward 0: -113.085, Len(game): 78, Training Time: 0.025s, Prediction Time: 0.004s
    label DQNAgent, Reward 1: -379.710, Len(game): 114, Training Time: 0.172s, Prediction Time: 0.008s
    label DQNAgent, Reward 2: -154.823, Len(game): 91, Training Time: 0.281s, Prediction Time: 0.012s
    label DQNAgent, Reward 3: -158.450, Len(game): 115, Training Time: 0.422s, Prediction Time: 0.017s
    label DQNAgent, Reward 4: -126.051, Len(game): 142, Training Time: 0.591s, Prediction Time: 0.025s
    label DQNAgent, Reward 5: -362.040, Len(game): 453, Training Time: 1.171s, Prediction Time: 0.054s
    label DQNAgent, Reward 6: 2.751, Len(game): 244, Training Time: 1.475s, Prediction Time: 0.073s
    label DQNAgent, Reward 7: -224.326, Len(game): 2000, Training Time: 3.986s, Prediction Time: 0.242s
    label DQNAgent, Reward 8: -310.762, Len(game): 2000, Training Time: 6.763s, Prediction Time: 0.417s
    label DQNAgent, Reward 9: -432.592, Len(game): 2000, Training Time: 9.324s, Prediction Time: 0.591s
    label DQNAgent, Reward 10: -355.976, Len(game): 1604, Training Time: 11.380s, Prediction Time: 0.729s
    label DQNAgent, Reward 11: 236.521, Len(game): 367, Training Time: 11.857s, Prediction Time: 0.761s
    label DQNAgent, Reward 12: -187.677, Len(game): 2000, Training Time: 14.435s, Prediction Time: 0.933s
    label DQNAgent, Reward 13: -179.741, Len(game): 2000, Training Time: 17.013s, Prediction Time: 1.110s
    label DQNAgent, Reward 14: -183.653, Len(game): 603, Training Time: 17.794s, Prediction Time: 1.162s
    label DQNAgent, Reward 15: -191.268, Len(game): 383, Training Time: 18.287s, Prediction Time: 1.195s
    label DQNAgent, Reward 16: -188.280, Len(game): 332, Training Time: 18.720s, Prediction Time: 1.225s
    label DQNAgent, Reward 17: -196.164, Len(game): 541, Training Time: 19.416s, Prediction Time: 1.272s
    label DQNAgent, Reward 18: -167.276, Len(game): 2000, Training Time: 22.016s, Prediction Time: 1.443s
    label DQNAgent, Reward 19: -189.136, Len(game): 2000, Training Time: 24.666s, Prediction Time: 1.618s
    label DQNAgent, Reward 20: -174.624, Len(game): 2000, Training Time: 27.299s, Prediction Time: 1.801s
    label DQNAgent, Reward 21: -238.685, Len(game): 2000, Training Time: 29.948s, Prediction Time: 1.980s
    label DQNAgent, Reward 22: -154.886, Len(game): 124, Training Time: 30.116s, Prediction Time: 1.992s
    label DQNAgent, Reward 23: -174.114, Len(game): 2000, Training Time: 32.795s, Prediction Time: 2.168s
    label DQNAgent, Reward 24: -253.414, Len(game): 2000, Training Time: 35.460s, Prediction Time: 2.342s
    label DQNAgent, Reward 25: -191.407, Len(game): 2000, Training Time: 38.131s, Prediction Time: 2.520s
    label DQNAgent, Reward 26: -202.056, Len(game): 2000, Training Time: 40.805s, Prediction Time: 2.698s
    label DQNAgent, Reward 27: -194.853, Len(game): 2000, Training Time: 43.471s, Prediction Time: 2.875s
    label DQNAgent, Reward 28: -192.771, Len(game): 2000, Training Time: 46.156s, Prediction Time: 3.051s
    label DQNAgent, Reward 29: -217.784, Len(game): 2000, Training Time: 48.842s, Prediction Time: 3.226s
    label DQNAgent, Reward 30: -157.779, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.406s
    label DQNAgent, Reward 31: -297.432, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.585s
    label DQNAgent, Reward 32: -250.067, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.730s
    label DQNAgent, Reward 33: -278.377, Len(game): 2000, Training Time: 51.526s, Prediction Time: 3.874s
    label DQNAgent, Reward 34: -248.133, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.017s
    label DQNAgent, Reward 35: -268.962, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.159s
    label DQNAgent, Reward 36: -254.991, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.302s
    label DQNAgent, Reward 37: -292.653, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.445s
    label DQNAgent, Reward 38: -256.357, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.588s
    label DQNAgent, Reward 39: -219.518, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.731s
    label DQNAgent, Reward 40: -288.255, Len(game): 2000, Training Time: 51.526s, Prediction Time: 4.874s
    label DQNAgent, Reward 41: -211.079, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.019s
    label DQNAgent, Reward 42: -236.794, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.161s
    label DQNAgent, Reward 43: -305.389, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.305s
    label DQNAgent, Reward 44: -288.723, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.445s
    label DQNAgent, Reward 45: -267.358, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.587s
    label DQNAgent, Reward 46: -307.678, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.727s
    label DQNAgent, Reward 47: -326.587, Len(game): 2000, Training Time: 51.526s, Prediction Time: 5.869s
    label DQNAgent, Reward 48: -239.119, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.012s
    label DQNAgent, Reward 49: -288.258, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.153s
    label DQNAgent, Reward 50: -315.439, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.293s
    label DQNAgent, Reward 51: -201.114, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.435s
    label DQNAgent, Reward 52: -295.868, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.578s
    label DQNAgent, Reward 53: -259.802, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.720s
    label DQNAgent, Reward 54: -295.257, Len(game): 2000, Training Time: 51.526s, Prediction Time: 6.861s
    label DQNAgent, Reward 55: -248.141, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.004s
    label DQNAgent, Reward 56: -245.958, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.145s
    label DQNAgent, Reward 57: -315.552, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.286s
    label DQNAgent, Reward 58: -238.332, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.429s
    label DQNAgent, Reward 59: -311.968, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.573s
    label DQNAgent, Reward 60: -237.635, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.715s
    label DQNAgent, Reward 61: -320.902, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.855s
    label DQNAgent, Reward 62: -262.911, Len(game): 2000, Training Time: 51.526s, Prediction Time: 7.997s
    label DQNAgent, Reward 63: -306.299, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.140s
    label DQNAgent, Reward 64: -185.516, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.280s
    label DQNAgent, Reward 65: -264.190, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.423s
    label DQNAgent, Reward 66: -307.969, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.565s
    label DQNAgent, Reward 67: -262.631, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.710s
    label DQNAgent, Reward 68: -281.048, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.852s
    label DQNAgent, Reward 69: -258.154, Len(game): 2000, Training Time: 51.526s, Prediction Time: 8.994s
    label DQNAgent, Reward 70: -263.735, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.137s
    label DQNAgent, Reward 71: -319.130, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.280s
    label DQNAgent, Reward 72: -294.916, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.423s
    label DQNAgent, Reward 73: -233.601, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.567s
    label DQNAgent, Reward 74: -285.451, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.709s
    label DQNAgent, Reward 75: -296.343, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.850s
    label DQNAgent, Reward 76: -248.531, Len(game): 2000, Training Time: 51.526s, Prediction Time: 9.992s
    label DQNAgent, Reward 77: -286.580, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.133s
    label DQNAgent, Reward 78: -252.319, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.274s
    label DQNAgent, Reward 79: -292.500, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.416s
    label DQNAgent, Reward 80: -287.494, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.559s
    label DQNAgent, Reward 81: -258.796, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.700s
    label DQNAgent, Reward 82: -280.900, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.841s
    label DQNAgent, Reward 83: -287.505, Len(game): 2000, Training Time: 51.526s, Prediction Time: 10.980s
    label DQNAgent, Reward 84: -312.122, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.125s
    label DQNAgent, Reward 85: -291.135, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.266s
    label DQNAgent, Reward 86: -288.435, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.409s
    label DQNAgent, Reward 87: -260.070, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.554s
    label DQNAgent, Reward 88: -211.202, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.697s
    label DQNAgent, Reward 89: -236.297, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.838s
    label DQNAgent, Reward 90: -240.365, Len(game): 2000, Training Time: 51.526s, Prediction Time: 11.981s
    label DQNAgent, Reward 91: -294.879, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.124s
    label DQNAgent, Reward 92: -259.572, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.266s
    label DQNAgent, Reward 93: -278.656, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.408s
    label DQNAgent, Reward 94: -315.290, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.547s
    label DQNAgent, Reward 95: -237.135, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.688s
    label DQNAgent, Reward 96: -233.797, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.827s
    label DQNAgent, Reward 97: -271.331, Len(game): 2000, Training Time: 51.526s, Prediction Time: 12.968s
    label DQNAgent, Reward 98: -292.179, Len(game): 2000, Training Time: 51.526s, Prediction Time: 13.108s
    label DQNAgent, Reward 99: -324.016, Len(game): 2000, Training Time: 51.526s, Prediction Time: 13.248s
    Computed global error Bellman mean:  1.7878172036910294  iter:  0
    Computed global error Bellman mean:  1.8623318222045684  iter:  0
    label KQLearning, Reward 0: -200.395, Len(game): 128, Training Time: 0.055s, Prediction Time: 0.003s
    Computed global error Bellman mean:  1.2356713497168017e-07  iter:  3
    label KQLearning, Reward 1: -620.667, Len(game): 73, Training Time: 0.075s, Prediction Time: 0.023s
    Computed global error Bellman mean:  0.00017559370102202926  iter:  5
    label KQLearning, Reward 2: -708.620, Len(game): 151, Training Time: 0.154s, Prediction Time: 0.147s
    Computed global error Bellman mean:  1.5985114262427255e-07  iter:  5
    label KQLearning, Reward 3: -162.912, Len(game): 128, Training Time: 0.207s, Prediction Time: 0.301s
    Computed global error Bellman mean:  0.09014312280287062  iter:  5
    Computed global error Bellman mean:  0.005174441156915289  iter:  3
    label KQLearning, Reward 4: -132.788, Len(game): 59, Training Time: 0.248s, Prediction Time: 0.394s
    Computed global error Bellman mean:  2.5997462881714264e-08  iter:  3
    label KQLearning, Reward 5: -134.307, Len(game): 75, Training Time: 0.268s, Prediction Time: 0.524s
    Computed global error Bellman mean:  4.279074899312429e-08  iter:  5
    label KQLearning, Reward 6: -145.728, Len(game): 59, Training Time: 0.288s, Prediction Time: 0.637s
    Computed global error Bellman mean:  1.1294630867435227  iter:  5
    Computed global error Bellman mean:  0.003057647495996643  iter:  5
    label KQLearning, Reward 7: -153.151, Len(game): 120, Training Time: 0.431s, Prediction Time: 0.875s
    Computed global error Bellman mean:  2.0004563183198205e-07  iter:  4
    label KQLearning, Reward 8: 40.859, Len(game): 82, Training Time: 0.458s, Prediction Time: 1.078s
    Computed global error Bellman mean:  2.0093319964137324e-07  iter:  4
    label KQLearning, Reward 9: -297.285, Len(game): 128, Training Time: 0.508s, Prediction Time: 1.403s
    Computed global error Bellman mean:  5.93768899459578e-05  iter:  5
    label KQLearning, Reward 10: -377.949, Len(game): 58, Training Time: 0.532s, Prediction Time: 1.589s
    Computed global error Bellman mean:  0.1857227283768003  iter:  5
    Computed global error Bellman mean:  0.1749357521973165  iter:  5
    label KQLearning, Reward 11: -121.868, Len(game): 58, Training Time: 0.578s, Prediction Time: 1.779s
    Computed global error Bellman mean:  5.506837701738312e-08  iter:  4
    Computed global error Bellman mean:  0.08027210785316556  iter:  5
    label KQLearning, Reward 12: -156.228, Len(game): 65, Training Time: 0.625s, Prediction Time: 2.009s
    Computed global error Bellman mean:  8.817866412693878e-09  iter:  2
    Computed global error Bellman mean:  0.034812471780843325  iter:  5
    label KQLearning, Reward 13: -122.908, Len(game): 73, Training Time: 0.670s, Prediction Time: 2.263s
    Computed global error Bellman mean:  0.02543451762234678  iter:  5
    Computed global error Bellman mean:  0.009514509920557154  iter:  5
    Computed global error Bellman mean:  0.037829476788574255  iter:  3
    label KQLearning, Reward 14: -96.159, Len(game): 85, Training Time: 0.757s, Prediction Time: 2.585s
    Computed global error Bellman mean:  0.24919380917819176  iter:  5
    Computed global error Bellman mean:  0.044900472614210124  iter:  5
    label KQLearning, Reward 15: -121.204, Len(game): 59, Training Time: 0.804s, Prediction Time: 2.837s
    Computed global error Bellman mean:  1.0091587948197072e-07  iter:  5
    Computed global error Bellman mean:  0.022390631371600574  iter:  5
    label KQLearning, Reward 16: -86.396, Len(game): 91, Training Time: 0.870s, Prediction Time: 3.197s
    Computed global error Bellman mean:  0.128533530536805  iter:  5
    Computed global error Bellman mean:  0.001863147889892192  iter:  5
    Computed global error Bellman mean:  0.007104770255512356  iter:  5
    label KQLearning, Reward 17: -143.376, Len(game): 61, Training Time: 0.942s, Prediction Time: 3.479s
    Computed global error Bellman mean:  0.00023891959529186475  iter:  5
    label KQLearning, Reward 18: -121.010, Len(game): 83, Training Time: 0.976s, Prediction Time: 3.846s
    Computed global error Bellman mean:  0.013989181209570873  iter:  5
    Computed global error Bellman mean:  0.006825096004593422  iter:  4
    label KQLearning, Reward 19: -119.881, Len(game): 118, Training Time: 1.119s, Prediction Time: 4.405s
    Computed global error Bellman mean:  1.0513683747492841e-07  iter:  4
    label KQLearning, Reward 20: -118.150, Len(game): 80, Training Time: 1.149s, Prediction Time: 4.816s
    Computed global error Bellman mean:  7.453840246851411e-06  iter:  5
    label KQLearning, Reward 21: -136.488, Len(game): 128, Training Time: 1.213s, Prediction Time: 5.502s
    Computed global error Bellman mean:  4.260048658277782e-08  iter:  5
    label KQLearning, Reward 22: -99.267, Len(game): 70, Training Time: 1.240s, Prediction Time: 5.898s
    Computed global error Bellman mean:  1.1402115084113178e-08  iter:  4
    label KQLearning, Reward 23: -131.985, Len(game): 84, Training Time: 1.274s, Prediction Time: 6.391s
    Computed global error Bellman mean:  0.1251552939518742  iter:  5
    Computed global error Bellman mean:  0.04363123471567561  iter:  5
    label KQLearning, Reward 24: -112.795, Len(game): 53, Training Time: 1.314s, Prediction Time: 6.735s
    Computed global error Bellman mean:  0.00016643639993477752  iter:  5
    Computed global error Bellman mean:  0.005202712701594659  iter:  5
    label KQLearning, Reward 25: -133.218, Len(game): 116, Training Time: 1.396s, Prediction Time: 7.442s
    Computed global error Bellman mean:  2.04596698215854e-08  iter:  3
    label KQLearning, Reward 26: -103.134, Len(game): 52, Training Time: 1.413s, Prediction Time: 7.797s
    Computed global error Bellman mean:  8.631624331451543e-09  iter:  4
    label KQLearning, Reward 27: -132.907, Len(game): 55, Training Time: 1.431s, Prediction Time: 8.165s
    Computed global error Bellman mean:  0.041597443857888286  iter:  5
    Computed global error Bellman mean:  0.013447130230095082  iter:  5
    label KQLearning, Reward 28: -145.249, Len(game): 64, Training Time: 1.486s, Prediction Time: 8.606s
    Computed global error Bellman mean:  0.04793728290235055  iter:  5
    Computed global error Bellman mean:  0.013447129295314223  iter:  1
    Computed global error Bellman mean:  0.05875239585481834  iter:  3
    label KQLearning, Reward 29: -163.755, Len(game): 139, Training Time: 1.679s, Prediction Time: 9.588s
    Computed global error Bellman mean:  0.02056910697615851  iter:  5
    Computed global error Bellman mean:  0.004457868533659127  iter:  5
    label KQLearning, Reward 30: -224.982, Len(game): 244, Training Time: 2.268s, Prediction Time: 11.357s
    Computed global error Bellman mean:  2.7721892858304878e-08  iter:  5
    label KQLearning, Reward 31: -111.750, Len(game): 75, Training Time: 2.303s, Prediction Time: 11.962s
    Computed global error Bellman mean:  8.88503637487098e-09  iter:  3
    label KQLearning, Reward 32: -133.927, Len(game): 71, Training Time: 2.325s, Prediction Time: 12.546s
    Computed global error Bellman mean:  0.011135651236183156  iter:  5
    Computed global error Bellman mean:  0.019759772703321254  iter:  4
    label KQLearning, Reward 33: -113.742, Len(game): 58, Training Time: 2.367s, Prediction Time: 13.033s
    Computed global error Bellman mean:  0.012950627470135787  iter:  5
    Computed global error Bellman mean:  0.018880851009172462  iter:  3
    label KQLearning, Reward 34: -58.630, Len(game): 97, Training Time: 2.455s, Prediction Time: 13.857s
    Computed global error Bellman mean:  0.3276866377019415  iter:  5
    Computed global error Bellman mean:  0.3008326609173155  iter:  5
    label KQLearning, Reward 35: -104.955, Len(game): 53, Training Time: 2.498s, Prediction Time: 14.328s
    Computed global error Bellman mean:  1.1194796463118103e-07  iter:  4
    Computed global error Bellman mean:  0.08565904938278161  iter:  5
    label KQLearning, Reward 36: -137.875, Len(game): 94, Training Time: 2.555s, Prediction Time: 15.170s
    Computed global error Bellman mean:  0.0014778275554370845  iter:  5
    Computed global error Bellman mean:  0.03495880138002345  iter:  5
    label KQLearning, Reward 37: -116.011, Len(game): 105, Training Time: 2.628s, Prediction Time: 16.136s
    Computed global error Bellman mean:  4.1763729385912994e-08  iter:  5
    Computed global error Bellman mean:  0.001885933119938559  iter:  5
    label KQLearning, Reward 38: -100.451, Len(game): 60, Training Time: 2.673s, Prediction Time: 16.717s
    Computed global error Bellman mean:  0.1462575863035063  iter:  5
    Computed global error Bellman mean:  0.01722092350381151  iter:  5
    label KQLearning, Reward 39: -168.628, Len(game): 164, Training Time: 2.953s, Prediction Time: 18.286s
    Computed global error Bellman mean:  0.0010308075347473908  iter:  5
    Computed global error Bellman mean:  0.01722092350381151  iter:  0
    label KQLearning, Reward 40: -134.726, Len(game): 123, Training Time: 3.063s, Prediction Time: 19.510s
    Computed global error Bellman mean:  7.542191039163838e-08  iter:  5
    label KQLearning, Reward 41: -177.308, Len(game): 99, Training Time: 3.104s, Prediction Time: 20.523s
    Computed global error Bellman mean:  2.8866856238707328e-08  iter:  4
    label KQLearning, Reward 42: -161.166, Len(game): 82, Training Time: 3.133s, Prediction Time: 21.389s
    Computed global error Bellman mean:  2.397640513347026e-07  iter:  4
    label KQLearning, Reward 43: -333.739, Len(game): 114, Training Time: 3.177s, Prediction Time: 22.620s
    Computed global error Bellman mean:  0.07508408132661465  iter:  5
    Computed global error Bellman mean:  0.039524670065325604  iter:  5
    label KQLearning, Reward 44: -126.057, Len(game): 55, Training Time: 3.220s, Prediction Time: 23.243s
    Computed global error Bellman mean:  0.021675287415418642  iter:  5
    Computed global error Bellman mean:  0.023099582157161767  iter:  5
    Computed global error Bellman mean:  0.02441240894226758  iter:  3
    label KQLearning, Reward 45: -217.480, Len(game): 102, Training Time: 3.347s, Prediction Time: 24.386s
    Computed global error Bellman mean:  0.8280139871538977  iter:  5
    Computed global error Bellman mean:  0.0030895465786024735  iter:  5
    Computed global error Bellman mean:  0.8459046706688572  iter:  0
    label KQLearning, Reward 46: -176.128, Len(game): 116, Training Time: 3.451s, Prediction Time: 25.716s
    Computed global error Bellman mean:  3.6333881026195065e-07  iter:  5
    label KQLearning, Reward 47: -412.807, Len(game): 126, Training Time: 3.506s, Prediction Time: 27.205s
    Computed global error Bellman mean:  0.022067067426577363  iter:  5
    Computed global error Bellman mean:  0.05577230577725157  iter:  1
    label KQLearning, Reward 48: -137.337, Len(game): 70, Training Time: 3.551s, Prediction Time: 28.055s
    Computed global error Bellman mean:  4.629390375805136e-08  iter:  4
    label KQLearning, Reward 49: -246.877, Len(game): 84, Training Time: 3.579s, Prediction Time: 29.101s
    Computed global error Bellman mean:  1.1653079550570192e-07  iter:  5
    label KQLearning, Reward 50: -121.362, Len(game): 90, Training Time: 3.618s, Prediction Time: 30.220s
    Computed global error Bellman mean:  2.223875292330289e-07  iter:  4
    label KQLearning, Reward 51: -205.546, Len(game): 112, Training Time: 3.668s, Prediction Time: 31.655s
    Computed global error Bellman mean:  0.00015011166943613467  iter:  5
    label KQLearning, Reward 52: -277.724, Len(game): 217, Training Time: 3.844s, Prediction Time: 34.494s
    Computed global error Bellman mean:  0.8212383004785393  iter:  1
    Computed global error Bellman mean:  0.8580645610499036  iter:  0
    label KQLearning, Reward 53: -156.637, Len(game): 127, Training Time: 3.904s, Prediction Time: 36.234s
    Computed global error Bellman mean:  2.033127582415023e-07  iter:  4
    label KQLearning, Reward 54: -191.852, Len(game): 82, Training Time: 3.934s, Prediction Time: 37.392s
    Computed global error Bellman mean:  0.00020429838190502467  iter:  5
    label KQLearning, Reward 55: -199.740, Len(game): 236, Training Time: 4.145s, Prediction Time: 40.729s
    Computed global error Bellman mean:  1.3120410218598572e-08  iter:  3
    label KQLearning, Reward 56: -111.694, Len(game): 56, Training Time: 4.164s, Prediction Time: 41.569s
    Computed global error Bellman mean:  0.00439517314175621  iter:  5
    label KQLearning, Reward 57: -106.712, Len(game): 201, Training Time: 4.326s, Prediction Time: 44.569s
    Computed global error Bellman mean:  0.31998081255164346  iter:  5
    Computed global error Bellman mean:  0.2997701749506863  iter:  1
    label KQLearning, Reward 58: -141.385, Len(game): 76, Training Time: 4.384s, Prediction Time: 45.745s
    Computed global error Bellman mean:  0.8587894065022125  iter:  1
    Computed global error Bellman mean:  0.2997701749506863  iter:  0
    Computed global error Bellman mean:  0.9609382850441174  iter:  0
    label KQLearning, Reward 59: -126.471, Len(game): 90, Training Time: 4.439s, Prediction Time: 47.134s
    Computed global error Bellman mean:  0.13265114924030735  iter:  5
    Computed global error Bellman mean:  0.0851737249958913  iter:  5
    label KQLearning, Reward 60: -158.298, Len(game): 65, Training Time: 4.496s, Prediction Time: 48.172s
    Computed global error Bellman mean:  1.8792662783272623e-07  iter:  5
    Computed global error Bellman mean:  0.04316964766155983  iter:  5
    label KQLearning, Reward 61: -128.194, Len(game): 87, Training Time: 4.568s, Prediction Time: 49.567s
    Computed global error Bellman mean:  5.014585652317337e-07  iter:  5
    Computed global error Bellman mean:  0.012240207265594015  iter:  5
    label KQLearning, Reward 62: -20.612, Len(game): 129, Training Time: 4.663s, Prediction Time: 51.667s
    Computed global error Bellman mean:  0.0002394081257820467  iter:  5
    Computed global error Bellman mean:  0.01220292199060476  iter:  3
    label KQLearning, Reward 63: -114.193, Len(game): 89, Training Time: 4.734s, Prediction Time: 53.146s
    Computed global error Bellman mean:  0.002011721584875883  iter:  5
    label KQLearning, Reward 64: -161.212, Len(game): 223, Training Time: 4.922s, Prediction Time: 56.871s
    Computed global error Bellman mean:  0.0010813878500443924  iter:  5
    label KQLearning, Reward 65: -43.540, Len(game): 86, Training Time: 4.954s, Prediction Time: 58.373s
    Computed global error Bellman mean:  0.041838648664400034  iter:  5
    Computed global error Bellman mean:  0.021678195969673072  iter:  3
    label KQLearning, Reward 66: -76.536, Len(game): 88, Training Time: 5.033s, Prediction Time: 59.912s
    Computed global error Bellman mean:  3.1878030651204306e-07  iter:  5
    Computed global error Bellman mean:  0.021678195969673072  iter:  0
    label KQLearning, Reward 67: -329.870, Len(game): 116, Training Time: 5.105s, Prediction Time: 61.973s
    Computed global error Bellman mean:  2.8089624420491666e-07  iter:  4
    label KQLearning, Reward 68: -110.082, Len(game): 145, Training Time: 5.170s, Prediction Time: 64.594s
    Computed global error Bellman mean:  4.9160521471762124e-05  iter:  5
    label KQLearning, Reward 69: -112.219, Len(game): 67, Training Time: 5.200s, Prediction Time: 65.840s
    Computed global error Bellman mean:  0.01302253247801825  iter:  5
    Computed global error Bellman mean:  0.03073940390872866  iter:  0
    label KQLearning, Reward 70: -165.107, Len(game): 125, Training Time: 5.287s, Prediction Time: 68.142s
    Computed global error Bellman mean:  1.450390678825464e-07  iter:  5
    label KQLearning, Reward 71: -77.585, Len(game): 100, Training Time: 5.333s, Prediction Time: 70.012s
    Computed global error Bellman mean:  0.002150057297677937  iter:  5
    label KQLearning, Reward 72: -99.093, Len(game): 89, Training Time: 5.370s, Prediction Time: 71.710s
    Computed global error Bellman mean:  1.5736717288337026e-07  iter:  5
    label KQLearning, Reward 73: -53.694, Len(game): 95, Training Time: 5.413s, Prediction Time: 73.544s
    Computed global error Bellman mean:  0.10379776490011078  iter:  5
    Computed global error Bellman mean:  0.03528469701228075  iter:  5
    label KQLearning, Reward 74: -346.314, Len(game): 123, Training Time: 5.568s, Prediction Time: 75.974s
    Computed global error Bellman mean:  1.2592304101800802e-07  iter:  4
    Computed global error Bellman mean:  0.03528469701228075  iter:  0
    label KQLearning, Reward 75: -155.581, Len(game): 66, Training Time: 5.618s, Prediction Time: 77.293s
    Computed global error Bellman mean:  4.4272656867935096e-07  iter:  5
    label KQLearning, Reward 76: -37.157, Len(game): 134, Training Time: 5.689s, Prediction Time: 79.989s
    Computed global error Bellman mean:  0.0035638326415961073  iter:  5
    label KQLearning, Reward 77: -366.402, Len(game): 100, Training Time: 5.733s, Prediction Time: 82.029s
    Computed global error Bellman mean:  9.57788216707127e-08  iter:  5
    label KQLearning, Reward 78: -100.144, Len(game): 58, Training Time: 5.756s, Prediction Time: 83.246s
    Computed global error Bellman mean:  1.4021770559091706e-07  iter:  5
    label KQLearning, Reward 79: -194.275, Len(game): 100, Training Time: 5.801s, Prediction Time: 85.323s
    Computed global error Bellman mean:  0.001419789484288436  iter:  5
    label KQLearning, Reward 80: -263.120, Len(game): 138, Training Time: 5.872s, Prediction Time: 88.233s
    Computed global error Bellman mean:  3.0411055276608776e-07  iter:  5
    label KQLearning, Reward 81: -81.549, Len(game): 137, Training Time: 5.945s, Prediction Time: 91.170s
    Computed global error Bellman mean:  6.369757501627322e-08  iter:  5
    label KQLearning, Reward 82: -120.753, Len(game): 65, Training Time: 5.973s, Prediction Time: 92.603s
    Computed global error Bellman mean:  3.226556651880328e-07  iter:  5
    label KQLearning, Reward 83: -122.095, Len(game): 102, Training Time: 6.020s, Prediction Time: 94.849s
    Computed global error Bellman mean:  3.7966944364115074e-07  iter:  4
    label KQLearning, Reward 84: -148.674, Len(game): 128, Training Time: 6.072s, Prediction Time: 97.709s
    Computed global error Bellman mean:  0.02860698640944178  iter:  5
    Computed global error Bellman mean:  0.018572779889972066  iter:  4
    label KQLearning, Reward 85: -176.882, Len(game): 186, Training Time: 6.440s, Prediction Time: 101.923s
    Computed global error Bellman mean:  0.763425499234148  iter:  3
    Computed global error Bellman mean:  0.018572779889972066  iter:  0
    Computed global error Bellman mean:  0.7819904862272558  iter:  0
    label KQLearning, Reward 86: -139.821, Len(game): 107, Training Time: 6.569s, Prediction Time: 104.375s
    Computed global error Bellman mean:  1.9305178000519866e-07  iter:  4
    label KQLearning, Reward 87: -89.087, Len(game): 93, Training Time: 6.606s, Prediction Time: 106.550s
    Computed global error Bellman mean:  5.592149356914433e-07  iter:  4
    label KQLearning, Reward 88: -172.517, Len(game): 97, Training Time: 6.643s, Prediction Time: 108.857s
    Computed global error Bellman mean:  4.684396435051234  iter:  0
    Computed global error Bellman mean:  0.09762563230220624  iter:  5
    label KQLearning, Reward 89: -172.966, Len(game): 99, Training Time: 6.729s, Prediction Time: 111.230s
    Computed global error Bellman mean:  1.2812629991544435e-07  iter:  5
    Computed global error Bellman mean:  0.021922939016299922  iter:  5
    label KQLearning, Reward 90: -30.679, Len(game): 63, Training Time: 6.814s, Prediction Time: 112.767s
    Computed global error Bellman mean:  1.558713637569061e-07  iter:  5
    Computed global error Bellman mean:  0.02126550307632721  iter:  2
    label KQLearning, Reward 91: -109.270, Len(game): 90, Training Time: 6.894s, Prediction Time: 114.976s
    Computed global error Bellman mean:  1.8942307178829204e-07  iter:  4
    label KQLearning, Reward 92: -76.169, Len(game): 71, Training Time: 6.919s, Prediction Time: 116.750s
    Computed global error Bellman mean:  4.7346803379463134e-07  iter:  5
    label KQLearning, Reward 93: -168.278, Len(game): 137, Training Time: 6.996s, Prediction Time: 120.165s
    Computed global error Bellman mean:  1.5971887492236893e-07  iter:  5
    label KQLearning, Reward 94: -70.672, Len(game): 77, Training Time: 7.030s, Prediction Time: 122.115s
    Computed global error Bellman mean:  1.5335284951487005e-07  iter:  5
    label KQLearning, Reward 95: -98.835, Len(game): 102, Training Time: 7.079s, Prediction Time: 124.702s
    Computed global error Bellman mean:  5.951184342647667e-07  iter:  4
    label KQLearning, Reward 96: -2.054, Len(game): 103, Training Time: 7.121s, Prediction Time: 127.361s
    Computed global error Bellman mean:  4.232589460560222e-07  iter:  5
    label KQLearning, Reward 97: -28.490, Len(game): 92, Training Time: 7.164s, Prediction Time: 129.775s
    Computed global error Bellman mean:  3.9706448843362365e-07  iter:  4
    label KQLearning, Reward 98: -190.312, Len(game): 145, Training Time: 7.228s, Prediction Time: 133.584s
    Computed global error Bellman mean:  1.3688804971793133e-07  iter:  5
    label KQLearning, Reward 99: -56.519, Len(game): 87, Training Time: 7.266s, Prediction Time: 135.903s
    0
    label PPOAgent, Reward 0: -272.693, Len(game): 75, Training Time: 0.026s, Prediction Time: 0.026s
    label PPOAgent, Reward 1: -326.853, Len(game): 91, Training Time: 0.052s, Prediction Time: 0.052s
    label PPOAgent, Reward 2: -156.853, Len(game): 97, Training Time: 0.082s, Prediction Time: 0.082s
    label PPOAgent, Reward 3: -351.866, Len(game): 94, Training Time: 0.109s, Prediction Time: 0.109s
    label PPOAgent, Reward 4: -31.552, Len(game): 86, Training Time: 0.136s, Prediction Time: 0.136s
    label PPOAgent, Reward 5: -484.713, Len(game): 104, Training Time: 0.168s, Prediction Time: 0.168s
    label PPOAgent, Reward 6: -170.297, Len(game): 63, Training Time: 0.186s, Prediction Time: 0.186s
    label PPOAgent, Reward 7: -73.148, Len(game): 61, Training Time: 0.202s, Prediction Time: 0.202s
    label PPOAgent, Reward 8: -359.462, Len(game): 86, Training Time: 0.406s, Prediction Time: 0.406s
    label PPOAgent, Reward 9: -125.669, Len(game): 75, Training Time: 0.426s, Prediction Time: 0.426s
    label PPOAgent, Reward 10: -291.011, Len(game): 85, Training Time: 0.448s, Prediction Time: 0.448s
    label PPOAgent, Reward 11: -122.909, Len(game): 108, Training Time: 0.476s, Prediction Time: 0.476s
    label PPOAgent, Reward 12: -407.708, Len(game): 85, Training Time: 0.498s, Prediction Time: 0.498s
    label PPOAgent, Reward 13: -88.640, Len(game): 81, Training Time: 0.518s, Prediction Time: 0.518s
    label PPOAgent, Reward 14: -137.984, Len(game): 97, Training Time: 0.588s, Prediction Time: 0.588s
    label PPOAgent, Reward 15: -98.901, Len(game): 94, Training Time: 0.617s, Prediction Time: 0.617s
    label PPOAgent, Reward 16: -115.273, Len(game): 61, Training Time: 0.636s, Prediction Time: 0.636s
    label PPOAgent, Reward 17: -118.381, Len(game): 70, Training Time: 0.658s, Prediction Time: 0.658s
    label PPOAgent, Reward 18: -200.027, Len(game): 116, Training Time: 0.694s, Prediction Time: 0.694s
    label PPOAgent, Reward 19: -270.885, Len(game): 73, Training Time: 0.715s, Prediction Time: 0.715s
    label PPOAgent, Reward 20: -109.285, Len(game): 75, Training Time: 0.738s, Prediction Time: 0.738s
    label PPOAgent, Reward 21: -97.132, Len(game): 61, Training Time: 0.754s, Prediction Time: 0.754s
    label PPOAgent, Reward 22: -138.481, Len(game): 61, Training Time: 0.771s, Prediction Time: 0.771s
    label PPOAgent, Reward 23: -218.267, Len(game): 92, Training Time: 0.795s, Prediction Time: 0.795s
    label PPOAgent, Reward 24: -45.480, Len(game): 61, Training Time: 0.812s, Prediction Time: 0.812s
    label PPOAgent, Reward 25: -297.662, Len(game): 85, Training Time: 0.835s, Prediction Time: 0.835s
    label PPOAgent, Reward 26: -133.804, Len(game): 70, Training Time: 0.853s, Prediction Time: 0.853s
    label PPOAgent, Reward 27: -75.677, Len(game): 58, Training Time: 0.869s, Prediction Time: 0.869s
    label PPOAgent, Reward 28: -160.155, Len(game): 69, Training Time: 0.887s, Prediction Time: 0.887s
    label PPOAgent, Reward 29: -144.494, Len(game): 87, Training Time: 0.952s, Prediction Time: 0.952s
    label PPOAgent, Reward 30: -282.363, Len(game): 111, Training Time: 0.985s, Prediction Time: 0.985s
    label PPOAgent, Reward 31: -96.992, Len(game): 67, Training Time: 1.005s, Prediction Time: 1.005s
    label PPOAgent, Reward 32: -176.616, Len(game): 66, Training Time: 1.026s, Prediction Time: 1.026s
    label PPOAgent, Reward 33: -148.627, Len(game): 69, Training Time: 1.048s, Prediction Time: 1.048s
    label PPOAgent, Reward 34: -115.371, Len(game): 95, Training Time: 1.078s, Prediction Time: 1.078s
    label PPOAgent, Reward 35: -155.800, Len(game): 89, Training Time: 1.104s, Prediction Time: 1.104s
    label PPOAgent, Reward 36: -74.617, Len(game): 63, Training Time: 1.122s, Prediction Time: 1.122s
    label PPOAgent, Reward 37: -99.922, Len(game): 69, Training Time: 1.141s, Prediction Time: 1.141s
    label PPOAgent, Reward 38: -133.277, Len(game): 96, Training Time: 1.166s, Prediction Time: 1.166s
    label PPOAgent, Reward 39: -116.466, Len(game): 93, Training Time: 1.192s, Prediction Time: 1.192s
    label PPOAgent, Reward 40: 6.117, Len(game): 76, Training Time: 1.213s, Prediction Time: 1.213s
    label PPOAgent, Reward 41: -98.545, Len(game): 103, Training Time: 1.240s, Prediction Time: 1.240s
    label PPOAgent, Reward 42: -131.679, Len(game): 87, Training Time: 1.263s, Prediction Time: 1.263s
    label PPOAgent, Reward 43: -99.885, Len(game): 86, Training Time: 1.285s, Prediction Time: 1.285s
    label PPOAgent, Reward 44: -173.818, Len(game): 90, Training Time: 1.351s, Prediction Time: 1.351s
    label PPOAgent, Reward 45: -140.466, Len(game): 85, Training Time: 1.377s, Prediction Time: 1.377s
    label PPOAgent, Reward 46: -101.473, Len(game): 97, Training Time: 1.406s, Prediction Time: 1.406s
    label PPOAgent, Reward 47: -95.563, Len(game): 79, Training Time: 1.431s, Prediction Time: 1.431s
    label PPOAgent, Reward 48: -61.422, Len(game): 62, Training Time: 1.452s, Prediction Time: 1.452s
    label PPOAgent, Reward 49: -154.342, Len(game): 90, Training Time: 1.481s, Prediction Time: 1.481s
    label PPOAgent, Reward 50: -97.530, Len(game): 62, Training Time: 1.500s, Prediction Time: 1.500s
    label PPOAgent, Reward 51: -138.518, Len(game): 107, Training Time: 1.528s, Prediction Time: 1.528s
    label PPOAgent, Reward 52: -0.738, Len(game): 112, Training Time: 1.556s, Prediction Time: 1.556s
    label PPOAgent, Reward 53: -85.017, Len(game): 55, Training Time: 1.571s, Prediction Time: 1.571s
    label PPOAgent, Reward 54: -99.419, Len(game): 63, Training Time: 1.588s, Prediction Time: 1.588s
    label PPOAgent, Reward 55: -93.208, Len(game): 89, Training Time: 1.611s, Prediction Time: 1.611s
    label PPOAgent, Reward 56: -117.814, Len(game): 64, Training Time: 1.628s, Prediction Time: 1.628s
    label PPOAgent, Reward 57: -352.768, Len(game): 119, Training Time: 1.659s, Prediction Time: 1.659s
    label PPOAgent, Reward 58: -111.589, Len(game): 88, Training Time: 1.722s, Prediction Time: 1.722s
    label PPOAgent, Reward 59: -117.000, Len(game): 64, Training Time: 1.742s, Prediction Time: 1.742s
    label PPOAgent, Reward 60: -93.238, Len(game): 72, Training Time: 1.764s, Prediction Time: 1.764s
    label PPOAgent, Reward 61: -101.310, Len(game): 87, Training Time: 1.790s, Prediction Time: 1.790s
    label PPOAgent, Reward 62: -82.012, Len(game): 77, Training Time: 1.814s, Prediction Time: 1.814s
    label PPOAgent, Reward 63: -152.909, Len(game): 116, Training Time: 1.848s, Prediction Time: 1.848s
    label PPOAgent, Reward 64: -128.238, Len(game): 96, Training Time: 1.879s, Prediction Time: 1.879s
    label PPOAgent, Reward 65: -127.991, Len(game): 74, Training Time: 1.899s, Prediction Time: 1.899s
    label PPOAgent, Reward 66: -80.294, Len(game): 61, Training Time: 1.914s, Prediction Time: 1.914s
    label PPOAgent, Reward 67: -117.948, Len(game): 70, Training Time: 1.931s, Prediction Time: 1.931s
    label PPOAgent, Reward 68: -81.239, Len(game): 74, Training Time: 1.950s, Prediction Time: 1.950s
    label PPOAgent, Reward 69: -120.795, Len(game): 101, Training Time: 1.976s, Prediction Time: 1.976s
    label PPOAgent, Reward 70: -425.281, Len(game): 132, Training Time: 2.010s, Prediction Time: 2.010s
    label PPOAgent, Reward 71: -122.421, Len(game): 106, Training Time: 2.038s, Prediction Time: 2.038s
    label PPOAgent, Reward 72: -108.586, Len(game): 76, Training Time: 2.100s, Prediction Time: 2.100s
    label PPOAgent, Reward 73: 7.097, Len(game): 91, Training Time: 2.128s, Prediction Time: 2.128s
    label PPOAgent, Reward 74: -111.053, Len(game): 119, Training Time: 2.163s, Prediction Time: 2.163s
    label PPOAgent, Reward 75: -65.215, Len(game): 57, Training Time: 2.180s, Prediction Time: 2.180s
    label PPOAgent, Reward 76: -89.396, Len(game): 67, Training Time: 2.202s, Prediction Time: 2.202s
    label PPOAgent, Reward 77: -91.236, Len(game): 87, Training Time: 2.229s, Prediction Time: 2.229s
    label PPOAgent, Reward 78: -104.544, Len(game): 73, Training Time: 2.252s, Prediction Time: 2.252s
    label PPOAgent, Reward 79: -112.192, Len(game): 56, Training Time: 2.266s, Prediction Time: 2.266s
    label PPOAgent, Reward 80: -172.109, Len(game): 99, Training Time: 2.292s, Prediction Time: 2.292s
    label PPOAgent, Reward 81: -132.582, Len(game): 94, Training Time: 2.316s, Prediction Time: 2.316s
    label PPOAgent, Reward 82: -123.629, Len(game): 96, Training Time: 2.341s, Prediction Time: 2.341s
    label PPOAgent, Reward 83: -69.742, Len(game): 70, Training Time: 2.359s, Prediction Time: 2.359s
    label PPOAgent, Reward 84: -86.067, Len(game): 62, Training Time: 2.375s, Prediction Time: 2.375s
    label PPOAgent, Reward 85: -127.144, Len(game): 98, Training Time: 2.401s, Prediction Time: 2.401s
    label PPOAgent, Reward 86: -162.282, Len(game): 68, Training Time: 2.419s, Prediction Time: 2.419s
    label PPOAgent, Reward 87: -259.057, Len(game): 89, Training Time: 2.482s, Prediction Time: 2.482s
    label PPOAgent, Reward 88: -95.300, Len(game): 111, Training Time: 2.516s, Prediction Time: 2.516s
    label PPOAgent, Reward 89: -422.531, Len(game): 81, Training Time: 2.540s, Prediction Time: 2.540s
    label PPOAgent, Reward 90: -264.879, Len(game): 100, Training Time: 2.571s, Prediction Time: 2.571s
    label PPOAgent, Reward 91: 31.986, Len(game): 80, Training Time: 2.595s, Prediction Time: 2.595s
    label PPOAgent, Reward 92: -128.386, Len(game): 86, Training Time: 2.623s, Prediction Time: 2.623s
    label PPOAgent, Reward 93: -204.219, Len(game): 97, Training Time: 2.651s, Prediction Time: 2.651s
    label PPOAgent, Reward 94: -197.231, Len(game): 82, Training Time: 2.672s, Prediction Time: 2.672s
    label PPOAgent, Reward 95: -228.226, Len(game): 96, Training Time: 2.697s, Prediction Time: 2.697s
    label PPOAgent, Reward 96: -117.467, Len(game): 82, Training Time: 2.719s, Prediction Time: 2.719s
    label PPOAgent, Reward 97: -101.354, Len(game): 63, Training Time: 2.735s, Prediction Time: 2.735s
    label PPOAgent, Reward 98: -180.603, Len(game): 110, Training Time: 2.763s, Prediction Time: 2.763s
    label PPOAgent, Reward 99: -68.018, Len(game): 60, Training Time: 2.779s, Prediction Time: 2.779s
    label Controller-based, Reward 0: -109.187, Len(game): 78, Training Time: 0.002s, Prediction Time: 0.002s
    label Controller-based, Reward 1: -145.415, Len(game): 58, Training Time: 0.004s, Prediction Time: 0.003s
    label Controller-based, Reward 2: -87.215, Len(game): 65, Training Time: 0.013s, Prediction Time: 0.004s
    label Controller-based, Reward 3: -328.020, Len(game): 1882, Training Time: 0.049s, Prediction Time: 0.060s
    label Controller-based, Reward 4: -272.301, Len(game): 325, Training Time: 0.064s, Prediction Time: 0.069s
    label Controller-based, Reward 5: -22.272, Len(game): 59, Training Time: 0.075s, Prediction Time: 0.071s
    label Controller-based, Reward 6: -117.033, Len(game): 66, Training Time: 0.087s, Prediction Time: 0.073s
    label Controller-based, Reward 7: -108.191, Len(game): 70, Training Time: 0.097s, Prediction Time: 0.074s
    label Controller-based, Reward 8: 5.976, Len(game): 100, Training Time: 0.109s, Prediction Time: 0.078s
    label Controller-based, Reward 9: 249.708, Len(game): 198, Training Time: 0.123s, Prediction Time: 0.084s
    label Controller-based, Reward 10: -127.574, Len(game): 155, Training Time: 0.137s, Prediction Time: 0.088s
    label Controller-based, Reward 11: -108.222, Len(game): 78, Training Time: 0.149s, Prediction Time: 0.091s
    label Controller-based, Reward 12: -164.595, Len(game): 118, Training Time: 0.161s, Prediction Time: 0.095s
    label Controller-based, Reward 13: -381.053, Len(game): 90, Training Time: 0.174s, Prediction Time: 0.098s
    label Controller-based, Reward 14: -66.826, Len(game): 84, Training Time: 0.187s, Prediction Time: 0.101s
    label Controller-based, Reward 15: -46.173, Len(game): 80, Training Time: 0.199s, Prediction Time: 0.103s
    label Controller-based, Reward 16: -73.804, Len(game): 113, Training Time: 0.214s, Prediction Time: 0.108s
    label Controller-based, Reward 17: -28.463, Len(game): 91, Training Time: 0.228s, Prediction Time: 0.110s
    label Controller-based, Reward 18: -5.673, Len(game): 119, Training Time: 0.242s, Prediction Time: 0.113s
    label Controller-based, Reward 19: -321.711, Len(game): 89, Training Time: 0.255s, Prediction Time: 0.117s
    label Controller-based, Reward 20: -269.180, Len(game): 126, Training Time: 0.270s, Prediction Time: 0.120s
    label Controller-based, Reward 21: -308.091, Len(game): 159, Training Time: 0.284s, Prediction Time: 0.124s
    label Controller-based, Reward 22: -298.844, Len(game): 87, Training Time: 0.298s, Prediction Time: 0.128s
    label Controller-based, Reward 23: -275.690, Len(game): 95, Training Time: 0.312s, Prediction Time: 0.129s
    label Controller-based, Reward 24: -386.044, Len(game): 100, Training Time: 0.326s, Prediction Time: 0.133s
    label Controller-based, Reward 25: -375.524, Len(game): 104, Training Time: 0.339s, Prediction Time: 0.136s
    label Controller-based, Reward 26: -314.856, Len(game): 103, Training Time: 0.353s, Prediction Time: 0.139s
    label Controller-based, Reward 27: -313.109, Len(game): 115, Training Time: 0.368s, Prediction Time: 0.142s
    label Controller-based, Reward 28: -306.189, Len(game): 96, Training Time: 0.381s, Prediction Time: 0.144s
    label Controller-based, Reward 29: -371.251, Len(game): 150, Training Time: 0.397s, Prediction Time: 0.149s
    label Controller-based, Reward 30: -195.533, Len(game): 168, Training Time: 0.411s, Prediction Time: 0.153s
    label Controller-based, Reward 31: -27.537, Len(game): 123, Training Time: 0.426s, Prediction Time: 0.156s
    label Controller-based, Reward 32: -307.507, Len(game): 144, Training Time: 0.441s, Prediction Time: 0.160s
    label Controller-based, Reward 33: -318.105, Len(game): 98, Training Time: 0.455s, Prediction Time: 0.162s
    label Controller-based, Reward 34: -321.512, Len(game): 93, Training Time: 0.468s, Prediction Time: 0.165s
    label Controller-based, Reward 35: -45.163, Len(game): 68, Training Time: 0.482s, Prediction Time: 0.168s
    label Controller-based, Reward 36: -307.871, Len(game): 92, Training Time: 0.497s, Prediction Time: 0.170s
    label Controller-based, Reward 37: -123.997, Len(game): 164, Training Time: 0.514s, Prediction Time: 0.175s
    label Controller-based, Reward 38: -450.676, Len(game): 97, Training Time: 0.527s, Prediction Time: 0.178s
    label Controller-based, Reward 39: -324.244, Len(game): 113, Training Time: 0.541s, Prediction Time: 0.181s
    label Controller-based, Reward 40: -296.305, Len(game): 96, Training Time: 0.555s, Prediction Time: 0.184s
    label Controller-based, Reward 41: -303.796, Len(game): 100, Training Time: 0.569s, Prediction Time: 0.186s
    label Controller-based, Reward 42: -469.271, Len(game): 99, Training Time: 0.584s, Prediction Time: 0.189s
    label Controller-based, Reward 43: -258.104, Len(game): 111, Training Time: 0.598s, Prediction Time: 0.191s
    label Controller-based, Reward 44: -164.094, Len(game): 105, Training Time: 0.612s, Prediction Time: 0.194s
    label Controller-based, Reward 45: -336.855, Len(game): 148, Training Time: 0.626s, Prediction Time: 0.197s
    label Controller-based, Reward 46: -307.578, Len(game): 91, Training Time: 0.640s, Prediction Time: 0.199s
    label Controller-based, Reward 47: 43.214, Len(game): 135, Training Time: 0.655s, Prediction Time: 0.204s
    label Controller-based, Reward 48: -314.221, Len(game): 162, Training Time: 0.671s, Prediction Time: 0.209s
    label Controller-based, Reward 49: -159.623, Len(game): 140, Training Time: 0.686s, Prediction Time: 0.213s
    label Controller-based, Reward 50: 240.217, Len(game): 214, Training Time: 0.701s, Prediction Time: 0.219s
    label Controller-based, Reward 51: -306.454, Len(game): 103, Training Time: 0.715s, Prediction Time: 0.222s
    label Controller-based, Reward 52: 283.703, Len(game): 262, Training Time: 0.729s, Prediction Time: 0.230s
    label Controller-based, Reward 53: -117.697, Len(game): 207, Training Time: 0.745s, Prediction Time: 0.235s
    label Controller-based, Reward 54: 28.239, Len(game): 113, Training Time: 0.761s, Prediction Time: 0.238s
    label Controller-based, Reward 55: -109.534, Len(game): 112, Training Time: 0.776s, Prediction Time: 0.241s
    label Controller-based, Reward 56: -270.764, Len(game): 124, Training Time: 0.791s, Prediction Time: 0.244s
    label Controller-based, Reward 57: -105.658, Len(game): 81, Training Time: 0.804s, Prediction Time: 0.247s
    label Controller-based, Reward 58: -290.051, Len(game): 141, Training Time: 0.819s, Prediction Time: 0.250s
    label Controller-based, Reward 59: -344.216, Len(game): 103, Training Time: 0.832s, Prediction Time: 0.253s
    label Controller-based, Reward 60: 6.544, Len(game): 115, Training Time: 0.847s, Prediction Time: 0.256s
    label Controller-based, Reward 61: -42.887, Len(game): 92, Training Time: 0.861s, Prediction Time: 0.258s
    label Controller-based, Reward 62: -252.574, Len(game): 121, Training Time: 0.875s, Prediction Time: 0.261s
    label Controller-based, Reward 63: -323.205, Len(game): 103, Training Time: 0.890s, Prediction Time: 0.264s
    label Controller-based, Reward 64: -239.030, Len(game): 77, Training Time: 0.903s, Prediction Time: 0.267s
    label Controller-based, Reward 65: -205.818, Len(game): 69, Training Time: 0.917s, Prediction Time: 0.270s
    label Controller-based, Reward 66: 6.979, Len(game): 98, Training Time: 0.931s, Prediction Time: 0.273s
    label Controller-based, Reward 67: -311.754, Len(game): 87, Training Time: 0.945s, Prediction Time: 0.275s
    label Controller-based, Reward 68: -137.941, Len(game): 116, Training Time: 0.959s, Prediction Time: 0.278s
    label Controller-based, Reward 69: -309.389, Len(game): 121, Training Time: 0.974s, Prediction Time: 0.281s
    label Controller-based, Reward 70: -338.159, Len(game): 90, Training Time: 0.987s, Prediction Time: 0.284s
    label Controller-based, Reward 71: -328.075, Len(game): 83, Training Time: 1.001s, Prediction Time: 0.287s
    label Controller-based, Reward 72: -180.717, Len(game): 94, Training Time: 1.015s, Prediction Time: 0.289s
    label Controller-based, Reward 73: -313.592, Len(game): 127, Training Time: 1.031s, Prediction Time: 0.292s
    label Controller-based, Reward 74: -307.018, Len(game): 101, Training Time: 1.044s, Prediction Time: 0.295s
    label Controller-based, Reward 75: -254.989, Len(game): 99, Training Time: 1.058s, Prediction Time: 0.298s
    label Controller-based, Reward 76: -305.602, Len(game): 89, Training Time: 1.071s, Prediction Time: 0.300s
    label Controller-based, Reward 77: -105.594, Len(game): 119, Training Time: 1.085s, Prediction Time: 0.303s
    label Controller-based, Reward 78: -141.775, Len(game): 201, Training Time: 1.100s, Prediction Time: 0.308s
    label Controller-based, Reward 79: -315.513, Len(game): 85, Training Time: 1.114s, Prediction Time: 0.310s
    label Controller-based, Reward 80: -30.117, Len(game): 120, Training Time: 1.128s, Prediction Time: 0.313s
    label Controller-based, Reward 81: 52.009, Len(game): 125, Training Time: 1.143s, Prediction Time: 0.316s
    label Controller-based, Reward 82: -318.479, Len(game): 100, Training Time: 1.157s, Prediction Time: 0.319s
    label Controller-based, Reward 83: -295.388, Len(game): 92, Training Time: 1.170s, Prediction Time: 0.321s
    label Controller-based, Reward 84: 232.479, Len(game): 212, Training Time: 1.186s, Prediction Time: 0.328s
    label Controller-based, Reward 85: -19.038, Len(game): 108, Training Time: 1.201s, Prediction Time: 0.332s
    label Controller-based, Reward 86: -93.804, Len(game): 73, Training Time: 1.214s, Prediction Time: 0.335s
    label Controller-based, Reward 87: -253.262, Len(game): 107, Training Time: 1.228s, Prediction Time: 0.338s
    label Controller-based, Reward 88: -306.118, Len(game): 103, Training Time: 1.243s, Prediction Time: 0.340s
    label Controller-based, Reward 89: -355.999, Len(game): 152, Training Time: 1.257s, Prediction Time: 0.345s
    label Controller-based, Reward 90: -342.132, Len(game): 131, Training Time: 1.270s, Prediction Time: 0.349s
    label Controller-based, Reward 91: 9.256, Len(game): 107, Training Time: 1.285s, Prediction Time: 0.352s
    label Controller-based, Reward 92: -300.264, Len(game): 193, Training Time: 1.301s, Prediction Time: 0.358s
    label Controller-based, Reward 93: 48.608, Len(game): 105, Training Time: 1.315s, Prediction Time: 0.360s
    label Controller-based, Reward 94: -311.169, Len(game): 101, Training Time: 1.328s, Prediction Time: 0.363s
    label Controller-based, Reward 95: 15.571, Len(game): 103, Training Time: 1.343s, Prediction Time: 0.365s
    label Controller-based, Reward 96: -284.440, Len(game): 99, Training Time: 1.356s, Prediction Time: 0.367s
    label Controller-based, Reward 97: -210.060, Len(game): 236, Training Time: 1.371s, Prediction Time: 0.373s
    label Controller-based, Reward 98: -148.800, Len(game): 104, Training Time: 1.385s, Prediction Time: 0.376s
    label Controller-based, Reward 99: 2.763, Len(game): 109, Training Time: 1.398s, Prediction Time: 0.380s
    label KACAgent, Reward 0: -96.842, Len(game): 70, Training Time: 0.010s, Prediction Time: 0.001s
    label KACAgent, Reward 1: -369.215, Len(game): 84, Training Time: 0.019s, Prediction Time: 0.017s
    label KACAgent, Reward 2: -61.811, Len(game): 123, Training Time: 0.035s, Prediction Time: 0.039s
    label KACAgent, Reward 3: -153.078, Len(game): 93, Training Time: 0.063s, Prediction Time: 0.061s
    label KACAgent, Reward 4: -118.294, Len(game): 57, Training Time: 0.113s, Prediction Time: 0.083s
    label KACAgent, Reward 5: -69.676, Len(game): 108, Training Time: 0.175s, Prediction Time: 0.135s
    label KACAgent, Reward 6: -48.635, Len(game): 65, Training Time: 0.256s, Prediction Time: 0.174s
    label KACAgent, Reward 7: -284.986, Len(game): 101, Training Time: 0.347s, Prediction Time: 0.238s
    label KACAgent, Reward 8: -424.661, Len(game): 155, Training Time: 0.479s, Prediction Time: 0.326s
    label KACAgent, Reward 9: -323.118, Len(game): 100, Training Time: 0.648s, Prediction Time: 0.414s
    label KACAgent, Reward 10: -90.706, Len(game): 76, Training Time: 0.862s, Prediction Time: 0.506s
    label KACAgent, Reward 11: -109.113, Len(game): 95, Training Time: 1.083s, Prediction Time: 0.627s
    label KACAgent, Reward 12: -72.714, Len(game): 127, Training Time: 1.357s, Prediction Time: 0.784s
    label KACAgent, Reward 13: -11.326, Len(game): 104, Training Time: 1.704s, Prediction Time: 0.937s
    label KACAgent, Reward 14: 9.094, Len(game): 116, Training Time: 2.095s, Prediction Time: 1.121s
    label KACAgent, Reward 15: -107.403, Len(game): 132, Training Time: 2.545s, Prediction Time: 1.352s
    label KACAgent, Reward 16: -47.898, Len(game): 88, Training Time: 3.076s, Prediction Time: 1.584s
    label KACAgent, Reward 17: -261.339, Len(game): 85, Training Time: 3.667s, Prediction Time: 1.845s
    label KACAgent, Reward 18: -102.691, Len(game): 110, Training Time: 4.297s, Prediction Time: 2.147s
    label KACAgent, Reward 19: -114.236, Len(game): 86, Training Time: 5.023s, Prediction Time: 2.444s
    label KACAgent, Reward 20: 17.024, Len(game): 81, Training Time: 5.783s, Prediction Time: 2.782s
    label KACAgent, Reward 21: -309.332, Len(game): 117, Training Time: 6.616s, Prediction Time: 3.179s
    label KACAgent, Reward 22: -54.321, Len(game): 93, Training Time: 7.521s, Prediction Time: 3.579s
    label KACAgent, Reward 23: -256.266, Len(game): 104, Training Time: 8.484s, Prediction Time: 4.021s
    label KACAgent, Reward 24: -276.718, Len(game): 89, Training Time: 9.515s, Prediction Time: 4.499s
    label KACAgent, Reward 25: -227.532, Len(game): 127, Training Time: 10.628s, Prediction Time: 5.045s
    label KACAgent, Reward 26: -215.051, Len(game): 116, Training Time: 11.873s, Prediction Time: 5.625s
    label KACAgent, Reward 27: -69.964, Len(game): 105, Training Time: 13.218s, Prediction Time: 6.265s
    label KACAgent, Reward 28: -39.724, Len(game): 100, Training Time: 14.688s, Prediction Time: 6.947s
    label KACAgent, Reward 29: -81.385, Len(game): 101, Training Time: 16.245s, Prediction Time: 7.720s
    label KACAgent, Reward 30: -82.343, Len(game): 85, Training Time: 17.946s, Prediction Time: 8.529s
    label KACAgent, Reward 31: -263.379, Len(game): 87, Training Time: 19.784s, Prediction Time: 9.395s
    label KACAgent, Reward 32: -43.900, Len(game): 127, Training Time: 21.637s, Prediction Time: 10.360s
    label KACAgent, Reward 33: -39.229, Len(game): 100, Training Time: 23.725s, Prediction Time: 11.306s
    label KACAgent, Reward 34: -3.636, Len(game): 96, Training Time: 25.901s, Prediction Time: 12.406s
    label KACAgent, Reward 35: -149.382, Len(game): 143, Training Time: 28.247s, Prediction Time: 13.623s
    label KACAgent, Reward 36: 3.228, Len(game): 124, Training Time: 30.723s, Prediction Time: 14.857s
    label KACAgent, Reward 37: -44.013, Len(game): 97, Training Time: 33.358s, Prediction Time: 16.130s
    label KACAgent, Reward 38: -36.072, Len(game): 92, Training Time: 36.156s, Prediction Time: 17.473s
    label KACAgent, Reward 39: -9.850, Len(game): 146, Training Time: 39.122s, Prediction Time: 18.980s
    label KACAgent, Reward 40: -38.324, Len(game): 110, Training Time: 42.274s, Prediction Time: 20.569s
    label KACAgent, Reward 41: -42.383, Len(game): 89, Training Time: 45.614s, Prediction Time: 22.250s
    label KACAgent, Reward 42: -27.385, Len(game): 231, Training Time: 49.047s, Prediction Time: 24.259s
    label KACAgent, Reward 43: -16.779, Len(game): 83, Training Time: 52.967s, Prediction Time: 26.070s
    label KACAgent, Reward 44: -61.728, Len(game): 81, Training Time: 52.967s, Prediction Time: 28.125s
    label KACAgent, Reward 45: -27.585, Len(game): 97, Training Time: 52.967s, Prediction Time: 28.272s
    label KACAgent, Reward 46: -214.309, Len(game): 153, Training Time: 52.967s, Prediction Time: 28.479s
    label KACAgent, Reward 47: 0.448, Len(game): 99, Training Time: 52.967s, Prediction Time: 28.612s
    label KACAgent, Reward 48: -21.611, Len(game): 100, Training Time: 52.967s, Prediction Time: 28.746s
    label KACAgent, Reward 49: -52.920, Len(game): 123, Training Time: 52.967s, Prediction Time: 28.914s
    label KACAgent, Reward 50: -10.061, Len(game): 104, Training Time: 52.967s, Prediction Time: 29.055s
    label KACAgent, Reward 51: -43.226, Len(game): 103, Training Time: 52.967s, Prediction Time: 29.195s
    label KACAgent, Reward 52: 18.740, Len(game): 90, Training Time: 52.967s, Prediction Time: 29.318s
    label KACAgent, Reward 53: -8.629, Len(game): 98, Training Time: 52.967s, Prediction Time: 29.451s
    label KACAgent, Reward 54: 1.215, Len(game): 168, Training Time: 52.967s, Prediction Time: 29.679s
    label KACAgent, Reward 55: -266.347, Len(game): 142, Training Time: 52.967s, Prediction Time: 29.872s
    label KACAgent, Reward 56: -5.459, Len(game): 104, Training Time: 52.967s, Prediction Time: 30.011s
    label KACAgent, Reward 57: -160.539, Len(game): 146, Training Time: 52.967s, Prediction Time: 30.211s
    label KACAgent, Reward 58: -12.529, Len(game): 118, Training Time: 52.967s, Prediction Time: 30.371s
    label KACAgent, Reward 59: -35.501, Len(game): 120, Training Time: 52.967s, Prediction Time: 30.535s
    label KACAgent, Reward 60: 19.573, Len(game): 107, Training Time: 52.967s, Prediction Time: 30.679s
    label KACAgent, Reward 61: -14.930, Len(game): 87, Training Time: 52.967s, Prediction Time: 30.795s
    label KACAgent, Reward 62: -32.161, Len(game): 100, Training Time: 52.967s, Prediction Time: 30.932s
    label KACAgent, Reward 63: 7.399, Len(game): 114, Training Time: 52.967s, Prediction Time: 31.085s
    label KACAgent, Reward 64: -3.288, Len(game): 113, Training Time: 52.967s, Prediction Time: 31.236s
    label KACAgent, Reward 65: -82.042, Len(game): 125, Training Time: 52.967s, Prediction Time: 31.408s
    label KACAgent, Reward 66: -53.235, Len(game): 106, Training Time: 52.967s, Prediction Time: 31.552s
    label KACAgent, Reward 67: -62.717, Len(game): 150, Training Time: 52.967s, Prediction Time: 31.754s
    label KACAgent, Reward 68: -1.777, Len(game): 151, Training Time: 52.967s, Prediction Time: 31.958s
    label KACAgent, Reward 69: -43.269, Len(game): 74, Training Time: 52.967s, Prediction Time: 32.058s
    label KACAgent, Reward 70: -63.784, Len(game): 110, Training Time: 52.967s, Prediction Time: 32.208s
    label KACAgent, Reward 71: -73.038, Len(game): 69, Training Time: 52.967s, Prediction Time: 32.303s
    label KACAgent, Reward 72: -68.695, Len(game): 114, Training Time: 52.967s, Prediction Time: 32.457s
    label KACAgent, Reward 73: 11.010, Len(game): 150, Training Time: 52.967s, Prediction Time: 32.659s
    label KACAgent, Reward 74: -12.993, Len(game): 103, Training Time: 52.967s, Prediction Time: 32.797s
    label KACAgent, Reward 75: -432.062, Len(game): 147, Training Time: 52.967s, Prediction Time: 32.997s
    label KACAgent, Reward 76: -29.239, Len(game): 100, Training Time: 52.967s, Prediction Time: 33.136s
    label KACAgent, Reward 77: 15.039, Len(game): 157, Training Time: 52.967s, Prediction Time: 33.350s
    label KACAgent, Reward 78: -60.004, Len(game): 116, Training Time: 52.967s, Prediction Time: 33.512s
    label KACAgent, Reward 79: 17.875, Len(game): 122, Training Time: 52.967s, Prediction Time: 33.676s
    label KACAgent, Reward 80: -10.065, Len(game): 160, Training Time: 52.967s, Prediction Time: 33.893s
    label KACAgent, Reward 81: -18.335, Len(game): 145, Training Time: 52.967s, Prediction Time: 34.091s
    label KACAgent, Reward 82: -89.590, Len(game): 162, Training Time: 52.967s, Prediction Time: 34.312s
    label KACAgent, Reward 83: -18.717, Len(game): 130, Training Time: 52.967s, Prediction Time: 34.489s
    label KACAgent, Reward 84: -73.466, Len(game): 138, Training Time: 52.967s, Prediction Time: 34.677s
    label KACAgent, Reward 85: 16.586, Len(game): 108, Training Time: 52.967s, Prediction Time: 34.823s
    label KACAgent, Reward 86: -48.256, Len(game): 110, Training Time: 52.967s, Prediction Time: 34.970s
    label KACAgent, Reward 87: -101.534, Len(game): 280, Training Time: 52.967s, Prediction Time: 35.348s
    label KACAgent, Reward 88: -57.668, Len(game): 107, Training Time: 52.967s, Prediction Time: 35.491s
    label KACAgent, Reward 89: -23.000, Len(game): 147, Training Time: 52.967s, Prediction Time: 35.693s
    label KACAgent, Reward 90: -11.871, Len(game): 181, Training Time: 52.967s, Prediction Time: 35.940s
    label KACAgent, Reward 91: -7.514, Len(game): 171, Training Time: 52.967s, Prediction Time: 36.170s
    label KACAgent, Reward 92: -37.977, Len(game): 111, Training Time: 52.967s, Prediction Time: 36.322s
    label KACAgent, Reward 93: 6.681, Len(game): 79, Training Time: 52.967s, Prediction Time: 36.429s
    label KACAgent, Reward 94: -19.688, Len(game): 147, Training Time: 52.967s, Prediction Time: 36.629s
    label KACAgent, Reward 95: -45.433, Len(game): 150, Training Time: 52.967s, Prediction Time: 36.836s
    label KACAgent, Reward 96: 20.047, Len(game): 110, Training Time: 52.967s, Prediction Time: 36.984s
    label KACAgent, Reward 97: -79.894, Len(game): 152, Training Time: 52.967s, Prediction Time: 37.193s
    label KACAgent, Reward 98: -15.137, Len(game): 95, Training Time: 52.967s, Prediction Time: 37.323s
    label KACAgent, Reward 99: -51.432, Len(game): 146, Training Time: 52.967s, Prediction Time: 37.521s
    label PolicyGradient, Reward 0: -393.430, Len(game): 137, Training Time: 0.014s, Prediction Time: 0.003s
    label PolicyGradient, Reward 1: -75.146, Len(game): 71, Training Time: 0.030s, Prediction Time: 0.022s
    label PolicyGradient, Reward 2: -152.498, Len(game): 95, Training Time: 0.053s, Prediction Time: 0.046s
    label PolicyGradient, Reward 3: -324.945, Len(game): 94, Training Time: 0.088s, Prediction Time: 0.076s
    label PolicyGradient, Reward 4: -104.241, Len(game): 63, Training Time: 0.141s, Prediction Time: 0.101s
    label PolicyGradient, Reward 5: -105.430, Len(game): 61, Training Time: 0.203s, Prediction Time: 0.136s
    label PolicyGradient, Reward 6: -229.294, Len(game): 116, Training Time: 0.280s, Prediction Time: 0.196s
    label PolicyGradient, Reward 7: -67.352, Len(game): 73, Training Time: 0.392s, Prediction Time: 0.244s
    label PolicyGradient, Reward 8: -65.958, Len(game): 137, Training Time: 0.522s, Prediction Time: 0.344s
    label PolicyGradient, Reward 9: -92.362, Len(game): 66, Training Time: 0.701s, Prediction Time: 0.413s
    label PolicyGradient, Reward 10: -264.395, Len(game): 120, Training Time: 0.903s, Prediction Time: 0.534s
    label PolicyGradient, Reward 11: -80.529, Len(game): 168, Training Time: 1.178s, Prediction Time: 0.687s
    label PolicyGradient, Reward 12: -215.839, Len(game): 184, Training Time: 1.514s, Prediction Time: 0.885s
    label PolicyGradient, Reward 13: -211.972, Len(game): 158, Training Time: 1.941s, Prediction Time: 1.099s
    label PolicyGradient, Reward 14: -241.048, Len(game): 86, Training Time: 2.480s, Prediction Time: 1.287s
    label PolicyGradient, Reward 15: -45.792, Len(game): 133, Training Time: 3.060s, Prediction Time: 1.562s
    label PolicyGradient, Reward 16: -62.876, Len(game): 147, Training Time: 3.704s, Prediction Time: 1.872s
    label PolicyGradient, Reward 17: -206.646, Len(game): 107, Training Time: 4.547s, Prediction Time: 2.198s
    label PolicyGradient, Reward 18: -113.774, Len(game): 176, Training Time: 5.379s, Prediction Time: 2.633s
    label PolicyGradient, Reward 19: -161.748, Len(game): 152, Training Time: 6.345s, Prediction Time: 3.064s
    label PolicyGradient, Reward 20: -164.830, Len(game): 137, Training Time: 7.445s, Prediction Time: 3.559s
    label PolicyGradient, Reward 21: -81.720, Len(game): 136, Training Time: 8.731s, Prediction Time: 4.108s
    label PolicyGradient, Reward 22: -316.899, Len(game): 125, Training Time: 10.112s, Prediction Time: 4.727s
    label PolicyGradient, Reward 23: -135.891, Len(game): 144, Training Time: 11.624s, Prediction Time: 5.447s
    label PolicyGradient, Reward 24: -25.885, Len(game): 159, Training Time: 13.346s, Prediction Time: 6.258s
    label PolicyGradient, Reward 25: -97.168, Len(game): 143, Training Time: 15.327s, Prediction Time: 7.130s
    label PolicyGradient, Reward 26: -181.733, Len(game): 113, Training Time: 17.423s, Prediction Time: 8.051s
    label PolicyGradient, Reward 27: 13.154, Len(game): 181, Training Time: 19.700s, Prediction Time: 9.114s
    label PolicyGradient, Reward 28: -77.998, Len(game): 143, Training Time: 22.179s, Prediction Time: 10.268s
    label PolicyGradient, Reward 29: -64.061, Len(game): 217, Training Time: 24.847s, Prediction Time: 11.585s
    label PolicyGradient, Reward 30: -18.764, Len(game): 219, Training Time: 27.893s, Prediction Time: 13.003s
    label PolicyGradient, Reward 31: -255.009, Len(game): 391, Training Time: 31.394s, Prediction Time: 14.789s
    label PolicyGradient, Reward 32: -351.244, Len(game): 1817, Training Time: 35.562s, Prediction Time: 18.689s
    label PolicyGradient, Reward 33: -14.374, Len(game): 205, Training Time: 41.989s, Prediction Time: 20.802s
    label PolicyGradient, Reward 34: -354.272, Len(game): 231, Training Time: 49.651s, Prediction Time: 24.064s
    label PolicyGradient, Reward 35: -345.703, Len(game): 211, Training Time: 57.217s, Prediction Time: 27.649s
    label PolicyGradient, Reward 36: -389.583, Len(game): 298, Training Time: 57.217s, Prediction Time: 31.726s
    label PolicyGradient, Reward 37: -53.407, Len(game): 112, Training Time: 57.217s, Prediction Time: 31.919s
    label PolicyGradient, Reward 38: -88.474, Len(game): 126, Training Time: 57.217s, Prediction Time: 32.135s
    label PolicyGradient, Reward 39: -211.173, Len(game): 160, Training Time: 57.217s, Prediction Time: 32.407s
    label PolicyGradient, Reward 40: -225.880, Len(game): 171, Training Time: 57.217s, Prediction Time: 32.698s
    label PolicyGradient, Reward 41: -151.298, Len(game): 158, Training Time: 57.217s, Prediction Time: 32.968s
    label PolicyGradient, Reward 42: -355.336, Len(game): 231, Training Time: 57.217s, Prediction Time: 33.355s
    label PolicyGradient, Reward 43: -183.964, Len(game): 177, Training Time: 57.217s, Prediction Time: 33.657s
    label PolicyGradient, Reward 44: -14.213, Len(game): 196, Training Time: 57.217s, Prediction Time: 33.989s
    label PolicyGradient, Reward 45: -190.849, Len(game): 194, Training Time: 57.217s, Prediction Time: 34.319s
    label PolicyGradient, Reward 46: -49.239, Len(game): 194, Training Time: 57.217s, Prediction Time: 34.648s
    label PolicyGradient, Reward 47: -226.348, Len(game): 131, Training Time: 57.217s, Prediction Time: 34.873s
    label PolicyGradient, Reward 48: 6.076, Len(game): 188, Training Time: 57.217s, Prediction Time: 35.194s
    label PolicyGradient, Reward 49: -39.302, Len(game): 178, Training Time: 57.217s, Prediction Time: 35.492s
    label PolicyGradient, Reward 50: -80.477, Len(game): 124, Training Time: 57.217s, Prediction Time: 35.700s
    label PolicyGradient, Reward 51: -160.085, Len(game): 244, Training Time: 57.217s, Prediction Time: 36.117s
    label PolicyGradient, Reward 52: -77.668, Len(game): 193, Training Time: 57.217s, Prediction Time: 36.445s
    label PolicyGradient, Reward 53: -58.755, Len(game): 136, Training Time: 57.217s, Prediction Time: 36.677s
    label PolicyGradient, Reward 54: -233.658, Len(game): 283, Training Time: 57.217s, Prediction Time: 37.152s
    label PolicyGradient, Reward 55: -97.790, Len(game): 134, Training Time: 57.217s, Prediction Time: 37.377s
    label PolicyGradient, Reward 56: -239.818, Len(game): 195, Training Time: 57.217s, Prediction Time: 37.705s
    label PolicyGradient, Reward 57: 49.956, Len(game): 156, Training Time: 57.217s, Prediction Time: 37.966s
    label PolicyGradient, Reward 58: -52.775, Len(game): 171, Training Time: 57.217s, Prediction Time: 38.252s
    label PolicyGradient, Reward 59: -327.191, Len(game): 254, Training Time: 57.217s, Prediction Time: 38.685s
    label PolicyGradient, Reward 60: -44.443, Len(game): 227, Training Time: 57.217s, Prediction Time: 39.066s
    label PolicyGradient, Reward 61: 26.450, Len(game): 154, Training Time: 57.217s, Prediction Time: 39.325s
    label PolicyGradient, Reward 62: -247.901, Len(game): 207, Training Time: 57.217s, Prediction Time: 39.677s
    label PolicyGradient, Reward 63: -38.375, Len(game): 187, Training Time: 57.217s, Prediction Time: 39.993s
    label PolicyGradient, Reward 64: -8.211, Len(game): 193, Training Time: 57.217s, Prediction Time: 40.320s
    label PolicyGradient, Reward 65: -40.982, Len(game): 150, Training Time: 57.217s, Prediction Time: 40.575s
    label PolicyGradient, Reward 66: -326.341, Len(game): 237, Training Time: 57.217s, Prediction Time: 40.975s
    label PolicyGradient, Reward 67: -2.344, Len(game): 2000, Training Time: 57.217s, Prediction Time: 44.378s
    label PolicyGradient, Reward 68: -80.559, Len(game): 272, Training Time: 57.217s, Prediction Time: 44.840s
    label PolicyGradient, Reward 69: -340.607, Len(game): 160, Training Time: 57.217s, Prediction Time: 45.109s
    label PolicyGradient, Reward 70: -212.233, Len(game): 233, Training Time: 57.217s, Prediction Time: 45.502s
    label PolicyGradient, Reward 71: -197.870, Len(game): 212, Training Time: 57.217s, Prediction Time: 45.860s
    label PolicyGradient, Reward 72: -187.449, Len(game): 187, Training Time: 57.217s, Prediction Time: 46.179s
    label PolicyGradient, Reward 73: -225.196, Len(game): 207, Training Time: 57.217s, Prediction Time: 46.531s
    label PolicyGradient, Reward 74: -35.257, Len(game): 195, Training Time: 57.217s, Prediction Time: 46.863s
    label PolicyGradient, Reward 75: -356.615, Len(game): 159, Training Time: 57.217s, Prediction Time: 47.131s
    label PolicyGradient, Reward 76: -266.305, Len(game): 205, Training Time: 57.217s, Prediction Time: 47.480s
    label PolicyGradient, Reward 77: -21.267, Len(game): 186, Training Time: 57.217s, Prediction Time: 47.801s
    label PolicyGradient, Reward 78: -254.046, Len(game): 161, Training Time: 57.217s, Prediction Time: 48.075s
    label PolicyGradient, Reward 79: -51.840, Len(game): 181, Training Time: 57.217s, Prediction Time: 48.376s
    label PolicyGradient, Reward 80: -59.774, Len(game): 257, Training Time: 57.217s, Prediction Time: 48.810s
    label PolicyGradient, Reward 81: -124.355, Len(game): 197, Training Time: 57.217s, Prediction Time: 49.142s
    label PolicyGradient, Reward 82: -73.173, Len(game): 154, Training Time: 57.217s, Prediction Time: 49.407s
    label PolicyGradient, Reward 83: -222.295, Len(game): 189, Training Time: 57.217s, Prediction Time: 49.736s
    label PolicyGradient, Reward 84: -57.890, Len(game): 201, Training Time: 57.217s, Prediction Time: 50.076s
    label PolicyGradient, Reward 85: -132.483, Len(game): 121, Training Time: 57.217s, Prediction Time: 50.282s
    label PolicyGradient, Reward 86: -57.338, Len(game): 120, Training Time: 57.217s, Prediction Time: 50.488s
    label PolicyGradient, Reward 87: -241.359, Len(game): 204, Training Time: 57.217s, Prediction Time: 50.837s
    label PolicyGradient, Reward 88: -34.278, Len(game): 163, Training Time: 57.217s, Prediction Time: 51.115s
    label PolicyGradient, Reward 89: -308.502, Len(game): 342, Training Time: 57.217s, Prediction Time: 51.691s
    label PolicyGradient, Reward 90: 20.146, Len(game): 146, Training Time: 57.217s, Prediction Time: 51.938s
    label PolicyGradient, Reward 91: -83.766, Len(game): 117, Training Time: 57.217s, Prediction Time: 52.138s
    label PolicyGradient, Reward 92: -33.971, Len(game): 213, Training Time: 57.217s, Prediction Time: 52.498s
    label PolicyGradient, Reward 93: -2.186, Len(game): 211, Training Time: 57.217s, Prediction Time: 52.854s
    label PolicyGradient, Reward 94: -207.453, Len(game): 172, Training Time: 57.217s, Prediction Time: 53.148s
    label PolicyGradient, Reward 95: 1.965, Len(game): 162, Training Time: 57.217s, Prediction Time: 53.427s
    label PolicyGradient, Reward 96: -299.235, Len(game): 109, Training Time: 57.217s, Prediction Time: 53.613s
    label PolicyGradient, Reward 97: -56.088, Len(game): 125, Training Time: 57.217s, Prediction Time: 53.826s
    label PolicyGradient, Reward 98: 8.456, Len(game): 150, Training Time: 57.217s, Prediction Time: 54.081s
    label PolicyGradient, Reward 99: -67.207, Len(game): 126, Training Time: 57.217s, Prediction Time: 54.292s
    label DQNAgent, Reward 0: -32.345, Len(game): 76, Training Time: 0.075s, Prediction Time: 0.003s
    label DQNAgent, Reward 1: -341.777, Len(game): 115, Training Time: 0.213s, Prediction Time: 0.008s
    label DQNAgent, Reward 2: -45.154, Len(game): 97, Training Time: 0.333s, Prediction Time: 0.013s
    label DQNAgent, Reward 3: -39.019, Len(game): 100, Training Time: 0.458s, Prediction Time: 0.018s
    label DQNAgent, Reward 4: -244.580, Len(game): 104, Training Time: 0.593s, Prediction Time: 0.023s
    label DQNAgent, Reward 5: -109.868, Len(game): 82, Training Time: 0.698s, Prediction Time: 0.028s
    label DQNAgent, Reward 6: -28.085, Len(game): 112, Training Time: 0.838s, Prediction Time: 0.036s
    label DQNAgent, Reward 7: -56.343, Len(game): 180, Training Time: 1.073s, Prediction Time: 0.048s
    label DQNAgent, Reward 8: -30.156, Len(game): 137, Training Time: 1.247s, Prediction Time: 0.058s
    label DQNAgent, Reward 9: -154.895, Len(game): 158, Training Time: 1.446s, Prediction Time: 0.069s
    label DQNAgent, Reward 10: -23.092, Len(game): 250, Training Time: 1.760s, Prediction Time: 0.088s
    label DQNAgent, Reward 11: -318.624, Len(game): 2000, Training Time: 4.284s, Prediction Time: 0.259s
    label DQNAgent, Reward 12: 66.829, Len(game): 149, Training Time: 4.473s, Prediction Time: 0.273s
    label DQNAgent, Reward 13: -294.896, Len(game): 2000, Training Time: 7.040s, Prediction Time: 0.450s
    label DQNAgent, Reward 14: -128.997, Len(game): 257, Training Time: 7.366s, Prediction Time: 0.472s
    label DQNAgent, Reward 15: 86.828, Len(game): 1235, Training Time: 8.977s, Prediction Time: 0.582s
    label DQNAgent, Reward 16: -79.989, Len(game): 293, Training Time: 9.362s, Prediction Time: 0.609s
    label DQNAgent, Reward 17: -76.022, Len(game): 289, Training Time: 9.736s, Prediction Time: 0.635s
    label DQNAgent, Reward 18: -76.693, Len(game): 497, Training Time: 10.386s, Prediction Time: 0.677s
    label DQNAgent, Reward 19: 17.214, Len(game): 222, Training Time: 10.675s, Prediction Time: 0.698s
    label DQNAgent, Reward 20: -77.502, Len(game): 707, Training Time: 11.589s, Prediction Time: 0.760s
    label DQNAgent, Reward 21: -143.504, Len(game): 139, Training Time: 11.771s, Prediction Time: 0.773s
    label DQNAgent, Reward 22: -138.275, Len(game): 1069, Training Time: 13.171s, Prediction Time: 0.864s
    label DQNAgent, Reward 23: 108.491, Len(game): 1276, Training Time: 14.849s, Prediction Time: 0.974s
    label DQNAgent, Reward 24: 242.335, Len(game): 451, Training Time: 15.442s, Prediction Time: 1.014s
    label DQNAgent, Reward 25: -158.982, Len(game): 2000, Training Time: 18.097s, Prediction Time: 1.199s
    label DQNAgent, Reward 26: 42.868, Len(game): 218, Training Time: 18.394s, Prediction Time: 1.219s
    label DQNAgent, Reward 27: 214.845, Len(game): 577, Training Time: 19.155s, Prediction Time: 1.271s
    label DQNAgent, Reward 28: 217.150, Len(game): 566, Training Time: 19.904s, Prediction Time: 1.322s
    label DQNAgent, Reward 29: 258.343, Len(game): 307, Training Time: 20.305s, Prediction Time: 1.353s
    label DQNAgent, Reward 30: 226.532, Len(game): 360, Training Time: 20.778s, Prediction Time: 1.386s
    label DQNAgent, Reward 31: 224.708, Len(game): 560, Training Time: 21.520s, Prediction Time: 1.438s
    label DQNAgent, Reward 32: 258.744, Len(game): 373, Training Time: 22.023s, Prediction Time: 1.473s
    label DQNAgent, Reward 33: 245.188, Len(game): 357, Training Time: 22.497s, Prediction Time: 1.507s
    label DQNAgent, Reward 34: -41.809, Len(game): 444, Training Time: 23.089s, Prediction Time: 1.545s
    label DQNAgent, Reward 35: -36.180, Len(game): 145, Training Time: 23.283s, Prediction Time: 1.558s
    label DQNAgent, Reward 36: 274.259, Len(game): 320, Training Time: 23.704s, Prediction Time: 1.587s
    label DQNAgent, Reward 37: 224.643, Len(game): 416, Training Time: 24.267s, Prediction Time: 1.625s
    label DQNAgent, Reward 38: 252.419, Len(game): 734, Training Time: 25.254s, Prediction Time: 1.700s
    label DQNAgent, Reward 39: -48.439, Len(game): 169, Training Time: 25.484s, Prediction Time: 1.715s
    label DQNAgent, Reward 40: -7.429, Len(game): 228, Training Time: 25.789s, Prediction Time: 1.735s
    label DQNAgent, Reward 41: 39.578, Len(game): 148, Training Time: 25.987s, Prediction Time: 1.750s
    label DQNAgent, Reward 42: 94.496, Len(game): 2000, Training Time: 28.908s, Prediction Time: 1.956s
    label DQNAgent, Reward 43: -77.404, Len(game): 121, Training Time: 29.072s, Prediction Time: 1.969s
    label DQNAgent, Reward 44: -34.316, Len(game): 82, Training Time: 29.180s, Prediction Time: 1.978s
    label DQNAgent, Reward 45: -101.696, Len(game): 261, Training Time: 29.526s, Prediction Time: 2.001s
    label DQNAgent, Reward 46: -52.923, Len(game): 124, Training Time: 29.688s, Prediction Time: 2.011s
    label DQNAgent, Reward 47: 266.802, Len(game): 266, Training Time: 30.050s, Prediction Time: 2.036s
    label DQNAgent, Reward 48: 1.389, Len(game): 232, Training Time: 30.366s, Prediction Time: 2.055s
    label DQNAgent, Reward 49: -10.112, Len(game): 179, Training Time: 30.603s, Prediction Time: 2.073s
    label DQNAgent, Reward 50: 70.895, Len(game): 162, Training Time: 30.818s, Prediction Time: 2.089s
    label DQNAgent, Reward 51: 54.644, Len(game): 185, Training Time: 31.064s, Prediction Time: 2.106s
    label DQNAgent, Reward 52: 29.431, Len(game): 244, Training Time: 31.393s, Prediction Time: 2.128s
    label DQNAgent, Reward 53: 6.784, Len(game): 122, Training Time: 31.555s, Prediction Time: 2.140s
    label DQNAgent, Reward 54: 195.130, Len(game): 708, Training Time: 32.499s, Prediction Time: 2.208s
    label DQNAgent, Reward 55: 16.518, Len(game): 101, Training Time: 32.633s, Prediction Time: 2.218s
    label DQNAgent, Reward 56: 229.328, Len(game): 892, Training Time: 33.818s, Prediction Time: 2.304s
    label DQNAgent, Reward 57: 10.079, Len(game): 203, Training Time: 34.091s, Prediction Time: 2.322s
    label DQNAgent, Reward 58: -32.992, Len(game): 103, Training Time: 34.227s, Prediction Time: 2.333s
    label DQNAgent, Reward 59: -10.238, Len(game): 206, Training Time: 34.505s, Prediction Time: 2.352s
    label DQNAgent, Reward 60: -11.807, Len(game): 272, Training Time: 34.869s, Prediction Time: 2.376s
    label DQNAgent, Reward 61: 37.263, Len(game): 119, Training Time: 35.027s, Prediction Time: 2.388s
    label DQNAgent, Reward 62: -73.480, Len(game): 247, Training Time: 35.359s, Prediction Time: 2.411s
    label DQNAgent, Reward 63: -5.797, Len(game): 255, Training Time: 35.693s, Prediction Time: 2.436s
    label DQNAgent, Reward 64: -90.319, Len(game): 220, Training Time: 35.987s, Prediction Time: 2.458s
    label DQNAgent, Reward 65: 9.954, Len(game): 176, Training Time: 36.219s, Prediction Time: 2.474s
    label DQNAgent, Reward 66: -0.394, Len(game): 2000, Training Time: 38.900s, Prediction Time: 2.676s
    label DQNAgent, Reward 67: -36.812, Len(game): 2000, Training Time: 41.547s, Prediction Time: 2.870s
    label DQNAgent, Reward 68: -24.807, Len(game): 413, Training Time: 42.104s, Prediction Time: 2.908s
    label DQNAgent, Reward 69: -131.701, Len(game): 134, Training Time: 42.284s, Prediction Time: 2.920s
    label DQNAgent, Reward 70: -23.883, Len(game): 160, Training Time: 42.501s, Prediction Time: 2.935s
    label DQNAgent, Reward 71: -238.337, Len(game): 111, Training Time: 42.649s, Prediction Time: 2.945s
    label DQNAgent, Reward 72: -19.809, Len(game): 130, Training Time: 42.823s, Prediction Time: 2.957s
    label DQNAgent, Reward 73: -17.040, Len(game): 111, Training Time: 42.969s, Prediction Time: 2.967s
    label DQNAgent, Reward 74: -286.120, Len(game): 185, Training Time: 43.217s, Prediction Time: 2.984s
    label DQNAgent, Reward 75: 262.301, Len(game): 509, Training Time: 43.902s, Prediction Time: 3.034s
    label DQNAgent, Reward 76: 154.535, Len(game): 198, Training Time: 44.161s, Prediction Time: 3.054s
    label DQNAgent, Reward 77: 24.319, Len(game): 92, Training Time: 44.283s, Prediction Time: 3.063s
    label DQNAgent, Reward 78: -241.954, Len(game): 88, Training Time: 44.405s, Prediction Time: 3.070s
    label DQNAgent, Reward 79: -330.269, Len(game): 53, Training Time: 44.476s, Prediction Time: 3.075s
    label DQNAgent, Reward 80: -19.116, Len(game): 206, Training Time: 44.752s, Prediction Time: 3.091s
    label DQNAgent, Reward 81: -69.397, Len(game): 80, Training Time: 44.858s, Prediction Time: 3.099s
    label DQNAgent, Reward 82: -254.371, Len(game): 73, Training Time: 44.955s, Prediction Time: 3.106s
    label DQNAgent, Reward 83: -195.465, Len(game): 58, Training Time: 45.032s, Prediction Time: 3.113s
    label DQNAgent, Reward 84: -87.897, Len(game): 103, Training Time: 45.167s, Prediction Time: 3.122s
    label DQNAgent, Reward 85: -225.071, Len(game): 63, Training Time: 45.253s, Prediction Time: 3.128s
    label DQNAgent, Reward 86: -34.987, Len(game): 142, Training Time: 45.443s, Prediction Time: 3.143s
    label DQNAgent, Reward 87: -22.486, Len(game): 159, Training Time: 45.656s, Prediction Time: 3.158s
    label DQNAgent, Reward 88: -19.353, Len(game): 106, Training Time: 45.798s, Prediction Time: 3.167s
    label DQNAgent, Reward 89: -61.599, Len(game): 96, Training Time: 45.927s, Prediction Time: 3.177s
    label DQNAgent, Reward 90: 243.727, Len(game): 347, Training Time: 46.396s, Prediction Time: 3.211s
    label DQNAgent, Reward 91: -17.510, Len(game): 103, Training Time: 46.532s, Prediction Time: 3.221s
    label DQNAgent, Reward 92: 262.185, Len(game): 263, Training Time: 46.881s, Prediction Time: 3.245s
    label DQNAgent, Reward 93: 277.732, Len(game): 440, Training Time: 47.474s, Prediction Time: 3.288s
    label DQNAgent, Reward 94: -77.197, Len(game): 180, Training Time: 47.713s, Prediction Time: 3.304s
    label DQNAgent, Reward 95: -157.270, Len(game): 436, Training Time: 48.296s, Prediction Time: 3.341s
    label DQNAgent, Reward 96: 181.631, Len(game): 299, Training Time: 48.692s, Prediction Time: 3.372s
    label DQNAgent, Reward 97: -44.327, Len(game): 96, Training Time: 48.824s, Prediction Time: 3.382s
    label DQNAgent, Reward 98: 216.648, Len(game): 281, Training Time: 49.202s, Prediction Time: 3.409s
    label DQNAgent, Reward 99: -101.295, Len(game): 245, Training Time: 49.523s, Prediction Time: 3.431s
    Computed global error Bellman mean:  1.3189805374485473  iter:  5
    Computed global error Bellman mean:  1.2523126574679762  iter:  1
    label KQLearning, Reward 0: -372.562, Len(game): 92, Training Time: 0.072s, Prediction Time: 0.002s
    Computed global error Bellman mean:  7.411354497632007e-08  iter:  1
    Computed global error Bellman mean:  1.2523126574679762  iter:  0
    label KQLearning, Reward 1: -584.066, Len(game): 65, Training Time: 0.099s, Prediction Time: 0.019s
    Computed global error Bellman mean:  7.100065456596863e-07  iter:  4
    label KQLearning, Reward 2: -437.061, Len(game): 151, Training Time: 0.160s, Prediction Time: 0.133s
    Computed global error Bellman mean:  0.0019651913974301073  iter:  5
    label KQLearning, Reward 3: -135.792, Len(game): 69, Training Time: 0.184s, Prediction Time: 0.212s
    Computed global error Bellman mean:  2.919271432600007e-08  iter:  5
    label KQLearning, Reward 4: -139.419, Len(game): 69, Training Time: 0.208s, Prediction Time: 0.299s
    Computed global error Bellman mean:  2.0043804439961245e-08  iter:  4
    label KQLearning, Reward 5: -140.436, Len(game): 87, Training Time: 0.237s, Prediction Time: 0.436s
    Computed global error Bellman mean:  0.13576275654331782  iter:  5
    Computed global error Bellman mean:  0.020962495119750388  iter:  5
    label KQLearning, Reward 6: -130.612, Len(game): 76, Training Time: 0.306s, Prediction Time: 0.577s
    Computed global error Bellman mean:  0.15713600956428617  iter:  5
    Computed global error Bellman mean:  0.0020650690661126385  iter:  5
    Computed global error Bellman mean:  0.024317659835294214  iter:  5
    label KQLearning, Reward 7: -126.258, Len(game): 66, Training Time: 0.393s, Prediction Time: 0.705s
    Computed global error Bellman mean:  0.017851189729401005  iter:  5
    Computed global error Bellman mean:  0.0007217437808625365  iter:  5
    Computed global error Bellman mean:  0.0015827117802054164  iter:  5
    label KQLearning, Reward 8: -135.454, Len(game): 67, Training Time: 0.476s, Prediction Time: 0.846s
    Computed global error Bellman mean:  2.063433774096256e-08  iter:  3
    label KQLearning, Reward 9: -243.629, Len(game): 89, Training Time: 0.504s, Prediction Time: 1.050s
    Computed global error Bellman mean:  6.700834498567436e-08  iter:  5
    label KQLearning, Reward 10: -153.162, Len(game): 61, Training Time: 0.530s, Prediction Time: 1.220s
    Computed global error Bellman mean:  9.495778856264773e-07  iter:  2
    label KQLearning, Reward 11: -146.903, Len(game): 60, Training Time: 0.547s, Prediction Time: 1.396s
    Computed global error Bellman mean:  0.0030796875629900615  iter:  5
    label KQLearning, Reward 12: -109.240, Len(game): 54, Training Time: 0.571s, Prediction Time: 1.556s
    Computed global error Bellman mean:  0.03341978501448979  iter:  5
    Computed global error Bellman mean:  0.009044383680920833  iter:  5
    label KQLearning, Reward 13: -119.908, Len(game): 62, Training Time: 0.624s, Prediction Time: 1.755s
    Computed global error Bellman mean:  0.20808780242644276  iter:  5
    Computed global error Bellman mean:  0.1585449000412748  iter:  5
    label KQLearning, Reward 14: -130.531, Len(game): 72, Training Time: 0.690s, Prediction Time: 1.996s
    Computed global error Bellman mean:  0.2679024756682797  iter:  5
    Computed global error Bellman mean:  0.07897550232560825  iter:  5
    Computed global error Bellman mean:  0.2188307754703123  iter:  5
    label KQLearning, Reward 15: -120.269, Len(game): 76, Training Time: 0.794s, Prediction Time: 2.276s
    Computed global error Bellman mean:  0.00817506846491195  iter:  5
    Computed global error Bellman mean:  0.022806644167262444  iter:  5
    Computed global error Bellman mean:  0.08965084354436514  iter:  5
    label KQLearning, Reward 16: -93.561, Len(game): 64, Training Time: 0.896s, Prediction Time: 2.516s
    Computed global error Bellman mean:  0.2859106155482297  iter:  5
    Computed global error Bellman mean:  0.000860394198221856  iter:  5
    Computed global error Bellman mean:  0.03149992370752994  iter:  5
    Computed global error Bellman mean:  0.12055260590243212  iter:  5
    label KQLearning, Reward 17: -122.599, Len(game): 73, Training Time: 1.053s, Prediction Time: 2.819s
    Computed global error Bellman mean:  1.83044586710146e-08  iter:  3
    Computed global error Bellman mean:  0.0007470566333216264  iter:  5
    Computed global error Bellman mean:  0.0774641048007587  iter:  5
    label KQLearning, Reward 18: -134.459, Len(game): 80, Training Time: 1.150s, Prediction Time: 3.157s
    Computed global error Bellman mean:  0.10731042912237276  iter:  5
    Computed global error Bellman mean:  0.02610362985333355  iter:  5
    Computed global error Bellman mean:  0.002993378342766674  iter:  5
    label KQLearning, Reward 19: -138.905, Len(game): 85, Training Time: 1.266s, Prediction Time: 3.530s
    Computed global error Bellman mean:  0.23419570223319502  iter:  5
    Computed global error Bellman mean:  0.002914609285175714  iter:  5
    Computed global error Bellman mean:  0.02236859336212139  iter:  5
    label KQLearning, Reward 20: -129.713, Len(game): 81, Training Time: 1.383s, Prediction Time: 3.903s
    Computed global error Bellman mean:  1.6014807838326037e-08  iter:  3
    Computed global error Bellman mean:  0.0003924388587439735  iter:  5
    label KQLearning, Reward 21: -203.197, Len(game): 84, Training Time: 1.456s, Prediction Time: 4.301s
    Computed global error Bellman mean:  0.05100331124491497  iter:  5
    Computed global error Bellman mean:  0.03186978686701474  iter:  5
    label KQLearning, Reward 22: -133.069, Len(game): 70, Training Time: 1.521s, Prediction Time: 4.664s
    Computed global error Bellman mean:  0.003950938087914362  iter:  5
    Computed global error Bellman mean:  0.005001478837385489  iter:  5
    label KQLearning, Reward 23: -146.560, Len(game): 75, Training Time: 1.589s, Prediction Time: 5.061s
    Computed global error Bellman mean:  6.546387785609096e-08  iter:  4
    label KQLearning, Reward 24: -123.907, Len(game): 74, Training Time: 1.613s, Prediction Time: 5.476s
    Computed global error Bellman mean:  0.10879058808361394  iter:  5
    Computed global error Bellman mean:  0.07290738721268956  iter:  5
    label KQLearning, Reward 25: -133.527, Len(game): 60, Training Time: 1.661s, Prediction Time: 5.805s
    Computed global error Bellman mean:  0.016974743021143903  iter:  5
    Computed global error Bellman mean:  0.04238259138373758  iter:  5
    Computed global error Bellman mean:  0.05721483609996408  iter:  0
    label KQLearning, Reward 26: -101.744, Len(game): 78, Training Time: 1.737s, Prediction Time: 6.262s
    Computed global error Bellman mean:  0.37744042431767055  iter:  5
    Computed global error Bellman mean:  0.014116144514556426  iter:  5
    Computed global error Bellman mean:  0.3204706232566178  iter:  5
    label KQLearning, Reward 27: -97.095, Len(game): 55, Training Time: 1.811s, Prediction Time: 6.610s
    Computed global error Bellman mean:  0.2453438557181358  iter:  5
    Computed global error Bellman mean:  0.014108517998810804  iter:  4
    Computed global error Bellman mean:  0.24250058526812496  iter:  5
    Computed global error Bellman mean:  0.06446487764621492  iter:  5
    label KQLearning, Reward 28: -131.495, Len(game): 72, Training Time: 1.929s, Prediction Time: 7.061s
    Computed global error Bellman mean:  0.012575581683966493  iter:  5
    Computed global error Bellman mean:  0.14278238596181772  iter:  5
    Computed global error Bellman mean:  0.019633335268757077  iter:  5
    Computed global error Bellman mean:  0.00040599195590076536  iter:  5
    label KQLearning, Reward 29: -174.707, Len(game): 71, Training Time: 2.054s, Prediction Time: 7.519s
    Computed global error Bellman mean:  0.04208379596448945  iter:  5
    Computed global error Bellman mean:  0.023102606487868323  iter:  5
    Computed global error Bellman mean:  0.007103893949980033  iter:  5
    Computed global error Bellman mean:  0.04317234861216545  iter:  5
    label KQLearning, Reward 30: -124.773, Len(game): 85, Training Time: 2.212s, Prediction Time: 8.081s
    Computed global error Bellman mean:  0.12058780308266578  iter:  5
    Computed global error Bellman mean:  0.00020293306826099392  iter:  4
    Computed global error Bellman mean:  0.08392629178733378  iter:  5
    label KQLearning, Reward 31: -76.054, Len(game): 92, Training Time: 2.333s, Prediction Time: 8.709s
    Computed global error Bellman mean:  0.09828368033783424  iter:  5
    Computed global error Bellman mean:  0.037782878816039166  iter:  5
    Computed global error Bellman mean:  0.06029686964513399  iter:  5
    label KQLearning, Reward 32: -137.634, Len(game): 65, Training Time: 2.447s, Prediction Time: 9.179s
    Computed global error Bellman mean:  0.5136728081973398  iter:  5
    Computed global error Bellman mean:  0.0009918043434892655  iter:  5
    Computed global error Bellman mean:  0.01235009238282136  iter:  5
    Computed global error Bellman mean:  0.180617942613106  iter:  5
    label KQLearning, Reward 33: -86.094, Len(game): 53, Training Time: 2.581s, Prediction Time: 9.575s
    Computed global error Bellman mean:  1.2584718080871243e-08  iter:  5
    Computed global error Bellman mean:  0.0018476014637430129  iter:  5
    Computed global error Bellman mean:  0.056234339680268246  iter:  5
    label KQLearning, Reward 34: -173.360, Len(game): 94, Training Time: 2.684s, Prediction Time: 10.282s
    Computed global error Bellman mean:  7.844241938522363e-08  iter:  5
    Computed global error Bellman mean:  0.00043903664548086436  iter:  5
    label KQLearning, Reward 35: -102.194, Len(game): 75, Training Time: 2.740s, Prediction Time: 10.868s
    Computed global error Bellman mean:  0.19120189207201235  iter:  5
    Computed global error Bellman mean:  0.0829391213649813  iter:  5
    label KQLearning, Reward 36: -108.647, Len(game): 52, Training Time: 2.784s, Prediction Time: 11.290s
    Computed global error Bellman mean:  9.700896319046813e-08  iter:  4
    Computed global error Bellman mean:  0.03956048601856398  iter:  5
    label KQLearning, Reward 37: -196.977, Len(game): 84, Training Time: 2.841s, Prediction Time: 11.964s
    Computed global error Bellman mean:  0.05229476501481367  iter:  5
    Computed global error Bellman mean:  0.009654067140801232  iter:  3
    Computed global error Bellman mean:  0.028373453359756315  iter:  5
    label KQLearning, Reward 38: -129.452, Len(game): 85, Training Time: 2.944s, Prediction Time: 12.690s
    Computed global error Bellman mean:  0.019472858023542202  iter:  5
    Computed global error Bellman mean:  0.0011164661361261203  iter:  5
    Computed global error Bellman mean:  0.0006816488909307312  iter:  5
    label KQLearning, Reward 39: -117.806, Len(game): 54, Training Time: 3.040s, Prediction Time: 13.161s
    Computed global error Bellman mean:  0.015399148714532362  iter:  5
    Computed global error Bellman mean:  0.008808021489861489  iter:  4
    label KQLearning, Reward 40: -124.378, Len(game): 140, Training Time: 3.254s, Prediction Time: 14.373s
    Computed global error Bellman mean:  0.03609878105199148  iter:  5
    Computed global error Bellman mean:  0.005822428587485815  iter:  5
    label KQLearning, Reward 41: -164.887, Len(game): 82, Training Time: 3.337s, Prediction Time: 15.120s
    Computed global error Bellman mean:  0.26996843196918135  iter:  5
    Computed global error Bellman mean:  0.06656820291052908  iter:  5
    label KQLearning, Reward 42: -120.646, Len(game): 73, Training Time: 3.404s, Prediction Time: 15.806s
    Computed global error Bellman mean:  0.017139636495967635  iter:  5
    Computed global error Bellman mean:  0.022196231913062057  iter:  5
    Computed global error Bellman mean:  0.006494194517247013  iter:  5
    label KQLearning, Reward 43: -104.596, Len(game): 55, Training Time: 3.484s, Prediction Time: 16.338s
    Computed global error Bellman mean:  2.3037905815209953e-08  iter:  5
    Computed global error Bellman mean:  0.00020246571991301376  iter:  5
    label KQLearning, Reward 44: -142.510, Len(game): 67, Training Time: 3.553s, Prediction Time: 16.993s
    Computed global error Bellman mean:  5.6706410073423916e-08  iter:  4
    label KQLearning, Reward 45: -194.903, Len(game): 105, Training Time: 3.591s, Prediction Time: 18.028s
    Computed global error Bellman mean:  0.05119886950137757  iter:  5
    Computed global error Bellman mean:  0.01042185810882918  iter:  5
    label KQLearning, Reward 46: -128.087, Len(game): 59, Training Time: 3.639s, Prediction Time: 18.634s
    Computed global error Bellman mean:  2.5003558635979185e-08  iter:  3
    Computed global error Bellman mean:  0.00014307256291403762  iter:  5
    label KQLearning, Reward 47: -177.993, Len(game): 59, Training Time: 3.687s, Prediction Time: 19.258s
    Computed global error Bellman mean:  1.48014213633436e-07  iter:  4
    label KQLearning, Reward 48: -137.158, Len(game): 63, Training Time: 3.711s, Prediction Time: 19.938s
    Computed global error Bellman mean:  3.0484152879067805e-08  iter:  4
    label KQLearning, Reward 49: -134.247, Len(game): 96, Training Time: 3.750s, Prediction Time: 20.964s
    Computed global error Bellman mean:  0.07536368772118596  iter:  5
    Computed global error Bellman mean:  0.03990564935632911  iter:  5
    label KQLearning, Reward 50: -144.798, Len(game): 79, Training Time: 3.828s, Prediction Time: 21.834s
    Computed global error Bellman mean:  0.0017133615512324154  iter:  5
    Computed global error Bellman mean:  0.0007265210284082993  iter:  4
    label KQLearning, Reward 51: -160.618, Len(game): 72, Training Time: 3.899s, Prediction Time: 22.647s
    Computed global error Bellman mean:  8.044588339347824e-08  iter:  5
    label KQLearning, Reward 52: -131.064, Len(game): 78, Training Time: 3.932s, Prediction Time: 23.539s
    Computed global error Bellman mean:  3.605390685740978e-08  iter:  3
    label KQLearning, Reward 53: -100.629, Len(game): 64, Training Time: 3.951s, Prediction Time: 24.297s
    Computed global error Bellman mean:  0.2004441542827797  iter:  5
    Computed global error Bellman mean:  0.1658095968517145  iter:  5
    label KQLearning, Reward 54: -126.926, Len(game): 75, Training Time: 4.016s, Prediction Time: 25.197s
    Computed global error Bellman mean:  0.09288273713150447  iter:  5
    Computed global error Bellman mean:  0.11327606177504151  iter:  5
    Computed global error Bellman mean:  0.04359814015673651  iter:  5
    label KQLearning, Reward 55: -81.539, Len(game): 52, Training Time: 4.101s, Prediction Time: 25.873s
    Computed global error Bellman mean:  0.0005143734323825142  iter:  5
    Computed global error Bellman mean:  0.04430338207013421  iter:  5
    Computed global error Bellman mean:  0.0011049728149696504  iter:  5
    label KQLearning, Reward 56: -92.505, Len(game): 69, Training Time: 4.187s, Prediction Time: 26.721s
    Computed global error Bellman mean:  0.10746479195065407  iter:  5
    Computed global error Bellman mean:  0.0005125659828415754  iter:  5
    Computed global error Bellman mean:  0.10009934921669968  iter:  5
    label KQLearning, Reward 57: -139.251, Len(game): 85, Training Time: 4.319s, Prediction Time: 27.778s
    Computed global error Bellman mean:  9.029182978766732e-07  iter:  5
    label KQLearning, Reward 58: -184.628, Len(game): 183, Training Time: 4.449s, Prediction Time: 30.099s
    Computed global error Bellman mean:  0.2589339881900597  iter:  5
    Computed global error Bellman mean:  0.03163800796125399  iter:  5
    label KQLearning, Reward 59: -110.732, Len(game): 84, Training Time: 4.537s, Prediction Time: 31.219s
    Computed global error Bellman mean:  1.8859149875067765e-06  iter:  5
    Computed global error Bellman mean:  0.006564670144132386  iter:  5
    label KQLearning, Reward 60: -22.853, Len(game): 83, Training Time: 4.623s, Prediction Time: 32.330s
    Computed global error Bellman mean:  0.01863704602424488  iter:  5
    Computed global error Bellman mean:  0.04687310605471646  iter:  2
    label KQLearning, Reward 61: -54.676, Len(game): 79, Training Time: 4.686s, Prediction Time: 33.436s
    Computed global error Bellman mean:  2.8455440473054395e-08  iter:  2
    label KQLearning, Reward 62: -125.842, Len(game): 54, Training Time: 4.701s, Prediction Time: 34.210s
    Computed global error Bellman mean:  0.0111061336365271  iter:  5
    Computed global error Bellman mean:  0.044638940747805854  iter:  1
    label KQLearning, Reward 63: -122.321, Len(game): 81, Training Time: 4.762s, Prediction Time: 35.336s
    Computed global error Bellman mean:  1.3872605528209014e-08  iter:  4
    label KQLearning, Reward 64: -146.616, Len(game): 80, Training Time: 4.793s, Prediction Time: 36.493s
    Computed global error Bellman mean:  2.3803313065473975e-08  iter:  3
    label KQLearning, Reward 65: -94.491, Len(game): 81, Training Time: 4.817s, Prediction Time: 37.685s
    Computed global error Bellman mean:  0.32549571653429427  iter:  5
    Computed global error Bellman mean:  0.08911715330411236  iter:  5
    label KQLearning, Reward 66: -180.841, Len(game): 63, Training Time: 4.868s, Prediction Time: 38.608s
    Computed global error Bellman mean:  4.000977550183407e-08  iter:  5
    Computed global error Bellman mean:  0.004253662822167722  iter:  5
    label KQLearning, Reward 67: -137.975, Len(game): 97, Training Time: 4.945s, Prediction Time: 40.080s
    Computed global error Bellman mean:  2.2676408195643205e-08  iter:  2
    label KQLearning, Reward 68: -291.425, Len(game): 89, Training Time: 4.971s, Prediction Time: 41.437s
    Computed global error Bellman mean:  6.718595003287946e-08  iter:  5
    label KQLearning, Reward 69: -125.370, Len(game): 92, Training Time: 5.014s, Prediction Time: 42.872s
    Computed global error Bellman mean:  2.7517433216756364e-07  iter:  5
    label KQLearning, Reward 70: -56.264, Len(game): 85, Training Time: 5.050s, Prediction Time: 44.178s
    Computed global error Bellman mean:  6.962977897463758e-08  iter:  5
    label KQLearning, Reward 71: -344.401, Len(game): 59, Training Time: 5.075s, Prediction Time: 45.118s
    Computed global error Bellman mean:  1.0711693531115362e-07  iter:  5
    label KQLearning, Reward 72: -112.351, Len(game): 84, Training Time: 5.109s, Prediction Time: 46.423s
    Computed global error Bellman mean:  1.6126971528758742e-07  iter:  3
    label KQLearning, Reward 73: -335.432, Len(game): 77, Training Time: 5.134s, Prediction Time: 47.659s
    Computed global error Bellman mean:  0.024788042740105546  iter:  5
    Computed global error Bellman mean:  0.06001539848507912  iter:  0
    label KQLearning, Reward 74: -440.919, Len(game): 131, Training Time: 5.235s, Prediction Time: 49.771s
    Computed global error Bellman mean:  1.2423309922889423e-07  iter:  5
    label KQLearning, Reward 75: -504.080, Len(game): 97, Training Time: 5.281s, Prediction Time: 51.371s
    Computed global error Bellman mean:  0.00036428533820603365  iter:  5
    label KQLearning, Reward 76: -390.916, Len(game): 149, Training Time: 5.367s, Prediction Time: 53.852s
    Computed global error Bellman mean:  3.0315014591201596e-08  iter:  5
    label KQLearning, Reward 77: -128.353, Len(game): 72, Training Time: 5.395s, Prediction Time: 55.087s
    Computed global error Bellman mean:  2.403641870596385e-07  iter:  4
    label KQLearning, Reward 78: -59.733, Len(game): 80, Training Time: 5.421s, Prediction Time: 56.456s
    Computed global error Bellman mean:  0.015106520761554639  iter:  5
    Computed global error Bellman mean:  0.048738974110085945  iter:  0
    label KQLearning, Reward 79: -235.885, Len(game): 106, Training Time: 5.491s, Prediction Time: 58.292s
    Computed global error Bellman mean:  0.09022535538969577  iter:  5
    Computed global error Bellman mean:  0.04019099592898272  iter:  5
    label KQLearning, Reward 80: -73.021, Len(game): 52, Training Time: 5.532s, Prediction Time: 59.227s
    Computed global error Bellman mean:  2.947203161549867e-07  iter:  5
    Computed global error Bellman mean:  0.006143285630891342  iter:  3
    label KQLearning, Reward 81: -307.058, Len(game): 124, Training Time: 5.611s, Prediction Time: 61.439s
    Computed global error Bellman mean:  4.629798574282561e-07  iter:  5
    label KQLearning, Reward 82: -25.641, Len(game): 132, Training Time: 5.678s, Prediction Time: 63.834s
    Computed global error Bellman mean:  2.378586373122265e-07  iter:  5
    label KQLearning, Reward 83: -57.890, Len(game): 71, Training Time: 5.706s, Prediction Time: 65.157s
    Computed global error Bellman mean:  1.7407564755874163  iter:  5
    Computed global error Bellman mean:  0.017441600045207832  iter:  5
    label KQLearning, Reward 84: -102.269, Len(game): 80, Training Time: 5.785s, Prediction Time: 66.657s
    Computed global error Bellman mean:  8.440503129960384e-08  iter:  5
    Computed global error Bellman mean:  0.017441371956869352  iter:  1
    label KQLearning, Reward 85: -118.463, Len(game): 110, Training Time: 5.860s, Prediction Time: 68.724s
    Computed global error Bellman mean:  0.012457805780351211  iter:  5
    Computed global error Bellman mean:  0.014751384531825552  iter:  4
    label KQLearning, Reward 86: -110.380, Len(game): 129, Training Time: 6.043s, Prediction Time: 71.191s
    Computed global error Bellman mean:  0.03129491033117738  iter:  5
    Computed global error Bellman mean:  0.022611835804580472  iter:  4
    label KQLearning, Reward 87: -354.707, Len(game): 192, Training Time: 6.423s, Prediction Time: 74.929s
    Computed global error Bellman mean:  1.0985266453257158e-07  iter:  5
    label KQLearning, Reward 88: -85.678, Len(game): 110, Training Time: 6.477s, Prediction Time: 77.135s
    Computed global error Bellman mean:  7.63840391769728e-07  iter:  5
    label KQLearning, Reward 89: -163.239, Len(game): 188, Training Time: 6.617s, Prediction Time: 80.950s
    Computed global error Bellman mean:  1.7936836930029392e-07  iter:  5
    label KQLearning, Reward 90: -51.148, Len(game): 84, Training Time: 6.656s, Prediction Time: 82.703s
    Computed global error Bellman mean:  4.589197083895158e-07  iter:  5
    label KQLearning, Reward 91: -422.452, Len(game): 125, Training Time: 6.713s, Prediction Time: 85.309s
    Computed global error Bellman mean:  0.007104244655817143  iter:  5
    label KQLearning, Reward 92: -68.175, Len(game): 78, Training Time: 6.747s, Prediction Time: 86.972s
    Computed global error Bellman mean:  2.9076078389845777e-07  iter:  4
    label KQLearning, Reward 93: -120.549, Len(game): 105, Training Time: 6.788s, Prediction Time: 89.213s
    Computed global error Bellman mean:  0.754567174156408  iter:  5
    Computed global error Bellman mean:  0.03965196013333954  iter:  5
    label KQLearning, Reward 94: -108.527, Len(game): 117, Training Time: 6.933s, Prediction Time: 91.759s
    Computed global error Bellman mean:  2.395615999451938e-07  iter:  5
    Computed global error Bellman mean:  0.014594064069726452  iter:  3
    label KQLearning, Reward 95: -83.180, Len(game): 110, Training Time: 7.051s, Prediction Time: 94.168s
    Computed global error Bellman mean:  2.981905245218355e-08  iter:  5
    Computed global error Bellman mean:  0.014594064069726452  iter:  0
    label KQLearning, Reward 96: -143.282, Len(game): 82, Training Time: 7.108s, Prediction Time: 96.003s
    Computed global error Bellman mean:  0.007248935573219148  iter:  5
    label KQLearning, Reward 97: -169.214, Len(game): 185, Training Time: 7.243s, Prediction Time: 100.149s
    Computed global error Bellman mean:  7.16126345106912e-07  iter:  5
    label KQLearning, Reward 98: -65.753, Len(game): 152, Training Time: 7.323s, Prediction Time: 103.635s
    Computed global error Bellman mean:  0.019225974127145058  iter:  5
    Computed global error Bellman mean:  0.023936736444295056  iter:  5
    label KQLearning, Reward 99: -332.120, Len(game): 125, Training Time: 7.472s, Prediction Time: 106.541s
    1
    label PPOAgent, Reward 0: -154.002, Len(game): 91, Training Time: 0.026s, Prediction Time: 0.026s
    label PPOAgent, Reward 1: -127.024, Len(game): 74, Training Time: 0.049s, Prediction Time: 0.049s
    label PPOAgent, Reward 2: -191.153, Len(game): 67, Training Time: 0.070s, Prediction Time: 0.070s
    label PPOAgent, Reward 3: -124.929, Len(game): 101, Training Time: 0.101s, Prediction Time: 0.101s
    label PPOAgent, Reward 4: -242.717, Len(game): 96, Training Time: 0.130s, Prediction Time: 0.130s
    label PPOAgent, Reward 5: -307.849, Len(game): 103, Training Time: 0.161s, Prediction Time: 0.161s
    label PPOAgent, Reward 6: -416.849, Len(game): 115, Training Time: 0.195s, Prediction Time: 0.195s
    label PPOAgent, Reward 7: -108.819, Len(game): 74, Training Time: 0.214s, Prediction Time: 0.214s
    label PPOAgent, Reward 8: -84.504, Len(game): 77, Training Time: 0.234s, Prediction Time: 0.234s
    label PPOAgent, Reward 9: -103.856, Len(game): 83, Training Time: 0.255s, Prediction Time: 0.255s
    label PPOAgent, Reward 10: -259.775, Len(game): 102, Training Time: 0.281s, Prediction Time: 0.281s
    label PPOAgent, Reward 11: -158.886, Len(game): 96, Training Time: 0.305s, Prediction Time: 0.305s
    label PPOAgent, Reward 12: -445.462, Len(game): 107, Training Time: 0.332s, Prediction Time: 0.332s
    label PPOAgent, Reward 13: -238.863, Len(game): 71, Training Time: 0.393s, Prediction Time: 0.393s
    label PPOAgent, Reward 14: -124.438, Len(game): 100, Training Time: 0.423s, Prediction Time: 0.423s
    label PPOAgent, Reward 15: -139.580, Len(game): 62, Training Time: 0.443s, Prediction Time: 0.443s
    label PPOAgent, Reward 16: -80.564, Len(game): 95, Training Time: 0.472s, Prediction Time: 0.472s
    label PPOAgent, Reward 17: -82.568, Len(game): 74, Training Time: 0.495s, Prediction Time: 0.495s
    label PPOAgent, Reward 18: -95.051, Len(game): 81, Training Time: 0.520s, Prediction Time: 0.520s
    label PPOAgent, Reward 19: -258.006, Len(game): 90, Training Time: 0.549s, Prediction Time: 0.549s
    label PPOAgent, Reward 20: -241.132, Len(game): 105, Training Time: 0.575s, Prediction Time: 0.575s
    label PPOAgent, Reward 21: -144.110, Len(game): 77, Training Time: 0.595s, Prediction Time: 0.595s
    label PPOAgent, Reward 22: -103.296, Len(game): 82, Training Time: 0.616s, Prediction Time: 0.616s
    label PPOAgent, Reward 23: -191.619, Len(game): 121, Training Time: 0.647s, Prediction Time: 0.647s
    label PPOAgent, Reward 24: -300.092, Len(game): 77, Training Time: 0.667s, Prediction Time: 0.667s
    label PPOAgent, Reward 25: -149.666, Len(game): 77, Training Time: 0.686s, Prediction Time: 0.686s
    label PPOAgent, Reward 26: -106.092, Len(game): 81, Training Time: 0.706s, Prediction Time: 0.706s
    label PPOAgent, Reward 27: -124.350, Len(game): 73, Training Time: 0.764s, Prediction Time: 0.764s
    label PPOAgent, Reward 28: -232.704, Len(game): 83, Training Time: 0.789s, Prediction Time: 0.789s
    label PPOAgent, Reward 29: -73.539, Len(game): 107, Training Time: 0.821s, Prediction Time: 0.821s
    label PPOAgent, Reward 30: -365.718, Len(game): 118, Training Time: 0.855s, Prediction Time: 0.855s
    label PPOAgent, Reward 31: -124.252, Len(game): 74, Training Time: 0.877s, Prediction Time: 0.877s
    label PPOAgent, Reward 32: -116.795, Len(game): 78, Training Time: 0.901s, Prediction Time: 0.901s
    label PPOAgent, Reward 33: 51.603, Len(game): 125, Training Time: 0.936s, Prediction Time: 0.936s
    label PPOAgent, Reward 34: -194.867, Len(game): 86, Training Time: 0.957s, Prediction Time: 0.957s
    label PPOAgent, Reward 35: -371.477, Len(game): 82, Training Time: 0.979s, Prediction Time: 0.979s
    label PPOAgent, Reward 36: -378.084, Len(game): 79, Training Time: 0.999s, Prediction Time: 0.999s
    label PPOAgent, Reward 37: -94.966, Len(game): 89, Training Time: 1.022s, Prediction Time: 1.022s
    label PPOAgent, Reward 38: -134.562, Len(game): 107, Training Time: 1.049s, Prediction Time: 1.049s
    label PPOAgent, Reward 39: -109.103, Len(game): 61, Training Time: 1.064s, Prediction Time: 1.064s
    label PPOAgent, Reward 40: -212.743, Len(game): 66, Training Time: 1.118s, Prediction Time: 1.118s
    label PPOAgent, Reward 41: -119.781, Len(game): 72, Training Time: 1.139s, Prediction Time: 1.139s
    label PPOAgent, Reward 42: -132.433, Len(game): 89, Training Time: 1.166s, Prediction Time: 1.166s
    label PPOAgent, Reward 43: -11.774, Len(game): 104, Training Time: 1.197s, Prediction Time: 1.197s
    label PPOAgent, Reward 44: -143.701, Len(game): 91, Training Time: 1.226s, Prediction Time: 1.226s
    label PPOAgent, Reward 45: -293.914, Len(game): 89, Training Time: 1.253s, Prediction Time: 1.253s
    label PPOAgent, Reward 46: -150.273, Len(game): 75, Training Time: 1.275s, Prediction Time: 1.275s
    label PPOAgent, Reward 47: -74.135, Len(game): 64, Training Time: 1.293s, Prediction Time: 1.293s
    label PPOAgent, Reward 48: -190.206, Len(game): 62, Training Time: 1.309s, Prediction Time: 1.309s
    label PPOAgent, Reward 49: -58.671, Len(game): 64, Training Time: 1.326s, Prediction Time: 1.326s
    label PPOAgent, Reward 50: -113.440, Len(game): 62, Training Time: 1.341s, Prediction Time: 1.341s
    label PPOAgent, Reward 51: -394.934, Len(game): 99, Training Time: 1.366s, Prediction Time: 1.366s
    label PPOAgent, Reward 52: -173.129, Len(game): 103, Training Time: 1.392s, Prediction Time: 1.392s
    label PPOAgent, Reward 53: -79.432, Len(game): 59, Training Time: 1.407s, Prediction Time: 1.407s
    label PPOAgent, Reward 54: -129.777, Len(game): 66, Training Time: 1.425s, Prediction Time: 1.425s
    label PPOAgent, Reward 55: -140.662, Len(game): 74, Training Time: 1.443s, Prediction Time: 1.443s
    label PPOAgent, Reward 56: -115.539, Len(game): 68, Training Time: 1.499s, Prediction Time: 1.499s
    label PPOAgent, Reward 57: -112.054, Len(game): 65, Training Time: 1.518s, Prediction Time: 1.518s
    label PPOAgent, Reward 58: -306.989, Len(game): 94, Training Time: 1.545s, Prediction Time: 1.545s
    label PPOAgent, Reward 59: -19.252, Len(game): 94, Training Time: 1.573s, Prediction Time: 1.573s
    label PPOAgent, Reward 60: -101.722, Len(game): 75, Training Time: 1.597s, Prediction Time: 1.597s
    label PPOAgent, Reward 61: -137.490, Len(game): 55, Training Time: 1.615s, Prediction Time: 1.615s
    label PPOAgent, Reward 62: -96.619, Len(game): 58, Training Time: 1.633s, Prediction Time: 1.633s
    label PPOAgent, Reward 63: -110.281, Len(game): 68, Training Time: 1.654s, Prediction Time: 1.654s
    label PPOAgent, Reward 64: -324.520, Len(game): 87, Training Time: 1.678s, Prediction Time: 1.678s
    label PPOAgent, Reward 65: -115.957, Len(game): 94, Training Time: 1.701s, Prediction Time: 1.701s
    label PPOAgent, Reward 66: -140.492, Len(game): 77, Training Time: 1.721s, Prediction Time: 1.721s
    label PPOAgent, Reward 67: -86.799, Len(game): 54, Training Time: 1.734s, Prediction Time: 1.734s
    label PPOAgent, Reward 68: -262.537, Len(game): 96, Training Time: 1.758s, Prediction Time: 1.758s
    label PPOAgent, Reward 69: -109.956, Len(game): 95, Training Time: 1.782s, Prediction Time: 1.782s
    label PPOAgent, Reward 70: -327.549, Len(game): 115, Training Time: 1.811s, Prediction Time: 1.811s
    label PPOAgent, Reward 71: -157.178, Len(game): 106, Training Time: 1.877s, Prediction Time: 1.877s
    label PPOAgent, Reward 72: -202.787, Len(game): 82, Training Time: 1.902s, Prediction Time: 1.902s
    label PPOAgent, Reward 73: -93.070, Len(game): 60, Training Time: 1.920s, Prediction Time: 1.920s
    label PPOAgent, Reward 74: -125.885, Len(game): 63, Training Time: 1.939s, Prediction Time: 1.939s
    label PPOAgent, Reward 75: -84.186, Len(game): 65, Training Time: 1.958s, Prediction Time: 1.958s
    label PPOAgent, Reward 76: -140.366, Len(game): 70, Training Time: 1.979s, Prediction Time: 1.979s
    label PPOAgent, Reward 77: -246.796, Len(game): 105, Training Time: 2.012s, Prediction Time: 2.012s
    label PPOAgent, Reward 78: -76.461, Len(game): 64, Training Time: 2.032s, Prediction Time: 2.032s
    label PPOAgent, Reward 79: -218.941, Len(game): 112, Training Time: 2.061s, Prediction Time: 2.061s
    label PPOAgent, Reward 80: -152.219, Len(game): 79, Training Time: 2.080s, Prediction Time: 2.080s
    label PPOAgent, Reward 81: -85.561, Len(game): 58, Training Time: 2.095s, Prediction Time: 2.095s
    label PPOAgent, Reward 82: -345.654, Len(game): 93, Training Time: 2.119s, Prediction Time: 2.119s
    label PPOAgent, Reward 83: -263.042, Len(game): 102, Training Time: 2.145s, Prediction Time: 2.145s
    label PPOAgent, Reward 84: -113.060, Len(game): 76, Training Time: 2.165s, Prediction Time: 2.165s
    label PPOAgent, Reward 85: -119.133, Len(game): 61, Training Time: 2.180s, Prediction Time: 2.180s
    label PPOAgent, Reward 86: -111.076, Len(game): 75, Training Time: 2.237s, Prediction Time: 2.237s
    label PPOAgent, Reward 87: -197.642, Len(game): 86, Training Time: 2.263s, Prediction Time: 2.263s
    label PPOAgent, Reward 88: -111.857, Len(game): 67, Training Time: 2.283s, Prediction Time: 2.283s
    label PPOAgent, Reward 89: -72.098, Len(game): 91, Training Time: 2.311s, Prediction Time: 2.311s
    label PPOAgent, Reward 90: -94.863, Len(game): 88, Training Time: 2.337s, Prediction Time: 2.337s
    label PPOAgent, Reward 91: -102.801, Len(game): 64, Training Time: 2.357s, Prediction Time: 2.357s
    label PPOAgent, Reward 92: -109.546, Len(game): 59, Training Time: 2.376s, Prediction Time: 2.376s
    label PPOAgent, Reward 93: -146.651, Len(game): 75, Training Time: 2.398s, Prediction Time: 2.398s
    label PPOAgent, Reward 94: -69.519, Len(game): 61, Training Time: 2.414s, Prediction Time: 2.414s
    label PPOAgent, Reward 95: -130.846, Len(game): 80, Training Time: 2.434s, Prediction Time: 2.434s
    label PPOAgent, Reward 96: -87.744, Len(game): 61, Training Time: 2.451s, Prediction Time: 2.451s
    label PPOAgent, Reward 97: -176.966, Len(game): 104, Training Time: 2.476s, Prediction Time: 2.476s
    label PPOAgent, Reward 98: -129.096, Len(game): 106, Training Time: 2.503s, Prediction Time: 2.503s
    label PPOAgent, Reward 99: -90.169, Len(game): 73, Training Time: 2.521s, Prediction Time: 2.521s
    label Controller-based, Reward 0: -140.004, Len(game): 69, Training Time: 0.002s, Prediction Time: 0.002s
    label Controller-based, Reward 1: -133.286, Len(game): 78, Training Time: 0.004s, Prediction Time: 0.004s
    label Controller-based, Reward 2: -74.503, Len(game): 54, Training Time: 0.014s, Prediction Time: 0.004s
    label Controller-based, Reward 3: -115.268, Len(game): 53, Training Time: 0.024s, Prediction Time: 0.006s
    label Controller-based, Reward 4: -137.267, Len(game): 84, Training Time: 0.033s, Prediction Time: 0.009s
    label Controller-based, Reward 5: -3.014, Len(game): 65, Training Time: 0.042s, Prediction Time: 0.012s
    label Controller-based, Reward 6: -283.395, Len(game): 79, Training Time: 0.054s, Prediction Time: 0.014s
    label Controller-based, Reward 7: -52.856, Len(game): 57, Training Time: 0.064s, Prediction Time: 0.016s
    label Controller-based, Reward 8: -131.374, Len(game): 57, Training Time: 0.075s, Prediction Time: 0.019s
    label Controller-based, Reward 9: -117.601, Len(game): 65, Training Time: 0.088s, Prediction Time: 0.021s
    label Controller-based, Reward 10: -146.089, Len(game): 64, Training Time: 0.098s, Prediction Time: 0.023s
    label Controller-based, Reward 11: -159.764, Len(game): 65, Training Time: 0.109s, Prediction Time: 0.027s
    label Controller-based, Reward 12: -159.547, Len(game): 67, Training Time: 0.120s, Prediction Time: 0.030s
    label Controller-based, Reward 13: -148.711, Len(game): 60, Training Time: 0.134s, Prediction Time: 0.032s
    label Controller-based, Reward 14: -99.802, Len(game): 88, Training Time: 0.147s, Prediction Time: 0.035s
    label Controller-based, Reward 15: -77.539, Len(game): 86, Training Time: 0.160s, Prediction Time: 0.039s
    label Controller-based, Reward 16: -83.987, Len(game): 76, Training Time: 0.173s, Prediction Time: 0.041s
    label Controller-based, Reward 17: -95.219, Len(game): 76, Training Time: 0.186s, Prediction Time: 0.043s
    label Controller-based, Reward 18: -16.084, Len(game): 91, Training Time: 0.200s, Prediction Time: 0.046s
    label Controller-based, Reward 19: -34.542, Len(game): 66, Training Time: 0.215s, Prediction Time: 0.048s
    label Controller-based, Reward 20: -202.657, Len(game): 102, Training Time: 0.229s, Prediction Time: 0.051s
    label Controller-based, Reward 21: -337.431, Len(game): 133, Training Time: 0.244s, Prediction Time: 0.054s
    label Controller-based, Reward 22: -217.754, Len(game): 90, Training Time: 0.258s, Prediction Time: 0.057s
    label Controller-based, Reward 23: -131.460, Len(game): 104, Training Time: 0.272s, Prediction Time: 0.060s
    label Controller-based, Reward 24: -110.646, Len(game): 54, Training Time: 0.286s, Prediction Time: 0.061s
    label Controller-based, Reward 25: -137.962, Len(game): 70, Training Time: 0.299s, Prediction Time: 0.064s
    label Controller-based, Reward 26: -345.138, Len(game): 86, Training Time: 0.313s, Prediction Time: 0.066s
    label Controller-based, Reward 27: -205.343, Len(game): 72, Training Time: 0.326s, Prediction Time: 0.068s
    label Controller-based, Reward 28: -247.341, Len(game): 96, Training Time: 0.340s, Prediction Time: 0.072s
    label Controller-based, Reward 29: -288.672, Len(game): 112, Training Time: 0.354s, Prediction Time: 0.074s
    label Controller-based, Reward 30: -52.723, Len(game): 88, Training Time: 0.368s, Prediction Time: 0.076s
    label Controller-based, Reward 31: -191.440, Len(game): 92, Training Time: 0.382s, Prediction Time: 0.079s
    label Controller-based, Reward 32: -45.545, Len(game): 59, Training Time: 0.396s, Prediction Time: 0.080s
    label Controller-based, Reward 33: 14.740, Len(game): 65, Training Time: 0.408s, Prediction Time: 0.083s
    label Controller-based, Reward 34: -159.346, Len(game): 80, Training Time: 0.423s, Prediction Time: 0.086s
    label Controller-based, Reward 35: -46.395, Len(game): 80, Training Time: 0.436s, Prediction Time: 0.088s
    label Controller-based, Reward 36: -9.743, Len(game): 71, Training Time: 0.451s, Prediction Time: 0.090s
    label Controller-based, Reward 37: -13.123, Len(game): 94, Training Time: 0.465s, Prediction Time: 0.092s
    label Controller-based, Reward 38: -53.729, Len(game): 95, Training Time: 0.480s, Prediction Time: 0.095s
    label Controller-based, Reward 39: -38.773, Len(game): 93, Training Time: 0.494s, Prediction Time: 0.098s
    label Controller-based, Reward 40: 0.637, Len(game): 114, Training Time: 0.508s, Prediction Time: 0.103s
    label Controller-based, Reward 41: -24.424, Len(game): 113, Training Time: 0.524s, Prediction Time: 0.107s
    label Controller-based, Reward 42: 20.500, Len(game): 139, Training Time: 0.539s, Prediction Time: 0.110s
    label Controller-based, Reward 43: -142.386, Len(game): 183, Training Time: 0.555s, Prediction Time: 0.116s
    label Controller-based, Reward 44: -38.658, Len(game): 85, Training Time: 0.571s, Prediction Time: 0.119s
    label Controller-based, Reward 45: -39.666, Len(game): 97, Training Time: 0.587s, Prediction Time: 0.122s
    label Controller-based, Reward 46: -42.530, Len(game): 92, Training Time: 0.603s, Prediction Time: 0.124s
    label Controller-based, Reward 47: -36.293, Len(game): 123, Training Time: 0.619s, Prediction Time: 0.127s
    label Controller-based, Reward 48: -23.337, Len(game): 90, Training Time: 0.634s, Prediction Time: 0.130s
    label Controller-based, Reward 49: -74.321, Len(game): 90, Training Time: 0.650s, Prediction Time: 0.133s
    label Controller-based, Reward 50: 19.134, Len(game): 144, Training Time: 0.668s, Prediction Time: 0.136s
    label Controller-based, Reward 51: -101.110, Len(game): 142, Training Time: 0.685s, Prediction Time: 0.141s
    label Controller-based, Reward 52: -283.108, Len(game): 2000, Training Time: 0.727s, Prediction Time: 0.200s
    label Controller-based, Reward 53: -84.156, Len(game): 227, Training Time: 0.745s, Prediction Time: 0.207s
    label Controller-based, Reward 54: -3.956, Len(game): 106, Training Time: 0.762s, Prediction Time: 0.212s
    label Controller-based, Reward 55: -48.074, Len(game): 74, Training Time: 0.780s, Prediction Time: 0.215s
    label Controller-based, Reward 56: -9.968, Len(game): 65, Training Time: 0.797s, Prediction Time: 0.218s
    label Controller-based, Reward 57: -100.251, Len(game): 215, Training Time: 0.817s, Prediction Time: 0.223s
    label Controller-based, Reward 58: 235.041, Len(game): 324, Training Time: 0.837s, Prediction Time: 0.233s
    label Controller-based, Reward 59: 189.897, Len(game): 516, Training Time: 0.860s, Prediction Time: 0.247s
    label Controller-based, Reward 60: -104.188, Len(game): 184, Training Time: 0.879s, Prediction Time: 0.253s
    label Controller-based, Reward 61: -85.103, Len(game): 208, Training Time: 0.897s, Prediction Time: 0.258s
    label Controller-based, Reward 62: -15.177, Len(game): 346, Training Time: 0.918s, Prediction Time: 0.268s
    label Controller-based, Reward 63: -87.819, Len(game): 245, Training Time: 0.937s, Prediction Time: 0.273s
    label Controller-based, Reward 64: 192.459, Len(game): 289, Training Time: 0.956s, Prediction Time: 0.281s
    label Controller-based, Reward 65: -40.399, Len(game): 229, Training Time: 0.974s, Prediction Time: 0.289s
    label Controller-based, Reward 66: -85.762, Len(game): 138, Training Time: 0.992s, Prediction Time: 0.292s
    label Controller-based, Reward 67: -109.227, Len(game): 195, Training Time: 1.011s, Prediction Time: 0.299s
    label Controller-based, Reward 68: 167.952, Len(game): 770, Training Time: 1.045s, Prediction Time: 0.323s
    label Controller-based, Reward 69: -240.846, Len(game): 960, Training Time: 1.075s, Prediction Time: 0.358s
    label Controller-based, Reward 70: -23.430, Len(game): 293, Training Time: 1.096s, Prediction Time: 0.367s
    label Controller-based, Reward 71: 15.711, Len(game): 203, Training Time: 1.114s, Prediction Time: 0.374s
    label Controller-based, Reward 72: -18.079, Len(game): 213, Training Time: 1.136s, Prediction Time: 0.381s
    label Controller-based, Reward 73: -138.834, Len(game): 760, Training Time: 1.160s, Prediction Time: 0.401s
    label Controller-based, Reward 74: -78.313, Len(game): 212, Training Time: 1.178s, Prediction Time: 0.406s
    label Controller-based, Reward 75: -118.766, Len(game): 202, Training Time: 1.198s, Prediction Time: 0.411s
    label Controller-based, Reward 76: 14.496, Len(game): 2000, Training Time: 1.244s, Prediction Time: 0.487s
    label Controller-based, Reward 77: 86.466, Len(game): 2000, Training Time: 1.288s, Prediction Time: 0.552s
    label Controller-based, Reward 78: 50.990, Len(game): 2000, Training Time: 1.337s, Prediction Time: 0.627s
    label Controller-based, Reward 79: -126.798, Len(game): 170, Training Time: 1.358s, Prediction Time: 0.633s
    label Controller-based, Reward 80: 218.697, Len(game): 267, Training Time: 1.377s, Prediction Time: 0.640s
    label Controller-based, Reward 81: -195.316, Len(game): 490, Training Time: 1.400s, Prediction Time: 0.656s
    label Controller-based, Reward 82: -79.591, Len(game): 283, Training Time: 1.421s, Prediction Time: 0.663s
    label Controller-based, Reward 83: 161.744, Len(game): 469, Training Time: 1.443s, Prediction Time: 0.677s
    label Controller-based, Reward 84: -78.987, Len(game): 526, Training Time: 1.465s, Prediction Time: 0.689s
    label Controller-based, Reward 85: 124.039, Len(game): 835, Training Time: 1.493s, Prediction Time: 0.713s
    label Controller-based, Reward 86: -92.462, Len(game): 164, Training Time: 1.512s, Prediction Time: 0.719s
    label Controller-based, Reward 87: -101.355, Len(game): 244, Training Time: 1.531s, Prediction Time: 0.727s
    label Controller-based, Reward 88: 54.230, Len(game): 2000, Training Time: 1.575s, Prediction Time: 0.805s
    label Controller-based, Reward 89: -60.153, Len(game): 272, Training Time: 1.595s, Prediction Time: 0.814s
    label Controller-based, Reward 90: 142.720, Len(game): 413, Training Time: 1.620s, Prediction Time: 0.829s
    label Controller-based, Reward 91: -83.812, Len(game): 156, Training Time: 1.638s, Prediction Time: 0.834s
    label Controller-based, Reward 92: -79.527, Len(game): 181, Training Time: 1.657s, Prediction Time: 0.841s
    label Controller-based, Reward 93: -78.391, Len(game): 158, Training Time: 1.676s, Prediction Time: 0.845s
    label Controller-based, Reward 94: -184.310, Len(game): 573, Training Time: 1.700s, Prediction Time: 0.859s
    label Controller-based, Reward 95: -82.016, Len(game): 194, Training Time: 1.719s, Prediction Time: 0.864s
    label Controller-based, Reward 96: -55.855, Len(game): 338, Training Time: 1.740s, Prediction Time: 0.873s
    label Controller-based, Reward 97: -220.122, Len(game): 499, Training Time: 1.764s, Prediction Time: 0.889s
    label Controller-based, Reward 98: -96.453, Len(game): 196, Training Time: 1.781s, Prediction Time: 0.893s
    label Controller-based, Reward 99: 224.983, Len(game): 434, Training Time: 1.801s, Prediction Time: 0.906s
    label KACAgent, Reward 0: -77.953, Len(game): 73, Training Time: 0.009s, Prediction Time: 0.002s
    label KACAgent, Reward 1: -197.529, Len(game): 88, Training Time: 0.019s, Prediction Time: 0.018s
    label KACAgent, Reward 2: -144.463, Len(game): 89, Training Time: 0.033s, Prediction Time: 0.033s
    label KACAgent, Reward 3: -428.558, Len(game): 82, Training Time: 0.063s, Prediction Time: 0.054s
    label KACAgent, Reward 4: -49.328, Len(game): 76, Training Time: 0.099s, Prediction Time: 0.079s
    label KACAgent, Reward 5: -264.959, Len(game): 115, Training Time: 0.160s, Prediction Time: 0.120s
    label KACAgent, Reward 6: -182.674, Len(game): 74, Training Time: 0.234s, Prediction Time: 0.160s
    label KACAgent, Reward 7: -179.179, Len(game): 122, Training Time: 0.332s, Prediction Time: 0.228s
    label KACAgent, Reward 8: -58.507, Len(game): 88, Training Time: 0.465s, Prediction Time: 0.292s
    label KACAgent, Reward 9: -99.668, Len(game): 99, Training Time: 0.625s, Prediction Time: 0.380s
    label KACAgent, Reward 10: -170.758, Len(game): 103, Training Time: 0.814s, Prediction Time: 0.477s
    label KACAgent, Reward 11: -252.071, Len(game): 153, Training Time: 1.046s, Prediction Time: 0.626s
    label KACAgent, Reward 12: -207.480, Len(game): 123, Training Time: 1.347s, Prediction Time: 0.776s
    label KACAgent, Reward 13: -75.008, Len(game): 160, Training Time: 1.699s, Prediction Time: 0.981s
    label KACAgent, Reward 14: -311.284, Len(game): 115, Training Time: 2.129s, Prediction Time: 1.177s
    label KACAgent, Reward 15: -48.366, Len(game): 117, Training Time: 2.639s, Prediction Time: 1.409s
    label KACAgent, Reward 16: 13.890, Len(game): 158, Training Time: 3.200s, Prediction Time: 1.706s
    label KACAgent, Reward 17: -35.270, Len(game): 104, Training Time: 3.875s, Prediction Time: 1.992s
    label KACAgent, Reward 18: -79.153, Len(game): 128, Training Time: 4.600s, Prediction Time: 2.354s
    label KACAgent, Reward 19: -99.495, Len(game): 131, Training Time: 5.426s, Prediction Time: 2.737s
    label KACAgent, Reward 20: -119.606, Len(game): 123, Training Time: 6.320s, Prediction Time: 3.160s
    label KACAgent, Reward 21: -227.100, Len(game): 110, Training Time: 7.323s, Prediction Time: 3.619s
    label KACAgent, Reward 22: -120.408, Len(game): 121, Training Time: 8.419s, Prediction Time: 4.148s
    label KACAgent, Reward 23: 7.015, Len(game): 194, Training Time: 9.632s, Prediction Time: 4.790s
    label KACAgent, Reward 24: -63.053, Len(game): 105, Training Time: 10.849s, Prediction Time: 5.331s
    label KACAgent, Reward 25: -109.040, Len(game): 108, Training Time: 12.138s, Prediction Time: 5.945s
    label KACAgent, Reward 26: -75.487, Len(game): 107, Training Time: 13.514s, Prediction Time: 6.604s
    label KACAgent, Reward 27: 1.007, Len(game): 150, Training Time: 14.948s, Prediction Time: 7.348s
    label KACAgent, Reward 28: -73.049, Len(game): 113, Training Time: 16.539s, Prediction Time: 8.109s
    label KACAgent, Reward 29: -45.320, Len(game): 113, Training Time: 18.261s, Prediction Time: 8.940s
    label KACAgent, Reward 30: -87.183, Len(game): 90, Training Time: 20.130s, Prediction Time: 9.777s
    label KACAgent, Reward 31: -260.053, Len(game): 145, Training Time: 22.035s, Prediction Time: 10.758s
    label KACAgent, Reward 32: -189.274, Len(game): 170, Training Time: 24.126s, Prediction Time: 11.804s
    label KACAgent, Reward 33: -147.145, Len(game): 322, Training Time: 26.415s, Prediction Time: 13.104s
    label KACAgent, Reward 34: 15.210, Len(game): 131, Training Time: 29.152s, Prediction Time: 14.302s
    label KACAgent, Reward 35: -42.190, Len(game): 156, Training Time: 32.058s, Prediction Time: 15.708s
    label KACAgent, Reward 36: -67.462, Len(game): 150, Training Time: 35.142s, Prediction Time: 17.279s
    label KACAgent, Reward 37: 14.520, Len(game): 143, Training Time: 38.484s, Prediction Time: 18.877s
    label KACAgent, Reward 38: -70.947, Len(game): 143, Training Time: 42.025s, Prediction Time: 20.651s
    label KACAgent, Reward 39: -56.439, Len(game): 120, Training Time: 45.796s, Prediction Time: 22.540s
    label KACAgent, Reward 40: -84.625, Len(game): 2000, Training Time: 49.789s, Prediction Time: 27.106s
    label KACAgent, Reward 41: 10.722, Len(game): 181, Training Time: 55.736s, Prediction Time: 29.239s
    label KACAgent, Reward 42: -45.300, Len(game): 199, Training Time: 55.736s, Prediction Time: 32.498s
    label KACAgent, Reward 43: 5.563, Len(game): 146, Training Time: 55.736s, Prediction Time: 32.736s
    label KACAgent, Reward 44: -203.386, Len(game): 140, Training Time: 55.736s, Prediction Time: 32.965s
    label KACAgent, Reward 45: 12.574, Len(game): 120, Training Time: 55.736s, Prediction Time: 33.162s
    label KACAgent, Reward 46: -212.257, Len(game): 168, Training Time: 55.736s, Prediction Time: 33.439s
    label KACAgent, Reward 47: -33.197, Len(game): 121, Training Time: 55.736s, Prediction Time: 33.637s
    label KACAgent, Reward 48: 15.646, Len(game): 113, Training Time: 55.736s, Prediction Time: 33.821s
    label KACAgent, Reward 49: 2.483, Len(game): 154, Training Time: 55.736s, Prediction Time: 34.073s
    label KACAgent, Reward 50: -25.754, Len(game): 143, Training Time: 55.736s, Prediction Time: 34.304s
    label KACAgent, Reward 51: -27.675, Len(game): 155, Training Time: 55.736s, Prediction Time: 34.558s
    label KACAgent, Reward 52: -44.397, Len(game): 113, Training Time: 55.736s, Prediction Time: 34.741s
    label KACAgent, Reward 53: -139.682, Len(game): 149, Training Time: 55.736s, Prediction Time: 34.984s
    label KACAgent, Reward 54: -38.973, Len(game): 181, Training Time: 55.736s, Prediction Time: 35.278s
    label KACAgent, Reward 55: -28.929, Len(game): 135, Training Time: 55.736s, Prediction Time: 35.499s
    label KACAgent, Reward 56: -52.824, Len(game): 106, Training Time: 55.736s, Prediction Time: 35.674s
    label KACAgent, Reward 57: -210.850, Len(game): 156, Training Time: 55.736s, Prediction Time: 35.929s
    label KACAgent, Reward 58: -169.413, Len(game): 172, Training Time: 55.736s, Prediction Time: 36.207s
    label KACAgent, Reward 59: -36.120, Len(game): 156, Training Time: 55.736s, Prediction Time: 36.462s
    label KACAgent, Reward 60: -64.864, Len(game): 93, Training Time: 55.736s, Prediction Time: 36.613s
    label KACAgent, Reward 61: -174.224, Len(game): 183, Training Time: 55.736s, Prediction Time: 36.915s
    label KACAgent, Reward 62: 30.066, Len(game): 117, Training Time: 55.736s, Prediction Time: 37.105s
    label KACAgent, Reward 63: 23.879, Len(game): 139, Training Time: 55.736s, Prediction Time: 37.331s
    label KACAgent, Reward 64: -327.643, Len(game): 126, Training Time: 55.736s, Prediction Time: 37.538s
    label KACAgent, Reward 65: -46.474, Len(game): 125, Training Time: 55.736s, Prediction Time: 37.742s
    label KACAgent, Reward 66: -238.404, Len(game): 594, Training Time: 55.736s, Prediction Time: 38.721s
    label KACAgent, Reward 67: -81.705, Len(game): 109, Training Time: 55.736s, Prediction Time: 38.899s
    label KACAgent, Reward 68: -13.623, Len(game): 167, Training Time: 55.736s, Prediction Time: 39.169s
    label KACAgent, Reward 69: -257.633, Len(game): 185, Training Time: 55.736s, Prediction Time: 39.470s
    label KACAgent, Reward 70: -45.296, Len(game): 128, Training Time: 55.736s, Prediction Time: 39.681s
    label KACAgent, Reward 71: -7.928, Len(game): 130, Training Time: 55.736s, Prediction Time: 39.893s
    label KACAgent, Reward 72: -67.325, Len(game): 135, Training Time: 55.736s, Prediction Time: 40.114s
    label KACAgent, Reward 73: -79.229, Len(game): 228, Training Time: 55.736s, Prediction Time: 40.486s
    label KACAgent, Reward 74: 39.268, Len(game): 164, Training Time: 55.736s, Prediction Time: 40.753s
    label KACAgent, Reward 75: 17.092, Len(game): 164, Training Time: 55.736s, Prediction Time: 41.020s
    label KACAgent, Reward 76: 8.964, Len(game): 120, Training Time: 55.736s, Prediction Time: 41.217s
    label KACAgent, Reward 77: -98.428, Len(game): 2000, Training Time: 55.736s, Prediction Time: 44.502s
    label KACAgent, Reward 78: -49.424, Len(game): 112, Training Time: 55.736s, Prediction Time: 44.686s
    label KACAgent, Reward 79: -145.271, Len(game): 108, Training Time: 55.736s, Prediction Time: 44.862s
    label KACAgent, Reward 80: -38.801, Len(game): 164, Training Time: 55.736s, Prediction Time: 45.132s
    label KACAgent, Reward 81: -262.900, Len(game): 168, Training Time: 55.736s, Prediction Time: 45.405s
    label KACAgent, Reward 82: -41.493, Len(game): 157, Training Time: 55.736s, Prediction Time: 45.661s
    label KACAgent, Reward 83: -285.713, Len(game): 177, Training Time: 55.736s, Prediction Time: 45.949s
    label KACAgent, Reward 84: -1.833, Len(game): 146, Training Time: 55.736s, Prediction Time: 46.187s
    label KACAgent, Reward 85: -159.719, Len(game): 154, Training Time: 55.736s, Prediction Time: 46.439s
    label KACAgent, Reward 86: 6.576, Len(game): 154, Training Time: 55.736s, Prediction Time: 46.688s
    label KACAgent, Reward 87: -29.381, Len(game): 129, Training Time: 55.736s, Prediction Time: 46.898s
    label KACAgent, Reward 88: -53.275, Len(game): 146, Training Time: 55.736s, Prediction Time: 47.134s
    label KACAgent, Reward 89: -215.667, Len(game): 176, Training Time: 55.736s, Prediction Time: 47.422s
    label KACAgent, Reward 90: -71.019, Len(game): 124, Training Time: 55.736s, Prediction Time: 47.622s
    label KACAgent, Reward 91: -63.618, Len(game): 142, Training Time: 55.736s, Prediction Time: 47.853s
    label KACAgent, Reward 92: -57.553, Len(game): 142, Training Time: 55.736s, Prediction Time: 48.084s
    label KACAgent, Reward 93: -10.609, Len(game): 151, Training Time: 55.736s, Prediction Time: 48.328s
    label KACAgent, Reward 94: 3.260, Len(game): 124, Training Time: 55.736s, Prediction Time: 48.529s
    label KACAgent, Reward 95: -81.402, Len(game): 169, Training Time: 55.736s, Prediction Time: 48.803s
    label KACAgent, Reward 96: -172.443, Len(game): 147, Training Time: 55.736s, Prediction Time: 49.042s
    label KACAgent, Reward 97: -241.177, Len(game): 161, Training Time: 55.736s, Prediction Time: 49.304s
    label KACAgent, Reward 98: 6.522, Len(game): 126, Training Time: 55.736s, Prediction Time: 49.510s
    label KACAgent, Reward 99: 0.284, Len(game): 132, Training Time: 55.736s, Prediction Time: 49.725s
    label PolicyGradient, Reward 0: -376.577, Len(game): 81, Training Time: 0.008s, Prediction Time: 0.002s
    label PolicyGradient, Reward 1: -317.620, Len(game): 116, Training Time: 0.017s, Prediction Time: 0.022s
    label PolicyGradient, Reward 2: -110.302, Len(game): 74, Training Time: 0.123s, Prediction Time: 0.036s
    label PolicyGradient, Reward 3: -104.416, Len(game): 60, Training Time: 0.148s, Prediction Time: 0.053s
    label PolicyGradient, Reward 4: -436.721, Len(game): 93, Training Time: 0.184s, Prediction Time: 0.084s
    label PolicyGradient, Reward 5: -176.985, Len(game): 128, Training Time: 0.246s, Prediction Time: 0.130s
    label PolicyGradient, Reward 6: -93.553, Len(game): 70, Training Time: 0.335s, Prediction Time: 0.169s
    label PolicyGradient, Reward 7: -200.846, Len(game): 92, Training Time: 0.437s, Prediction Time: 0.225s
    label PolicyGradient, Reward 8: -196.270, Len(game): 89, Training Time: 0.562s, Prediction Time: 0.291s
    label PolicyGradient, Reward 9: -257.503, Len(game): 94, Training Time: 0.712s, Prediction Time: 0.373s
    label PolicyGradient, Reward 10: -155.851, Len(game): 97, Training Time: 0.886s, Prediction Time: 0.468s
    label PolicyGradient, Reward 11: -209.418, Len(game): 94, Training Time: 1.112s, Prediction Time: 0.575s
    label PolicyGradient, Reward 12: -112.545, Len(game): 66, Training Time: 1.364s, Prediction Time: 0.672s
    label PolicyGradient, Reward 13: -50.909, Len(game): 91, Training Time: 1.657s, Prediction Time: 0.805s
    label PolicyGradient, Reward 14: -87.790, Len(game): 119, Training Time: 1.988s, Prediction Time: 0.965s
    label PolicyGradient, Reward 15: -237.999, Len(game): 100, Training Time: 2.370s, Prediction Time: 1.145s
    label PolicyGradient, Reward 16: -120.887, Len(game): 154, Training Time: 2.798s, Prediction Time: 1.361s
    label PolicyGradient, Reward 17: -73.387, Len(game): 120, Training Time: 3.305s, Prediction Time: 1.579s
    label PolicyGradient, Reward 18: -124.230, Len(game): 172, Training Time: 3.880s, Prediction Time: 1.876s
    label PolicyGradient, Reward 19: -105.460, Len(game): 121, Training Time: 4.586s, Prediction Time: 2.163s
    label PolicyGradient, Reward 20: -54.722, Len(game): 123, Training Time: 5.342s, Prediction Time: 2.499s
    label PolicyGradient, Reward 21: -104.015, Len(game): 192, Training Time: 6.190s, Prediction Time: 2.925s
    label PolicyGradient, Reward 22: -126.083, Len(game): 196, Training Time: 7.165s, Prediction Time: 3.418s
    label PolicyGradient, Reward 23: -125.526, Len(game): 204, Training Time: 8.319s, Prediction Time: 3.970s
    label PolicyGradient, Reward 24: -172.179, Len(game): 170, Training Time: 9.679s, Prediction Time: 4.545s
    label PolicyGradient, Reward 25: -90.296, Len(game): 198, Training Time: 11.146s, Prediction Time: 5.236s
    label PolicyGradient, Reward 26: -189.260, Len(game): 170, Training Time: 12.845s, Prediction Time: 5.961s
    label PolicyGradient, Reward 27: 2.232, Len(game): 163, Training Time: 14.711s, Prediction Time: 6.824s
    label PolicyGradient, Reward 28: -17.712, Len(game): 156, Training Time: 16.873s, Prediction Time: 7.755s
    label PolicyGradient, Reward 29: -132.171, Len(game): 201, Training Time: 19.165s, Prediction Time: 8.836s
    label PolicyGradient, Reward 30: -80.358, Len(game): 280, Training Time: 21.705s, Prediction Time: 10.031s
    label PolicyGradient, Reward 31: 35.367, Len(game): 157, Training Time: 24.720s, Prediction Time: 11.337s
    label PolicyGradient, Reward 32: -172.774, Len(game): 280, Training Time: 27.898s, Prediction Time: 13.042s
    label PolicyGradient, Reward 33: -50.473, Len(game): 243, Training Time: 31.489s, Prediction Time: 14.744s
    label PolicyGradient, Reward 34: -270.038, Len(game): 296, Training Time: 35.514s, Prediction Time: 16.694s
    label PolicyGradient, Reward 35: -79.860, Len(game): 176, Training Time: 40.046s, Prediction Time: 18.645s
    label PolicyGradient, Reward 36: -59.044, Len(game): 363, Training Time: 45.408s, Prediction Time: 21.142s
    label PolicyGradient, Reward 37: -89.982, Len(game): 274, Training Time: 52.149s, Prediction Time: 24.229s
    label PolicyGradient, Reward 38: -178.406, Len(game): 174, Training Time: 52.149s, Prediction Time: 27.746s
    label PolicyGradient, Reward 39: -128.704, Len(game): 314, Training Time: 52.149s, Prediction Time: 28.246s
    label PolicyGradient, Reward 40: -102.935, Len(game): 275, Training Time: 52.149s, Prediction Time: 28.694s
    label PolicyGradient, Reward 41: -102.138, Len(game): 315, Training Time: 52.149s, Prediction Time: 29.203s
    label PolicyGradient, Reward 42: -122.916, Len(game): 225, Training Time: 52.149s, Prediction Time: 29.567s
    label PolicyGradient, Reward 43: -124.166, Len(game): 340, Training Time: 52.149s, Prediction Time: 30.116s
    label PolicyGradient, Reward 44: -94.108, Len(game): 219, Training Time: 52.149s, Prediction Time: 30.469s
    label PolicyGradient, Reward 45: -88.906, Len(game): 212, Training Time: 52.149s, Prediction Time: 30.815s
    label PolicyGradient, Reward 46: -165.790, Len(game): 239, Training Time: 52.149s, Prediction Time: 31.204s
    label PolicyGradient, Reward 47: -144.381, Len(game): 634, Training Time: 52.149s, Prediction Time: 32.231s
    label PolicyGradient, Reward 48: -155.797, Len(game): 167, Training Time: 52.149s, Prediction Time: 32.502s
    label PolicyGradient, Reward 49: -209.558, Len(game): 346, Training Time: 52.149s, Prediction Time: 33.057s
    label PolicyGradient, Reward 50: -150.038, Len(game): 309, Training Time: 52.149s, Prediction Time: 33.563s
    label PolicyGradient, Reward 51: -69.295, Len(game): 262, Training Time: 52.149s, Prediction Time: 33.979s
    label PolicyGradient, Reward 52: -15.160, Len(game): 152, Training Time: 52.149s, Prediction Time: 34.228s
    label PolicyGradient, Reward 53: -58.064, Len(game): 262, Training Time: 52.149s, Prediction Time: 34.651s
    label PolicyGradient, Reward 54: -116.293, Len(game): 286, Training Time: 52.149s, Prediction Time: 35.109s
    label PolicyGradient, Reward 55: -49.577, Len(game): 440, Training Time: 52.149s, Prediction Time: 35.819s
    label PolicyGradient, Reward 56: -201.275, Len(game): 354, Training Time: 52.149s, Prediction Time: 36.393s
    label PolicyGradient, Reward 57: -45.327, Len(game): 238, Training Time: 52.149s, Prediction Time: 36.780s
    label PolicyGradient, Reward 58: -293.379, Len(game): 560, Training Time: 52.149s, Prediction Time: 37.685s
    label PolicyGradient, Reward 59: -235.921, Len(game): 385, Training Time: 52.149s, Prediction Time: 38.309s
    label PolicyGradient, Reward 60: -297.602, Len(game): 277, Training Time: 52.149s, Prediction Time: 38.752s
    label PolicyGradient, Reward 61: -89.178, Len(game): 218, Training Time: 52.149s, Prediction Time: 39.106s
    label PolicyGradient, Reward 62: -182.739, Len(game): 331, Training Time: 52.149s, Prediction Time: 39.640s
    label PolicyGradient, Reward 63: -215.044, Len(game): 467, Training Time: 52.149s, Prediction Time: 40.396s
    label PolicyGradient, Reward 64: -43.587, Len(game): 275, Training Time: 52.149s, Prediction Time: 40.845s
    label PolicyGradient, Reward 65: -92.450, Len(game): 302, Training Time: 52.149s, Prediction Time: 41.336s
    label PolicyGradient, Reward 66: -137.102, Len(game): 348, Training Time: 52.149s, Prediction Time: 41.902s
    label PolicyGradient, Reward 67: -220.336, Len(game): 657, Training Time: 52.149s, Prediction Time: 42.971s
    label PolicyGradient, Reward 68: -90.526, Len(game): 205, Training Time: 52.149s, Prediction Time: 43.303s
    label PolicyGradient, Reward 69: -64.013, Len(game): 253, Training Time: 52.149s, Prediction Time: 43.713s
    label PolicyGradient, Reward 70: -302.684, Len(game): 304, Training Time: 52.149s, Prediction Time: 44.202s
    label PolicyGradient, Reward 71: -233.605, Len(game): 218, Training Time: 52.149s, Prediction Time: 44.554s
    label PolicyGradient, Reward 72: -143.846, Len(game): 161, Training Time: 52.149s, Prediction Time: 44.816s
    label PolicyGradient, Reward 73: -94.678, Len(game): 197, Training Time: 52.149s, Prediction Time: 45.133s
    label PolicyGradient, Reward 74: -94.786, Len(game): 201, Training Time: 52.149s, Prediction Time: 45.458s
    label PolicyGradient, Reward 75: -49.233, Len(game): 200, Training Time: 52.149s, Prediction Time: 45.782s
    label PolicyGradient, Reward 76: -269.528, Len(game): 441, Training Time: 52.149s, Prediction Time: 46.498s
    label PolicyGradient, Reward 77: -163.477, Len(game): 305, Training Time: 52.149s, Prediction Time: 46.989s
    label PolicyGradient, Reward 78: -136.235, Len(game): 229, Training Time: 52.149s, Prediction Time: 47.361s
    label PolicyGradient, Reward 79: -186.023, Len(game): 245, Training Time: 52.149s, Prediction Time: 47.758s
    label PolicyGradient, Reward 80: -139.848, Len(game): 255, Training Time: 52.149s, Prediction Time: 48.175s
    label PolicyGradient, Reward 81: -120.263, Len(game): 349, Training Time: 52.149s, Prediction Time: 48.742s
    label PolicyGradient, Reward 82: -199.564, Len(game): 190, Training Time: 52.149s, Prediction Time: 49.054s
    label PolicyGradient, Reward 83: -181.473, Len(game): 398, Training Time: 52.149s, Prediction Time: 49.699s
    label PolicyGradient, Reward 84: -31.888, Len(game): 333, Training Time: 52.149s, Prediction Time: 50.233s
    label PolicyGradient, Reward 85: -175.663, Len(game): 328, Training Time: 52.149s, Prediction Time: 50.759s
    label PolicyGradient, Reward 86: -97.013, Len(game): 292, Training Time: 52.149s, Prediction Time: 51.235s
    label PolicyGradient, Reward 87: -36.709, Len(game): 156, Training Time: 52.149s, Prediction Time: 51.488s
    label PolicyGradient, Reward 88: -77.101, Len(game): 173, Training Time: 52.149s, Prediction Time: 51.767s
    label PolicyGradient, Reward 89: -89.409, Len(game): 196, Training Time: 52.149s, Prediction Time: 52.081s
    label PolicyGradient, Reward 90: -104.524, Len(game): 252, Training Time: 52.149s, Prediction Time: 52.490s
    label PolicyGradient, Reward 91: -81.670, Len(game): 180, Training Time: 52.149s, Prediction Time: 52.780s
    label PolicyGradient, Reward 92: -103.460, Len(game): 364, Training Time: 52.149s, Prediction Time: 53.368s
    label PolicyGradient, Reward 93: -50.038, Len(game): 247, Training Time: 52.149s, Prediction Time: 53.768s
    label PolicyGradient, Reward 94: -164.729, Len(game): 391, Training Time: 52.149s, Prediction Time: 54.400s
    label PolicyGradient, Reward 95: -63.206, Len(game): 158, Training Time: 52.149s, Prediction Time: 54.655s
    label PolicyGradient, Reward 96: -70.394, Len(game): 321, Training Time: 52.149s, Prediction Time: 55.177s
    label PolicyGradient, Reward 97: 39.600, Len(game): 151, Training Time: 52.149s, Prediction Time: 55.425s
    label PolicyGradient, Reward 98: -199.498, Len(game): 300, Training Time: 52.149s, Prediction Time: 55.908s
    label PolicyGradient, Reward 99: -105.230, Len(game): 186, Training Time: 52.149s, Prediction Time: 56.213s
    label DQNAgent, Reward 0: -100.353, Len(game): 113, Training Time: 0.127s, Prediction Time: 0.004s
    label DQNAgent, Reward 1: -292.729, Len(game): 104, Training Time: 0.257s, Prediction Time: 0.009s
    label DQNAgent, Reward 2: -251.125, Len(game): 113, Training Time: 0.391s, Prediction Time: 0.015s
    label DQNAgent, Reward 3: -60.100, Len(game): 85, Training Time: 0.493s, Prediction Time: 0.019s
    label DQNAgent, Reward 4: -283.162, Len(game): 338, Training Time: 0.912s, Prediction Time: 0.039s
    label DQNAgent, Reward 5: -308.415, Len(game): 242, Training Time: 1.216s, Prediction Time: 0.056s
    label DQNAgent, Reward 6: 121.699, Len(game): 1173, Training Time: 2.656s, Prediction Time: 0.160s
    label DQNAgent, Reward 7: -104.280, Len(game): 585, Training Time: 3.388s, Prediction Time: 0.209s
    label DQNAgent, Reward 8: -248.759, Len(game): 1246, Training Time: 4.995s, Prediction Time: 0.315s
    label DQNAgent, Reward 9: -220.946, Len(game): 2000, Training Time: 7.546s, Prediction Time: 0.481s
    label DQNAgent, Reward 10: 208.528, Len(game): 644, Training Time: 8.384s, Prediction Time: 0.542s
    label DQNAgent, Reward 11: 209.618, Len(game): 363, Training Time: 8.854s, Prediction Time: 0.576s
    label DQNAgent, Reward 12: -98.795, Len(game): 137, Training Time: 9.031s, Prediction Time: 0.589s
    label DQNAgent, Reward 13: -21.690, Len(game): 146, Training Time: 9.219s, Prediction Time: 0.603s
    label DQNAgent, Reward 14: 209.860, Len(game): 365, Training Time: 9.696s, Prediction Time: 0.638s
    label DQNAgent, Reward 15: 150.999, Len(game): 536, Training Time: 10.412s, Prediction Time: 0.688s
    label DQNAgent, Reward 16: -114.041, Len(game): 210, Training Time: 10.682s, Prediction Time: 0.706s
    label DQNAgent, Reward 17: -60.068, Len(game): 110, Training Time: 10.827s, Prediction Time: 0.714s
    label DQNAgent, Reward 18: 177.521, Len(game): 239, Training Time: 11.141s, Prediction Time: 0.738s
    label DQNAgent, Reward 19: -150.340, Len(game): 2000, Training Time: 13.976s, Prediction Time: 0.939s
    label DQNAgent, Reward 20: -167.060, Len(game): 121, Training Time: 14.131s, Prediction Time: 0.949s
    label DQNAgent, Reward 21: 27.313, Len(game): 245, Training Time: 14.455s, Prediction Time: 0.971s
    label DQNAgent, Reward 22: -108.195, Len(game): 717, Training Time: 15.402s, Prediction Time: 1.036s
    label DQNAgent, Reward 23: -233.710, Len(game): 2000, Training Time: 18.018s, Prediction Time: 1.218s
    label DQNAgent, Reward 24: 119.225, Len(game): 852, Training Time: 19.131s, Prediction Time: 1.294s
    label DQNAgent, Reward 25: -83.181, Len(game): 395, Training Time: 19.649s, Prediction Time: 1.329s
    label DQNAgent, Reward 26: -157.777, Len(game): 1281, Training Time: 21.340s, Prediction Time: 1.445s
    label DQNAgent, Reward 27: -106.503, Len(game): 2000, Training Time: 23.965s, Prediction Time: 1.636s
    label DQNAgent, Reward 28: 94.857, Len(game): 950, Training Time: 25.228s, Prediction Time: 1.724s
    label DQNAgent, Reward 29: -262.092, Len(game): 2000, Training Time: 27.876s, Prediction Time: 1.899s
    label DQNAgent, Reward 30: -184.724, Len(game): 2000, Training Time: 30.531s, Prediction Time: 2.073s
    label DQNAgent, Reward 31: -179.814, Len(game): 2000, Training Time: 33.188s, Prediction Time: 2.250s
    label DQNAgent, Reward 32: -199.041, Len(game): 2000, Training Time: 35.850s, Prediction Time: 2.425s
    label DQNAgent, Reward 33: -188.248, Len(game): 2000, Training Time: 38.510s, Prediction Time: 2.602s
    label DQNAgent, Reward 34: 81.636, Len(game): 1669, Training Time: 40.770s, Prediction Time: 2.758s
    label DQNAgent, Reward 35: 234.293, Len(game): 420, Training Time: 41.335s, Prediction Time: 2.799s
    label DQNAgent, Reward 36: -208.190, Len(game): 2000, Training Time: 44.027s, Prediction Time: 2.976s
    label DQNAgent, Reward 37: -189.187, Len(game): 2000, Training Time: 46.709s, Prediction Time: 3.150s
    label DQNAgent, Reward 38: 195.834, Len(game): 748, Training Time: 47.724s, Prediction Time: 3.224s
    label DQNAgent, Reward 39: 209.305, Len(game): 479, Training Time: 48.367s, Prediction Time: 3.270s
    label DQNAgent, Reward 40: 231.972, Len(game): 450, Training Time: 48.979s, Prediction Time: 3.310s
    label DQNAgent, Reward 41: 239.903, Len(game): 344, Training Time: 49.447s, Prediction Time: 3.339s
    label DQNAgent, Reward 42: 105.298, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.541s
    label DQNAgent, Reward 43: 267.084, Len(game): 394, Training Time: 52.166s, Prediction Time: 3.578s
    label DQNAgent, Reward 44: 228.900, Len(game): 394, Training Time: 52.166s, Prediction Time: 3.615s
    label DQNAgent, Reward 45: 289.386, Len(game): 259, Training Time: 52.166s, Prediction Time: 3.639s
    label DQNAgent, Reward 46: 253.069, Len(game): 359, Training Time: 52.166s, Prediction Time: 3.672s
    label DQNAgent, Reward 47: -206.768, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.827s
    label DQNAgent, Reward 48: -207.327, Len(game): 2000, Training Time: 52.166s, Prediction Time: 3.973s
    label DQNAgent, Reward 49: 276.521, Len(game): 453, Training Time: 52.166s, Prediction Time: 4.007s
    label DQNAgent, Reward 50: -214.902, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.150s
    label DQNAgent, Reward 51: 208.379, Len(game): 427, Training Time: 52.166s, Prediction Time: 4.180s
    label DQNAgent, Reward 52: -195.631, Len(game): 765, Training Time: 52.166s, Prediction Time: 4.234s
    label DQNAgent, Reward 53: 215.616, Len(game): 514, Training Time: 52.166s, Prediction Time: 4.271s
    label DQNAgent, Reward 54: -197.951, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.414s
    label DQNAgent, Reward 55: -201.905, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.557s
    label DQNAgent, Reward 56: 291.084, Len(game): 260, Training Time: 52.166s, Prediction Time: 4.577s
    label DQNAgent, Reward 57: 223.495, Len(game): 585, Training Time: 52.166s, Prediction Time: 4.619s
    label DQNAgent, Reward 58: -190.981, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.763s
    label DQNAgent, Reward 59: -76.292, Len(game): 192, Training Time: 52.166s, Prediction Time: 4.777s
    label DQNAgent, Reward 60: 221.002, Len(game): 401, Training Time: 52.166s, Prediction Time: 4.808s
    label DQNAgent, Reward 61: 232.984, Len(game): 419, Training Time: 52.166s, Prediction Time: 4.838s
    label DQNAgent, Reward 62: -206.538, Len(game): 2000, Training Time: 52.166s, Prediction Time: 4.980s
    label DQNAgent, Reward 63: 240.155, Len(game): 438, Training Time: 52.166s, Prediction Time: 5.013s
    label DQNAgent, Reward 64: -226.895, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.157s
    label DQNAgent, Reward 65: 259.743, Len(game): 419, Training Time: 52.166s, Prediction Time: 5.188s
    label DQNAgent, Reward 66: 245.651, Len(game): 374, Training Time: 52.166s, Prediction Time: 5.215s
    label DQNAgent, Reward 67: 265.587, Len(game): 431, Training Time: 52.166s, Prediction Time: 5.247s
    label DQNAgent, Reward 68: -180.004, Len(game): 842, Training Time: 52.166s, Prediction Time: 5.306s
    label DQNAgent, Reward 69: -25.527, Len(game): 164, Training Time: 52.166s, Prediction Time: 5.318s
    label DQNAgent, Reward 70: -100.565, Len(game): 201, Training Time: 52.166s, Prediction Time: 5.331s
    label DQNAgent, Reward 71: -216.997, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.476s
    label DQNAgent, Reward 72: 246.259, Len(game): 388, Training Time: 52.166s, Prediction Time: 5.506s
    label DQNAgent, Reward 73: -119.716, Len(game): 584, Training Time: 52.166s, Prediction Time: 5.547s
    label DQNAgent, Reward 74: 258.951, Len(game): 357, Training Time: 52.166s, Prediction Time: 5.572s
    label DQNAgent, Reward 75: 213.073, Len(game): 399, Training Time: 52.166s, Prediction Time: 5.603s
    label DQNAgent, Reward 76: 189.632, Len(game): 838, Training Time: 52.166s, Prediction Time: 5.661s
    label DQNAgent, Reward 77: -241.961, Len(game): 2000, Training Time: 52.166s, Prediction Time: 5.805s
    label DQNAgent, Reward 78: 75.758, Len(game): 1708, Training Time: 52.166s, Prediction Time: 5.943s
    label DQNAgent, Reward 79: -227.349, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.086s
    label DQNAgent, Reward 80: -245.627, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.234s
    label DQNAgent, Reward 81: 207.837, Len(game): 460, Training Time: 52.166s, Prediction Time: 6.267s
    label DQNAgent, Reward 82: 272.286, Len(game): 478, Training Time: 52.166s, Prediction Time: 6.303s
    label DQNAgent, Reward 83: 169.565, Len(game): 642, Training Time: 52.166s, Prediction Time: 6.351s
    label DQNAgent, Reward 84: -222.107, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.494s
    label DQNAgent, Reward 85: 200.884, Len(game): 511, Training Time: 52.166s, Prediction Time: 6.533s
    label DQNAgent, Reward 86: 265.252, Len(game): 400, Training Time: 52.166s, Prediction Time: 6.562s
    label DQNAgent, Reward 87: -214.872, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.705s
    label DQNAgent, Reward 88: -190.053, Len(game): 2000, Training Time: 52.166s, Prediction Time: 6.844s
    label DQNAgent, Reward 89: 240.586, Len(game): 479, Training Time: 52.166s, Prediction Time: 6.878s
    label DQNAgent, Reward 90: -192.312, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.020s
    label DQNAgent, Reward 91: 219.191, Len(game): 448, Training Time: 52.166s, Prediction Time: 7.052s
    label DQNAgent, Reward 92: 3.452, Len(game): 1716, Training Time: 52.166s, Prediction Time: 7.176s
    label DQNAgent, Reward 93: -213.273, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.320s
    label DQNAgent, Reward 94: -243.764, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.461s
    label DQNAgent, Reward 95: -194.640, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.602s
    label DQNAgent, Reward 96: 201.674, Len(game): 444, Training Time: 52.166s, Prediction Time: 7.634s
    label DQNAgent, Reward 97: 279.104, Len(game): 193, Training Time: 52.166s, Prediction Time: 7.649s
    label DQNAgent, Reward 98: 254.010, Len(game): 355, Training Time: 52.166s, Prediction Time: 7.675s
    label DQNAgent, Reward 99: -217.626, Len(game): 2000, Training Time: 52.166s, Prediction Time: 7.818s
    Computed global error Bellman mean:  0.4570717447881489  iter:  5
    Computed global error Bellman mean:  0.06250832986135778  iter:  4
    label KQLearning, Reward 0: -94.652, Len(game): 117, Training Time: 0.138s, Prediction Time: 0.003s
    Computed global error Bellman mean:  0.3264326785246641  iter:  5
    Computed global error Bellman mean:  0.06250832986135778  iter:  0
    Computed global error Bellman mean:  0.006574753577857203  iter:  5
    label KQLearning, Reward 1: -85.618, Len(game): 90, Training Time: 0.278s, Prediction Time: 0.028s
    Computed global error Bellman mean:  2.9554992113685035e-07  iter:  4
    label KQLearning, Reward 2: -651.926, Len(game): 77, Training Time: 0.300s, Prediction Time: 0.103s
    Computed global error Bellman mean:  8.383290537888723e-08  iter:  3
    label KQLearning, Reward 3: -220.218, Len(game): 101, Training Time: 0.326s, Prediction Time: 0.202s
    Computed global error Bellman mean:  5.367322145548921e-07  iter:  5
    label KQLearning, Reward 4: -289.222, Len(game): 137, Training Time: 0.398s, Prediction Time: 0.378s
    Computed global error Bellman mean:  2.105239215464313  iter:  5
    Computed global error Bellman mean:  0.08960391448271526  iter:  5
    label KQLearning, Reward 5: -403.410, Len(game): 129, Training Time: 0.572s, Prediction Time: 0.606s
    Computed global error Bellman mean:  0.0012465311615857508  iter:  5
    Computed global error Bellman mean:  0.018157787194138943  iter:  5
    label KQLearning, Reward 6: -706.512, Len(game): 331, Training Time: 1.074s, Prediction Time: 1.260s
    Computed global error Bellman mean:  7.564830843865642e-07  iter:  5
    Computed global error Bellman mean:  0.018157787194138943  iter:  0
    label KQLearning, Reward 7: -115.241, Len(game): 157, Training Time: 1.193s, Prediction Time: 1.671s
    Computed global error Bellman mean:  9.80103593083591e-07  iter:  4
    label KQLearning, Reward 8: -11.041, Len(game): 94, Training Time: 1.225s, Prediction Time: 1.953s
    Computed global error Bellman mean:  4.197550024370722e-07  iter:  5
    label KQLearning, Reward 9: -19.129, Len(game): 128, Training Time: 1.290s, Prediction Time: 2.350s
    Computed global error Bellman mean:  2.0669793039081446e-07  iter:  5
    label KQLearning, Reward 10: -301.420, Len(game): 117, Training Time: 1.354s, Prediction Time: 2.766s
    Computed global error Bellman mean:  0.08373126374953485  iter:  5
    Computed global error Bellman mean:  0.05462393912900874  iter:  5
    label KQLearning, Reward 11: -42.674, Len(game): 78, Training Time: 1.421s, Prediction Time: 3.079s
    Computed global error Bellman mean:  0.5468703081806887  iter:  5
    Computed global error Bellman mean:  0.028781752528066643  iter:  5
    Computed global error Bellman mean:  0.03715632726194866  iter:  5
    label KQLearning, Reward 12: -109.428, Len(game): 82, Training Time: 1.549s, Prediction Time: 3.397s
    Computed global error Bellman mean:  8.808398602292144e-08  iter:  3
    Computed global error Bellman mean:  0.012347614152382254  iter:  5
    Computed global error Bellman mean:  0.0016687136170853616  iter:  5
    label KQLearning, Reward 13: -86.860, Len(game): 73, Training Time: 1.654s, Prediction Time: 3.718s
    Computed global error Bellman mean:  1.4648319706077046e-07  iter:  3
    Computed global error Bellman mean:  0.0062496450771168345  iter:  3
    label KQLearning, Reward 14: -111.675, Len(game): 93, Training Time: 1.717s, Prediction Time: 4.127s
    Computed global error Bellman mean:  5.1603041547531387e-05  iter:  5
    label KQLearning, Reward 15: -292.237, Len(game): 139, Training Time: 1.791s, Prediction Time: 4.771s
    Computed global error Bellman mean:  0.01138081093322849  iter:  5
    Computed global error Bellman mean:  0.030162712067387184  iter:  0
    label KQLearning, Reward 16: -175.876, Len(game): 123, Training Time: 1.877s, Prediction Time: 5.403s
    Computed global error Bellman mean:  0.03295929019194882  iter:  5
    Computed global error Bellman mean:  0.02128038177079849  iter:  5
    label KQLearning, Reward 17: -90.069, Len(game): 53, Training Time: 1.919s, Prediction Time: 5.697s
    Computed global error Bellman mean:  1.4094325009420564e-08  iter:  4
    Computed global error Bellman mean:  0.017127490517891347  iter:  5
    label KQLearning, Reward 18: -116.886, Len(game): 86, Training Time: 1.975s, Prediction Time: 6.162s
    Computed global error Bellman mean:  5.740827606455394e-08  iter:  5
    label KQLearning, Reward 19: -144.746, Len(game): 78, Training Time: 2.009s, Prediction Time: 6.624s
    Computed global error Bellman mean:  0.022450273448984336  iter:  5
    Computed global error Bellman mean:  0.043383487625418306  iter:  1
    label KQLearning, Reward 20: -201.620, Len(game): 128, Training Time: 2.131s, Prediction Time: 7.407s
    Computed global error Bellman mean:  1.9583929515387393e-07  iter:  5
    label KQLearning, Reward 21: -125.644, Len(game): 117, Training Time: 2.186s, Prediction Time: 8.182s
    Computed global error Bellman mean:  0.05815241736539713  iter:  5
    Computed global error Bellman mean:  0.02677875059985211  iter:  5
    label KQLearning, Reward 22: -217.014, Len(game): 118, Training Time: 2.323s, Prediction Time: 8.978s
    Computed global error Bellman mean:  2.530994727251449e-08  iter:  3
    Computed global error Bellman mean:  0.02677600244381529  iter:  2
    label KQLearning, Reward 23: -123.391, Len(game): 57, Training Time: 2.398s, Prediction Time: 9.386s
    Computed global error Bellman mean:  3.548205378400837e-07  iter:  5
    label KQLearning, Reward 24: -2.588, Len(game): 92, Training Time: 2.436s, Prediction Time: 10.063s
    Computed global error Bellman mean:  4.144973016135494e-07  iter:  5
    label KQLearning, Reward 25: -97.150, Len(game): 70, Training Time: 2.464s, Prediction Time: 10.591s
    Computed global error Bellman mean:  0.6790841272588155  iter:  5
    Computed global error Bellman mean:  0.01880443414888047  iter:  5
    label KQLearning, Reward 26: -129.596, Len(game): 118, Training Time: 2.606s, Prediction Time: 11.489s
    Computed global error Bellman mean:  6.261852524217356e-07  iter:  5
    Computed global error Bellman mean:  0.01880443414888047  iter:  0
    label KQLearning, Reward 27: -157.306, Len(game): 140, Training Time: 2.706s, Prediction Time: 12.604s
    Computed global error Bellman mean:  3.1689305608928463e-07  iter:  4
    label KQLearning, Reward 28: -119.922, Len(game): 75, Training Time: 2.736s, Prediction Time: 13.218s
    Computed global error Bellman mean:  0.009533756980243  iter:  5
    label KQLearning, Reward 29: 39.082, Len(game): 122, Training Time: 2.798s, Prediction Time: 14.270s
    Computed global error Bellman mean:  0.003404561565090435  iter:  5
    label KQLearning, Reward 30: -234.000, Len(game): 158, Training Time: 2.890s, Prediction Time: 15.667s
    Computed global error Bellman mean:  0.03960780091341623  iter:  5
    Computed global error Bellman mean:  0.009449045477446339  iter:  5
    label KQLearning, Reward 31: -117.435, Len(game): 144, Training Time: 3.122s, Prediction Time: 17.007s
    Computed global error Bellman mean:  0.18783579814596527  iter:  5
    Computed global error Bellman mean:  0.02035434571222894  iter:  5
    label KQLearning, Reward 32: -319.152, Len(game): 71, Training Time: 3.187s, Prediction Time: 17.696s
    Computed global error Bellman mean:  0.0154158768209216  iter:  5
    Computed global error Bellman mean:  0.020309987993327658  iter:  1
    Computed global error Bellman mean:  0.0073802352033646304  iter:  5
    label KQLearning, Reward 33: -119.783, Len(game): 74, Training Time: 3.281s, Prediction Time: 18.423s
    Computed global error Bellman mean:  3.162234209349161e-07  iter:  5
    label KQLearning, Reward 34: -53.407, Len(game): 77, Training Time: 3.315s, Prediction Time: 19.196s
    Computed global error Bellman mean:  6.136186387437214e-07  iter:  5
    label KQLearning, Reward 35: -60.238, Len(game): 97, Training Time: 3.359s, Prediction Time: 20.199s
    Computed global error Bellman mean:  5.47988723438321e-05  iter:  5
    label KQLearning, Reward 36: -116.926, Len(game): 89, Training Time: 3.405s, Prediction Time: 21.140s
    Computed global error Bellman mean:  1.208702274299961e-07  iter:  5
    label KQLearning, Reward 37: -84.143, Len(game): 97, Training Time: 3.451s, Prediction Time: 22.185s
    Computed global error Bellman mean:  1.0152060020348307e-07  iter:  5
    label KQLearning, Reward 38: -146.181, Len(game): 79, Training Time: 3.485s, Prediction Time: 23.051s
    Computed global error Bellman mean:  7.532708188289975e-08  iter:  3
    label KQLearning, Reward 39: -121.399, Len(game): 79, Training Time: 3.512s, Prediction Time: 23.937s
    Computed global error Bellman mean:  3.2772637445152575e-07  iter:  5
    label KQLearning, Reward 40: -29.127, Len(game): 128, Training Time: 3.578s, Prediction Time: 25.412s
    Computed global error Bellman mean:  0.0002285378999159503  iter:  5
    label KQLearning, Reward 41: -164.180, Len(game): 157, Training Time: 3.666s, Prediction Time: 27.241s
    Computed global error Bellman mean:  0.0021274911285027523  iter:  5
    label KQLearning, Reward 42: -128.157, Len(game): 66, Training Time: 3.693s, Prediction Time: 28.052s
    Computed global error Bellman mean:  0.15732504860907268  iter:  5
    Computed global error Bellman mean:  0.007424960495000498  iter:  5
    label KQLearning, Reward 43: -309.978, Len(game): 103, Training Time: 3.808s, Prediction Time: 29.293s
    Computed global error Bellman mean:  2.840977078238798e-07  iter:  5
    label KQLearning, Reward 44: -42.808, Len(game): 119, Training Time: 3.858s, Prediction Time: 30.801s
    Computed global error Bellman mean:  0.00030923682043483086  iter:  5
    label KQLearning, Reward 45: -205.911, Len(game): 218, Training Time: 4.032s, Prediction Time: 33.577s
    Computed global error Bellman mean:  2.1282839249811437e-07  iter:  5
    label KQLearning, Reward 46: -203.133, Len(game): 115, Training Time: 4.089s, Prediction Time: 35.100s
    Computed global error Bellman mean:  5.204880500451022e-07  iter:  5
    label KQLearning, Reward 47: -84.245, Len(game): 171, Training Time: 4.192s, Prediction Time: 37.424s
    Computed global error Bellman mean:  2.503797588159602e-07  iter:  4
    label KQLearning, Reward 48: -18.125, Len(game): 107, Training Time: 4.237s, Prediction Time: 38.926s
    Computed global error Bellman mean:  0.011789465241330416  iter:  5
    Computed global error Bellman mean:  0.05543863172482571  iter:  0
    label KQLearning, Reward 49: -110.104, Len(game): 123, Training Time: 4.322s, Prediction Time: 40.700s
    Computed global error Bellman mean:  0.1959760255735043  iter:  5
    Computed global error Bellman mean:  0.008643040231840576  iter:  5
    label KQLearning, Reward 50: -98.471, Len(game): 52, Training Time: 4.363s, Prediction Time: 41.465s
    Computed global error Bellman mean:  2.7521039868641024e-08  iter:  4
    label KQLearning, Reward 51: -178.284, Len(game): 73, Training Time: 4.388s, Prediction Time: 42.550s
    Computed global error Bellman mean:  0.0004555004023591073  iter:  5
    label KQLearning, Reward 52: -146.419, Len(game): 179, Training Time: 4.499s, Prediction Time: 45.209s
    Computed global error Bellman mean:  1.0163691674797325e-07  iter:  4
    label KQLearning, Reward 53: -121.030, Len(game): 89, Training Time: 4.536s, Prediction Time: 46.580s
    Computed global error Bellman mean:  0.012641620007352687  iter:  5
    Computed global error Bellman mean:  0.046800591951224296  iter:  0
    label KQLearning, Reward 54: -97.419, Len(game): 140, Training Time: 4.646s, Prediction Time: 48.763s
    Computed global error Bellman mean:  3.572373777531067e-05  iter:  5
    label KQLearning, Reward 55: -501.496, Len(game): 176, Training Time: 4.744s, Prediction Time: 51.561s
    Computed global error Bellman mean:  5.092050690066715e-07  iter:  4
    label KQLearning, Reward 56: -156.934, Len(game): 119, Training Time: 4.792s, Prediction Time: 53.510s
    Computed global error Bellman mean:  1.3249586081752544e-07  iter:  4
    label KQLearning, Reward 57: -59.142, Len(game): 101, Training Time: 4.832s, Prediction Time: 55.197s
    Computed global error Bellman mean:  0.0008537668046471069  iter:  5
    label KQLearning, Reward 58: -59.348, Len(game): 104, Training Time: 4.881s, Prediction Time: 56.964s
    Computed global error Bellman mean:  0.0003649535757385929  iter:  5
    label KQLearning, Reward 59: -90.430, Len(game): 105, Training Time: 4.924s, Prediction Time: 58.753s
    Computed global error Bellman mean:  9.117884074816971e-06  iter:  5
    label KQLearning, Reward 60: -155.125, Len(game): 162, Training Time: 5.018s, Prediction Time: 61.601s
    Computed global error Bellman mean:  1.0825032417671743e-07  iter:  5
    label KQLearning, Reward 61: -82.988, Len(game): 96, Training Time: 5.060s, Prediction Time: 63.307s
    Computed global error Bellman mean:  0.0013346809601749632  iter:  5
    label KQLearning, Reward 62: -222.612, Len(game): 139, Training Time: 5.138s, Prediction Time: 65.804s
    Computed global error Bellman mean:  5.090026332355291e-07  iter:  5
    label KQLearning, Reward 63: -152.451, Len(game): 154, Training Time: 5.220s, Prediction Time: 68.643s
    Computed global error Bellman mean:  0.004513168539569411  iter:  5
    label KQLearning, Reward 64: -294.110, Len(game): 143, Training Time: 5.300s, Prediction Time: 71.337s
    Computed global error Bellman mean:  1.9014556235134658e-07  iter:  4
    label KQLearning, Reward 65: -123.407, Len(game): 149, Training Time: 5.373s, Prediction Time: 74.203s
    Computed global error Bellman mean:  2.484734640619861e-07  iter:  5
    label KQLearning, Reward 66: -105.602, Len(game): 118, Training Time: 5.434s, Prediction Time: 76.510s
    Computed global error Bellman mean:  4.619221917740615e-07  iter:  5
    label KQLearning, Reward 67: -116.142, Len(game): 115, Training Time: 5.491s, Prediction Time: 78.795s
    Computed global error Bellman mean:  0.001422132753413292  iter:  5
    label KQLearning, Reward 68: -93.970, Len(game): 98, Training Time: 5.537s, Prediction Time: 80.762s
    Computed global error Bellman mean:  2.61457428500898e-07  iter:  5
    label KQLearning, Reward 69: -22.622, Len(game): 93, Training Time: 5.577s, Prediction Time: 82.677s
    Computed global error Bellman mean:  0.0025050210505242108  iter:  5
    label KQLearning, Reward 70: -5.392, Len(game): 119, Training Time: 5.637s, Prediction Time: 85.150s
    Computed global error Bellman mean:  3.583561777771582e-07  iter:  5
    label KQLearning, Reward 71: -129.371, Len(game): 128, Training Time: 5.701s, Prediction Time: 87.859s
    Computed global error Bellman mean:  0.004820714404071528  iter:  5
    label KQLearning, Reward 72: -72.912, Len(game): 108, Training Time: 5.751s, Prediction Time: 90.181s
    Computed global error Bellman mean:  9.461471605248522e-05  iter:  5
    label KQLearning, Reward 73: -105.822, Len(game): 122, Training Time: 5.811s, Prediction Time: 92.829s
    Computed global error Bellman mean:  0.09486958154788548  iter:  5
    Computed global error Bellman mean:  0.0016687594018044314  iter:  1
    label KQLearning, Reward 74: -94.668, Len(game): 59, Training Time: 5.846s, Prediction Time: 94.156s
    Computed global error Bellman mean:  0.00031950302598816345  iter:  5
    label KQLearning, Reward 75: -181.803, Len(game): 103, Training Time: 5.892s, Prediction Time: 96.432s
    Computed global error Bellman mean:  0.011202575501709945  iter:  5
    Computed global error Bellman mean:  0.013921933623654334  iter:  5
    label KQLearning, Reward 76: -150.242, Len(game): 155, Training Time: 6.137s, Prediction Time: 99.916s
    Computed global error Bellman mean:  0.021038602882958946  iter:  5
    Computed global error Bellman mean:  0.09897872040415234  iter:  0
    label KQLearning, Reward 77: -13.402, Len(game): 370, Training Time: 6.852s, Prediction Time: 108.332s
    Computed global error Bellman mean:  0.007980741227699435  iter:  5
    label KQLearning, Reward 78: -163.447, Len(game): 100, Training Time: 6.899s, Prediction Time: 110.691s
    Computed global error Bellman mean:  1.8182842907339178e-07  iter:  5
    label KQLearning, Reward 79: -133.992, Len(game): 138, Training Time: 6.973s, Prediction Time: 113.949s
    Computed global error Bellman mean:  0.05116569406321372  iter:  5
    Computed global error Bellman mean:  0.0012370798462112086  iter:  5
    label KQLearning, Reward 80: -196.670, Len(game): 200, Training Time: 7.398s, Prediction Time: 118.742s
    Computed global error Bellman mean:  0.00019546730587924641  iter:  5
    label KQLearning, Reward 81: -35.824, Len(game): 108, Training Time: 7.452s, Prediction Time: 121.398s
    Computed global error Bellman mean:  3.0157625469636436e-07  iter:  5
    label KQLearning, Reward 82: -111.068, Len(game): 113, Training Time: 7.509s, Prediction Time: 124.204s
    Computed global error Bellman mean:  0.012549751185760634  iter:  5
    Computed global error Bellman mean:  0.02876021067227448  iter:  1
    label KQLearning, Reward 83: -98.277, Len(game): 117, Training Time: 7.606s, Prediction Time: 127.154s
    Computed global error Bellman mean:  0.0021161812979306215  iter:  5
    label KQLearning, Reward 84: -31.986, Len(game): 100, Training Time: 7.647s, Prediction Time: 129.722s
    Computed global error Bellman mean:  0.007686386470016417  iter:  5
    label KQLearning, Reward 85: -129.342, Len(game): 199, Training Time: 7.796s, Prediction Time: 134.826s
    Computed global error Bellman mean:  0.006022229526358748  iter:  5
    label KQLearning, Reward 86: -33.197, Len(game): 215, Training Time: 7.971s, Prediction Time: 140.428s
    Computed global error Bellman mean:  0.024062407919038683  iter:  5
    Computed global error Bellman mean:  0.05594900899344003  iter:  0
    label KQLearning, Reward 87: -72.977, Len(game): 93, Training Time: 8.021s, Prediction Time: 142.897s
    Computed global error Bellman mean:  0.004463153589297446  iter:  5
    label KQLearning, Reward 88: -14.426, Len(game): 126, Training Time: 8.086s, Prediction Time: 146.300s
    Computed global error Bellman mean:  2.2844102611080386e-07  iter:  5
    label KQLearning, Reward 89: -162.264, Len(game): 160, Training Time: 8.179s, Prediction Time: 150.642s
    Computed global error Bellman mean:  6.328115239436929e-07  iter:  5
    label KQLearning, Reward 90: -121.643, Len(game): 119, Training Time: 8.238s, Prediction Time: 153.919s
    Computed global error Bellman mean:  0.0013526667514851798  iter:  5
    label KQLearning, Reward 91: -170.828, Len(game): 130, Training Time: 8.303s, Prediction Time: 157.533s
    Computed global error Bellman mean:  3.914867129444644e-07  iter:  5
    label KQLearning, Reward 92: -127.942, Len(game): 242, Training Time: 8.512s, Prediction Time: 164.247s
    Computed global error Bellman mean:  0.014867821455283158  iter:  5
    Computed global error Bellman mean:  0.001951332574971139  iter:  5
    label KQLearning, Reward 93: -23.825, Len(game): 88, Training Time: 8.600s, Prediction Time: 166.739s
    Computed global error Bellman mean:  0.0022563746800983893  iter:  5
    label KQLearning, Reward 94: -79.366, Len(game): 113, Training Time: 8.654s, Prediction Time: 169.979s
    Computed global error Bellman mean:  0.007368897319150435  iter:  5
    label KQLearning, Reward 95: -106.928, Len(game): 112, Training Time: 8.704s, Prediction Time: 173.217s
    Computed global error Bellman mean:  0.4083079100777788  iter:  5
    Computed global error Bellman mean:  0.04476234941152717  iter:  2
    label KQLearning, Reward 96: -128.432, Len(game): 88, Training Time: 8.774s, Prediction Time: 175.802s
    Computed global error Bellman mean:  0.022882453257802766  iter:  5
    Computed global error Bellman mean:  0.04476234941152717  iter:  0
    Computed global error Bellman mean:  0.01976457817603097  iter:  3
    label KQLearning, Reward 97: -123.442, Len(game): 255, Training Time: 9.365s, Prediction Time: 183.280s
    Computed global error Bellman mean:  3.740304578463439e-07  iter:  5
    label KQLearning, Reward 98: -221.741, Len(game): 189, Training Time: 9.506s, Prediction Time: 188.951s
    Computed global error Bellman mean:  0.032296455106274734  iter:  5
    Computed global error Bellman mean:  0.0494154502474197  iter:  1
    label KQLearning, Reward 99: -204.370, Len(game): 97, Training Time: 9.580s, Prediction Time: 191.913s
    2


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** (21 minutes 1.483 seconds)


.. _sphx_glr_download_auto_ch8_ch8_lunarlander.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: ch8_lunarlander.ipynb <ch8_lunarlander.ipynb>`

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: ch8_lunarlander.py <ch8_lunarlander.py>`

    .. container:: sphx-glr-download sphx-glr-download-zip

      :download:`Download zipped: ch8_lunarlander.zip <ch8_lunarlander.zip>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_