8.2 Experiments - Cartpole

We use the OpenAI Gym library to instantiate the gymnasium CartPole-v1 environment and reproduce the figure from chapter 8_XXX.

We train the following agents:

  • PPO

  • DQN

  • Controller-based

  • Kernel Actor-Critic

  • Kernel Q-Learning

  • Kernel Q-Learning HJB

  • Kernel Policy-Gradient

We show how you can tweak some methods of each algorithm to tune it to the environment. For detailed documentation on KAgents, see the codpy documentation.

# Importing necessary modules
import sys

from matplotlib import pyplot as plt
import numpy as np

import codpy.core as core
import codpy.KQLearning as KQLearning

from ignore_utils import *

KQLearning

class KQLearningCP(KQLearning.KQLearning):
    """
    Kernel Q-Learning agent with `format` and `train` overridden for CartPole.
    """

    def format(self, sarsd, max_training_game_size=None, **kwargs):
        """
        Turn one episode into training arrays, optionally truncated.

        In Cartpole, we only want to keep a certain amount of timesteps for
        each episode, whereas the original format approach keeps all the data.

        Parameters:
        - sarsd: collection (states, actions, next_states, rewards, dones).
        - max_training_game_size: when not None, only the first
          `max_training_game_size` rows of every output are kept.
        - kwargs: forwarded to `compute_returns`.

        Returns:
        - states, actions, next_states, rewards, returns, dones.
        """
        matrices = tuple(core.get_matrix(part) for part in sarsd)
        states, raw_actions, next_states, rewards, raw_dones = matrices

        encoded_actions = KQLearning.rl_hot_encoder(raw_actions, self.actions_dim)
        returns = self.compute_returns(
            states, encoded_actions, next_states, rewards, raw_dones, **kwargs
        )
        done_flags = core.get_matrix(raw_dones, dtype=bool)

        formatted = (
            states,
            encoded_actions,
            next_states,
            rewards,
            returns,
            done_flags,
        )
        if max_training_game_size is None:
            return formatted
        # Returns were computed on the whole episode; only now is everything cut.
        return tuple(array[:max_training_game_size] for array in formatted)

    def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
        """
        Fit the critic on the replay buffer after adding the new episode.

        In cartpole we don't want clustering so we override the train method.

        Parameters:
        - game: tuple (states, actions, next_states, rewards, dones).
        - max_training_game_size: truncation length handed to `format`.
        - tol: accepted for interface compatibility; not used in this override.
        - kwargs: may hold "max_game"; also forwarded to `format` and to
          `optimal_states_values_function`.
        """
        episode_states, _actions, _next_states, _rewards, _dones = game

        # In cartpole we skip training if we already solved the environment.
        episode_cap = kwargs.get("max_game", 1e12)
        if len(episode_states) >= episode_cap:
            print("no training")
            return

        states, actions, next_states, rewards, returns, dones = self.format(
            game, max_training_game_size=max_training_game_size, **kwargs
        )

        # Once the critic is usable, its predictions replace the formatted returns.
        if self.critic.is_valid():
            state_action = np.concatenate([states, actions], axis=1)
            returns = self.critic(state_action)

        self.replay_buffer.push(states, actions, next_states, rewards, returns, dones)
        buffered_games = self.replay_buffer.memory

        # self.critic here is a kernel, and it is fit on the entire replay
        # buffer to solve for Bellman equations.
        self.critic = self.optimal_states_values_function(
            buffered_games, verbose=True, **kwargs
        )

PolicyGradient

class PolicyGradientCP(KQLearning.PolicyGradient):
    """
    Kernel Policy-Gradient agent with `format` and `train` overridden for CartPole.
    """

    def format(self, sarsd, max_training_game_size=None, **kwargs):
        """
        Build the training arrays for one episode, optionally truncated.

        Parameters:
        - sarsd: collection (states, actions, next_states, rewards, dones).
        - max_training_game_size: when not None, every output is cut to its
          first `max_training_game_size` rows.
        - kwargs: forwarded to `compute_returns`.

        Returns:
        - states, actions, next_states, rewards, returns, dones.
        """
        converted = [core.get_matrix(item) for item in sarsd]
        states, played_actions, next_states, rewards, episode_dones = converted

        hot_actions = KQLearning.rl_hot_encoder(played_actions, self.actions_dim)
        returns = self.compute_returns(
            states, hot_actions, next_states, rewards, episode_dones, **kwargs
        )
        bool_dones = core.get_matrix(episode_dones, dtype=bool)

        outputs = (states, hot_actions, next_states, rewards, returns, bool_dones)
        if max_training_game_size is not None:
            # Truncation happens after the returns have been computed.
            outputs = tuple(block[:max_training_game_size] for block in outputs)
        return outputs

    def train(self, game, **kwargs):
        """
        Delegate to the parent `train` with `clip=1.`, unless the episode
        reached the "max_game" length given in kwargs (default 1e12).

        Parameters:
        - game: tuple (states, actions, next_states, rewards, dones).
        - kwargs: may hold "max_game"; forwarded to the parent `train`.
        """
        visited_states, _actions, _next_states, _rewards, _dones = game
        too_long = len(visited_states) >= kwargs.get("max_game", 1e12)
        if too_long:
            print("no training")
            return
        super().train(game, clip=1.0, **kwargs)

KActorCritic

class KActorCriticCP(KQLearning.KActorCritic):
    """
    Kernel Actor-Critic agent with `format` and `train` overridden for CartPole.
    """

    def format(self, sarsd, max_training_game_size=None, **kwargs):
        """
        Format the game data, keeping at most `max_training_game_size` timesteps.

        Parameters:
        - sarsd: tuple collection of game data
          (states, actions, next_states, rewards, dones).
        - max_training_game_size: maximum number of timesteps to keep for
          training; None keeps everything.
        - kwargs: forwarded to `compute_returns`.

        Returns:
        - states, actions, next_states, rewards, returns, dones: formatted game data.
        """
        states, actions, next_states, rewards, dones = [
            core.get_matrix(component) for component in sarsd
        ]

        actions = KQLearning.rl_hot_encoder(actions, self.actions_dim)
        returns = self.compute_returns(
            states, actions, next_states, rewards, dones, **kwargs
        )
        dones = core.get_matrix(dones, dtype=bool)

        if max_training_game_size is None:
            return states, actions, next_states, rewards, returns, dones

        # One shared slice applied to every array after the returns are known.
        keep = slice(None, max_training_game_size)
        return (
            states[keep],
            actions[keep],
            next_states[keep],
            rewards[keep],
            returns[keep],
            dones[keep],
        )

    def train(self, game, **kwargs):
        """
        Skip training if the game was too long (for cartpole, this means we
        already solved the environment); otherwise call the parent `train`
        with `clip=1.`.

        Parameters:
        - game: tuple (states, actions, next_states, rewards, dones).
        - kwargs: may hold "max_game" (default 1e12); forwarded to the parent.
        """
        game_states, _actions, _next_states, _rewards, _dones = game
        game_length = len(game_states)
        if game_length >= kwargs.get("max_game", 1e12):
            print("no training")
            return
        super().train(game, clip=1.0, **kwargs)

HJB

class KQLearningHJBCP(KQLearning.KQLearningHJB):
    """
    Kernel Q-Learning HJB agent with `format` and `train` overridden for CartPole.
    """

    def format(self, sarsd, max_training_game_size=None, **kwargs):
        """
        Convert one episode into training arrays, optionally truncated.

        Parameters:
        - sarsd: collection (states, actions, next_states, rewards, dones).
        - max_training_game_size: when not None, only the leading
          `max_training_game_size` rows of each output are returned.
        - kwargs: forwarded to `compute_returns`.

        Returns:
        - states, actions, next_states, rewards, returns, dones.
        """
        as_matrices = [core.get_matrix(entry) for entry in sarsd]
        states, action_ids, next_states, rewards, done_column = as_matrices

        actions = KQLearning.rl_hot_encoder(action_ids, self.actions_dim)
        returns = self.compute_returns(
            states, actions, next_states, rewards, done_column, **kwargs
        )
        dones = core.get_matrix(done_column, dtype=bool)

        result = (states, actions, next_states, rewards, returns, dones)
        if max_training_game_size is None:
            return result
        # The cut is applied only after the returns have been computed.
        return tuple(part[:max_training_game_size] for part in result)

    def train(self, game, max_training_game_size=sys.maxsize, tol=1e-4, **kwargs):
        """
        Push the formatted episode to the replay buffer and refit the critic
        on the whole buffer.

        Parameters:
        - game: tuple (states, actions, next_states, rewards, dones).
        - max_training_game_size: truncation length handed to `format`.
        - tol: accepted for interface compatibility; not used in this override.
        - kwargs: may hold "max_game" (default 1e12); also forwarded to
          `format` and to `optimal_states_values_function`.
        """
        new_states, _actions, _next_states, _rewards, _dones = game

        if len(new_states) >= kwargs.get("max_game", 1e12):
            print("no training")
            return

        formatted_episode = self.format(
            game, max_training_game_size=max_training_game_size, **kwargs
        )
        self.replay_buffer.push(*formatted_episode)

        memory = self.replay_buffer.memory
        states, actions, next_states, rewards, stored_returns, dones = memory

        training_set = memory
        # is_valid() returns False if the kernel hasn't been properly
        # initialized, i.e. x and fx haven't been set.
        if self.critic.is_valid():
            # We compute returns using the critic instead of MC returns.
            critic_returns = self.critic(np.concatenate([states, actions], axis=1))
            training_set = (
                states,
                actions,
                next_states,
                rewards,
                critic_returns,
                dones,
            )

        self.critic = self.optimal_states_values_function(
            training_set, verbose=True, **kwargs
        )

KController

class heuristic_ControllerCP:
    """
    This class defines an expert-based heuristic controller for the CartPole environment.

    The controller is a linear rule: the action is 1 when the weighted sum of
    the state entries is strictly positive, and 0 otherwise.
    """
    # This is the number of parameters to be optimized
    dim = 4

    def __init__(self, w=None, **kwargs):
        """
        Parameters:
        - w: optional initial parameters (array or list-like). When omitted,
          `dim` parameters equal to 0.5 are used.
        - kwargs: accepted and ignored, so that agent-level keyword arguments
          can be forwarded unchanged.
        """
        if w is None:
            self.w = np.ones([self.dim]) * 0.5
        else:
            # Store a flat ndarray, as set_thetas does: get_distribution reads
            # self.w.shape[0], which needs an array whose first axis is the
            # parameter count. ravel accepts lists and nested shapes.
            self.w = np.ravel(w)

    def get_distribution(self):
        """
        This will be called by the optimizer. You need to define a way to sample from the parameters distribution, and get the support.

        Returns:
        - a callable object: calling it with `n` draws an (n, number of
          parameters) array of uniform samples in [-1, 1); its `support`
          method returns its argument unchanged.
        """
        class uniform:
            def __init__(self, shape1):
                self.shape1 = shape1

            def __call__(self, n):
                # Rescale uniform [0, 1) samples to [-1, 1).
                return 2 * np.random.uniform(size=[n, self.shape1]) - 1

            def support(self, v):
                return v

        return uniform(self.w.shape[0])

    def get_thetas(self):
        """Return the current parameter vector."""
        return self.w

    def set_thetas(self, w):
        """Replace the parameters with a flattened copy of `w` (array or list-like)."""
        self.w = np.asarray(w).flatten()

    def __call__(self, s, **kwargs):
        """
        Will be used to make inference. This is where you define the action to be taken.

        Parameters:
        - s : state of the environment. Every entry of `w * s` is summed into
          one scalar, so `s` is expected to be a single state that broadcasts
          against the parameters, not a batch of states.

        Returns:
        - prod: int, action to be taken (1 if the weighted sum is strictly
          positive, 0 if it is zero or negative).
        """
        prod = (self.w * s).sum()
        # sign is in {-1, 0, 1}; (sign + 1) / 2 is in {0, 0.5, 1}; int() truncates 0.5 to 0.
        prod = int((np.sign(prod) + 1) / 2)
        return prod

class KControllerCP(KQLearning.KController):
    """
    This is the main class which will optimize the heuristic controller.
    """
    def __init__(self, state_dim, actions_dim, **kwargs):
        """
        Build the CartPole heuristic controller and hand it to the parent class.

        Parameters:
        - state_dim, actions_dim: forwarded to the parent constructor.
        - kwargs: forwarded both to the controller and to the parent constructor.
        """
        # This is where you would pass any other custom controller
        controller = heuristic_ControllerCP(state_dim=state_dim, **kwargs)
        super().__init__(state_dim, actions_dim, controller, **kwargs)

    def get_function(self, **kwargs):
        """
        The optimizer will find the best parameters which maximizes this function.

        This is where you would tweak the function to be maximized.

        Returns:
        - a function of the parameters `x` returning the estimated expectation
          at `x` multiplied by the estimator's `distance(x)`.
        """
        self.expectation_estimator = self.get_expectation_estimator(self.x, self.y, **kwargs)
        def function(x):
            expectation = self.expectation_estimator(x)
            distance = self.expectation_estimator.distance(x)
            return expectation * distance
        return function


    def format(self, sarsd, **kwargs):
        """
        In the case of the controller, the agent only sees the sum of the rewards for an entire episode.
        All other game data won't be used for training. The format function still needs to output a tuple.

        Parameters:
        - sarsd: collection (state, action, next_state, reward, done) for one episode.

        Returns:
        - a 5-tuple of episode-level summaries: mean state, controller
          parameters (in the action slot), mean next state, summed reward,
          mean of the done flags.
        """
        state, _played_action, next_state, reward, done = [
            core.get_matrix(e) for e in sarsd
        ]
        # Rewards on terminal steps are not counted in the episode total.
        reward[done.astype(bool)] = 0

        # The action slot carries the controller parameters that produced the
        # episode; the played actions themselves are not used, so they are not
        # one-hot encoded.
        action = core.get_matrix(self.controller.get_thetas()).T
        done = core.get_matrix(done, dtype=bool)
        return (
            core.get_matrix(state.mean(axis=0)).T,
            core.get_matrix(action.mean(axis=0)).T,
            core.get_matrix(next_state.mean(axis=0)).T,
            core.get_matrix(reward.sum(axis=0)).T,
            core.get_matrix(done.mean(axis=0)).T,
        )

    def train(self, game, **kwargs):
        """
        Call the parent `train`, unless the episode reached the "max_game"
        length given in kwargs (default 1e12).
        """
        # Similarly, you can skip training if the game is too long to save training time.
        states, actions, next_states, rewards, dones = game
        if len(states) >= kwargs.get("max_game", 1e12):
            print("no training")
            return
        super().train(game, **kwargs)

if __name__ == "__main__":
    # Agents trained in the benchmark, keyed by their label.
    # If game_dictionary is empty, the benchmark will try to load data from the .pkl file.
    game_dictionary = {
        "PPOAgent": PPOAgent,
        "PolicyGradient": PolicyGradientCP,
        "Controller-based": KControllerCP,
        "KACAgent": KActorCriticCP,
        "DQNAgent": DQNAgent,
        "KQLearningHJBCP": KQLearningHJBCP,
        "KQLearning": KQLearningCP,
    }

    # Kernel settings shared by several entries; each entry gets its own copy.
    kernel_defaults = {
        "n_batch": 1_000_000,
        "max_nystrom": 1000,
        "reg": 1e-9,
        "order": None,
    }

    # Agent parameters. This dict will be passed to each agent's __init__() method.
    extras = {
        # "D":4,
        "KActor": dict(kernel_defaults),
        "KCritic": dict(kernel_defaults),
        "Rewards": dict(kernel_defaults),
        "DQNAgent": {
            # 'reward_function': mc_reward_function,
            "episodes": 500,
            "policy_param": 64,
            "target_param": 64,
        },
        "KController": {
            "reg": 1e-3,
            "order": None,
        },
        "HJBModel": {
            # "latent_shape":[100,50],
            "max_size": 100_000,
            **kernel_defaults,
            "state_dim": 4,
        },
        "max_game": 1000,
        "max_training_game_size": 1000,
        "gamma": 0.99,
        "capacity": 200_000_000,
        # "seed": 42,
    }

    # With no "seed" entry, NumPy is seeded with None.
    seed = extras.get("seed")
    np.random.seed(seed)

    benchmark = Benchmark()
    benchmark(
        game_dictionary,
        "CartPole-v1",
        num_games=100,
        num_repeats=3,
        max_time=3,
        axis="episode",
        # file_name="results_CP_final.pkl",
        **extras,
    )
    plt.show()
  • Cumulative Reward over 100 Games
  • Training Time per Game over 100 Games
label PPOAgent, Reward 0: 27.000, Len(game): 27, Training Time: 0.006s, Prediction Time: 0.006s
label PPOAgent, Reward 1: 25.000, Len(game): 25, Training Time: 0.010s, Prediction Time: 0.010s
label PPOAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.013s, Prediction Time: 0.013s
label PPOAgent, Reward 3: 13.000, Len(game): 13, Training Time: 0.016s, Prediction Time: 0.016s
label PPOAgent, Reward 4: 15.000, Len(game): 15, Training Time: 0.019s, Prediction Time: 0.019s
label PPOAgent, Reward 5: 12.000, Len(game): 12, Training Time: 0.021s, Prediction Time: 0.021s
label PPOAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.024s, Prediction Time: 0.024s
label PPOAgent, Reward 7: 33.000, Len(game): 33, Training Time: 0.030s, Prediction Time: 0.030s
label PPOAgent, Reward 8: 16.000, Len(game): 16, Training Time: 0.032s, Prediction Time: 0.032s
label PPOAgent, Reward 9: 27.000, Len(game): 27, Training Time: 0.037s, Prediction Time: 0.037s
label PPOAgent, Reward 10: 12.000, Len(game): 12, Training Time: 0.040s, Prediction Time: 0.040s
label PPOAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.042s, Prediction Time: 0.042s
label PPOAgent, Reward 12: 12.000, Len(game): 12, Training Time: 0.045s, Prediction Time: 0.045s
label PPOAgent, Reward 13: 20.000, Len(game): 20, Training Time: 0.048s, Prediction Time: 0.048s
label PPOAgent, Reward 14: 16.000, Len(game): 16, Training Time: 0.052s, Prediction Time: 0.052s
label PPOAgent, Reward 15: 37.000, Len(game): 37, Training Time: 0.058s, Prediction Time: 0.058s
label PPOAgent, Reward 16: 25.000, Len(game): 25, Training Time: 0.063s, Prediction Time: 0.063s
label PPOAgent, Reward 17: 10.000, Len(game): 10, Training Time: 0.065s, Prediction Time: 0.065s
label PPOAgent, Reward 18: 17.000, Len(game): 17, Training Time: 0.068s, Prediction Time: 0.068s
label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.072s, Prediction Time: 0.072s
label PPOAgent, Reward 20: 18.000, Len(game): 18, Training Time: 0.075s, Prediction Time: 0.075s
label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.078s, Prediction Time: 0.078s
label PPOAgent, Reward 22: 15.000, Len(game): 15, Training Time: 0.081s, Prediction Time: 0.081s
label PPOAgent, Reward 23: 14.000, Len(game): 14, Training Time: 0.083s, Prediction Time: 0.083s
label PPOAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.087s, Prediction Time: 0.087s
label PPOAgent, Reward 25: 21.000, Len(game): 21, Training Time: 0.091s, Prediction Time: 0.091s
label PPOAgent, Reward 26: 20.000, Len(game): 20, Training Time: 0.094s, Prediction Time: 0.094s
label PPOAgent, Reward 27: 11.000, Len(game): 11, Training Time: 0.096s, Prediction Time: 0.096s
label PPOAgent, Reward 28: 12.000, Len(game): 12, Training Time: 0.099s, Prediction Time: 0.099s
label PPOAgent, Reward 29: 19.000, Len(game): 19, Training Time: 0.102s, Prediction Time: 0.102s
label PPOAgent, Reward 30: 10.000, Len(game): 10, Training Time: 0.104s, Prediction Time: 0.104s
label PPOAgent, Reward 31: 17.000, Len(game): 17, Training Time: 0.108s, Prediction Time: 0.108s
label PPOAgent, Reward 32: 29.000, Len(game): 29, Training Time: 0.113s, Prediction Time: 0.113s
label PPOAgent, Reward 33: 15.000, Len(game): 15, Training Time: 0.117s, Prediction Time: 0.117s
label PPOAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.119s, Prediction Time: 0.119s
label PPOAgent, Reward 35: 15.000, Len(game): 15, Training Time: 0.122s, Prediction Time: 0.122s
label PPOAgent, Reward 36: 32.000, Len(game): 32, Training Time: 0.128s, Prediction Time: 0.128s
label PPOAgent, Reward 37: 22.000, Len(game): 22, Training Time: 0.133s, Prediction Time: 0.133s
label PPOAgent, Reward 38: 19.000, Len(game): 19, Training Time: 0.137s, Prediction Time: 0.137s
label PPOAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.138s, Prediction Time: 0.138s
label PPOAgent, Reward 40: 16.000, Len(game): 16, Training Time: 0.141s, Prediction Time: 0.141s
label PPOAgent, Reward 41: 12.000, Len(game): 12, Training Time: 0.143s, Prediction Time: 0.143s
label PPOAgent, Reward 42: 31.000, Len(game): 31, Training Time: 0.150s, Prediction Time: 0.150s
label PPOAgent, Reward 43: 12.000, Len(game): 12, Training Time: 0.151s, Prediction Time: 0.151s
label PPOAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.153s, Prediction Time: 0.153s
label PPOAgent, Reward 45: 50.000, Len(game): 50, Training Time: 0.162s, Prediction Time: 0.162s
label PPOAgent, Reward 46: 17.000, Len(game): 17, Training Time: 0.165s, Prediction Time: 0.165s
label PPOAgent, Reward 47: 28.000, Len(game): 28, Training Time: 0.172s, Prediction Time: 0.172s
label PPOAgent, Reward 48: 19.000, Len(game): 19, Training Time: 0.175s, Prediction Time: 0.175s
label PPOAgent, Reward 49: 13.000, Len(game): 13, Training Time: 0.178s, Prediction Time: 0.178s
label PPOAgent, Reward 50: 20.000, Len(game): 20, Training Time: 0.182s, Prediction Time: 0.182s
label PPOAgent, Reward 51: 9.000, Len(game): 9, Training Time: 0.184s, Prediction Time: 0.184s
label PPOAgent, Reward 52: 16.000, Len(game): 16, Training Time: 0.188s, Prediction Time: 0.188s
label PPOAgent, Reward 53: 42.000, Len(game): 42, Training Time: 0.195s, Prediction Time: 0.195s
label PPOAgent, Reward 54: 23.000, Len(game): 23, Training Time: 0.200s, Prediction Time: 0.200s
label PPOAgent, Reward 55: 9.000, Len(game): 9, Training Time: 0.202s, Prediction Time: 0.202s
label PPOAgent, Reward 56: 19.000, Len(game): 19, Training Time: 0.206s, Prediction Time: 0.206s
label PPOAgent, Reward 57: 12.000, Len(game): 12, Training Time: 0.208s, Prediction Time: 0.208s
label PPOAgent, Reward 58: 35.000, Len(game): 35, Training Time: 0.215s, Prediction Time: 0.215s
label PPOAgent, Reward 59: 18.000, Len(game): 18, Training Time: 0.218s, Prediction Time: 0.218s
label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.221s, Prediction Time: 0.221s
label PPOAgent, Reward 61: 18.000, Len(game): 18, Training Time: 0.225s, Prediction Time: 0.225s
label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.228s, Prediction Time: 0.228s
label PPOAgent, Reward 63: 24.000, Len(game): 24, Training Time: 0.273s, Prediction Time: 0.273s
label PPOAgent, Reward 64: 79.000, Len(game): 79, Training Time: 0.288s, Prediction Time: 0.288s
label PPOAgent, Reward 65: 46.000, Len(game): 46, Training Time: 0.296s, Prediction Time: 0.296s
label PPOAgent, Reward 66: 9.000, Len(game): 9, Training Time: 0.298s, Prediction Time: 0.298s
label PPOAgent, Reward 67: 37.000, Len(game): 37, Training Time: 0.305s, Prediction Time: 0.305s
label PPOAgent, Reward 68: 45.000, Len(game): 45, Training Time: 0.314s, Prediction Time: 0.314s
label PPOAgent, Reward 69: 13.000, Len(game): 13, Training Time: 0.316s, Prediction Time: 0.316s
label PPOAgent, Reward 70: 30.000, Len(game): 30, Training Time: 0.322s, Prediction Time: 0.322s
label PPOAgent, Reward 71: 9.000, Len(game): 9, Training Time: 0.324s, Prediction Time: 0.324s
label PPOAgent, Reward 72: 27.000, Len(game): 27, Training Time: 0.330s, Prediction Time: 0.330s
label PPOAgent, Reward 73: 10.000, Len(game): 10, Training Time: 0.332s, Prediction Time: 0.332s
label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.339s, Prediction Time: 0.339s
label PPOAgent, Reward 75: 22.000, Len(game): 22, Training Time: 0.343s, Prediction Time: 0.343s
label PPOAgent, Reward 76: 13.000, Len(game): 13, Training Time: 0.345s, Prediction Time: 0.345s
label PPOAgent, Reward 77: 28.000, Len(game): 28, Training Time: 0.351s, Prediction Time: 0.351s
label PPOAgent, Reward 78: 11.000, Len(game): 11, Training Time: 0.353s, Prediction Time: 0.353s
label PPOAgent, Reward 79: 20.000, Len(game): 20, Training Time: 0.357s, Prediction Time: 0.357s
label PPOAgent, Reward 80: 33.000, Len(game): 33, Training Time: 0.362s, Prediction Time: 0.362s
label PPOAgent, Reward 81: 13.000, Len(game): 13, Training Time: 0.366s, Prediction Time: 0.366s
label PPOAgent, Reward 82: 16.000, Len(game): 16, Training Time: 0.369s, Prediction Time: 0.369s
label PPOAgent, Reward 83: 19.000, Len(game): 19, Training Time: 0.373s, Prediction Time: 0.373s
label PPOAgent, Reward 84: 15.000, Len(game): 15, Training Time: 0.376s, Prediction Time: 0.376s
label PPOAgent, Reward 85: 33.000, Len(game): 33, Training Time: 0.383s, Prediction Time: 0.383s
label PPOAgent, Reward 86: 25.000, Len(game): 25, Training Time: 0.388s, Prediction Time: 0.388s
label PPOAgent, Reward 87: 27.000, Len(game): 27, Training Time: 0.392s, Prediction Time: 0.392s
label PPOAgent, Reward 88: 26.000, Len(game): 26, Training Time: 0.398s, Prediction Time: 0.398s
label PPOAgent, Reward 89: 40.000, Len(game): 40, Training Time: 0.405s, Prediction Time: 0.405s
label PPOAgent, Reward 90: 41.000, Len(game): 41, Training Time: 0.413s, Prediction Time: 0.413s
label PPOAgent, Reward 91: 14.000, Len(game): 14, Training Time: 0.415s, Prediction Time: 0.415s
label PPOAgent, Reward 92: 69.000, Len(game): 69, Training Time: 0.428s, Prediction Time: 0.428s
label PPOAgent, Reward 93: 99.000, Len(game): 99, Training Time: 0.447s, Prediction Time: 0.447s
label PPOAgent, Reward 94: 18.000, Len(game): 18, Training Time: 0.450s, Prediction Time: 0.450s
label PPOAgent, Reward 95: 20.000, Len(game): 20, Training Time: 0.454s, Prediction Time: 0.454s
label PPOAgent, Reward 96: 63.000, Len(game): 63, Training Time: 0.464s, Prediction Time: 0.464s
label PPOAgent, Reward 97: 16.000, Len(game): 16, Training Time: 0.468s, Prediction Time: 0.468s
label PPOAgent, Reward 98: 20.000, Len(game): 20, Training Time: 0.472s, Prediction Time: 0.472s
label PPOAgent, Reward 99: 14.000, Len(game): 14, Training Time: 0.474s, Prediction Time: 0.474s
label PolicyGradient, Reward 0: 25.000, Len(game): 25, Training Time: 0.005s, Prediction Time: 0.000s
label PolicyGradient, Reward 1: 30.000, Len(game): 30, Training Time: 0.010s, Prediction Time: 0.005s
label PolicyGradient, Reward 2: 12.000, Len(game): 12, Training Time: 0.016s, Prediction Time: 0.008s
label PolicyGradient, Reward 3: 62.000, Len(game): 62, Training Time: 0.027s, Prediction Time: 0.019s
label PolicyGradient, Reward 4: 17.000, Len(game): 17, Training Time: 0.036s, Prediction Time: 0.025s
label PolicyGradient, Reward 5: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.029s
label PolicyGradient, Reward 6: 17.000, Len(game): 17, Training Time: 0.059s, Prediction Time: 0.035s
label PolicyGradient, Reward 7: 32.000, Len(game): 32, Training Time: 0.072s, Prediction Time: 0.044s
label PolicyGradient, Reward 8: 12.000, Len(game): 12, Training Time: 0.086s, Prediction Time: 0.049s
label PolicyGradient, Reward 9: 39.000, Len(game): 39, Training Time: 0.104s, Prediction Time: 0.061s
label PolicyGradient, Reward 10: 18.000, Len(game): 18, Training Time: 0.126s, Prediction Time: 0.070s
label PolicyGradient, Reward 11: 14.000, Len(game): 14, Training Time: 0.147s, Prediction Time: 0.079s
label PolicyGradient, Reward 12: 41.000, Len(game): 41, Training Time: 0.175s, Prediction Time: 0.096s
label PolicyGradient, Reward 13: 55.000, Len(game): 55, Training Time: 0.216s, Prediction Time: 0.117s
label PolicyGradient, Reward 14: 10.000, Len(game): 10, Training Time: 0.254s, Prediction Time: 0.132s
label PolicyGradient, Reward 15: 18.000, Len(game): 18, Training Time: 0.298s, Prediction Time: 0.150s
label PolicyGradient, Reward 16: 12.000, Len(game): 12, Training Time: 0.343s, Prediction Time: 0.166s
label PolicyGradient, Reward 17: 89.000, Len(game): 89, Training Time: 0.405s, Prediction Time: 0.206s
label PolicyGradient, Reward 18: 11.000, Len(game): 11, Training Time: 0.464s, Prediction Time: 0.229s
label PolicyGradient, Reward 19: 94.000, Len(game): 94, Training Time: 0.546s, Prediction Time: 0.276s
label PolicyGradient, Reward 20: 26.000, Len(game): 26, Training Time: 0.636s, Prediction Time: 0.317s
label PolicyGradient, Reward 21: 62.000, Len(game): 62, Training Time: 0.737s, Prediction Time: 0.367s
label PolicyGradient, Reward 22: 77.000, Len(game): 77, Training Time: 0.852s, Prediction Time: 0.428s
label PolicyGradient, Reward 23: 13.000, Len(game): 13, Training Time: 0.972s, Prediction Time: 0.478s
label PolicyGradient, Reward 24: 97.000, Len(game): 97, Training Time: 1.125s, Prediction Time: 0.563s
label PolicyGradient, Reward 25: 108.000, Len(game): 108, Training Time: 1.319s, Prediction Time: 0.665s
label PolicyGradient, Reward 26: 22.000, Len(game): 22, Training Time: 1.512s, Prediction Time: 0.742s
label PolicyGradient, Reward 27: 102.000, Len(game): 102, Training Time: 1.718s, Prediction Time: 0.863s
label PolicyGradient, Reward 28: 109.000, Len(game): 109, Training Time: 1.996s, Prediction Time: 0.999s
label PolicyGradient, Reward 29: 117.000, Len(game): 117, Training Time: 2.310s, Prediction Time: 1.162s
label PolicyGradient, Reward 30: 16.000, Len(game): 16, Training Time: 2.625s, Prediction Time: 1.292s
label PolicyGradient, Reward 31: 149.000, Len(game): 149, Training Time: 2.999s, Prediction Time: 1.518s
label PolicyGradient, Reward 32: 105.000, Len(game): 105, Training Time: 3.434s, Prediction Time: 1.740s
label PolicyGradient, Reward 33: 153.000, Len(game): 153, Training Time: 3.434s, Prediction Time: 2.021s
label PolicyGradient, Reward 34: 117.000, Len(game): 117, Training Time: 3.434s, Prediction Time: 2.108s
label PolicyGradient, Reward 35: 12.000, Len(game): 12, Training Time: 3.434s, Prediction Time: 2.116s
label PolicyGradient, Reward 36: 265.000, Len(game): 265, Training Time: 3.434s, Prediction Time: 2.288s
label PolicyGradient, Reward 37: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.299s
label PolicyGradient, Reward 38: 119.000, Len(game): 119, Training Time: 3.434s, Prediction Time: 2.381s
label PolicyGradient, Reward 39: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 2.393s
label PolicyGradient, Reward 40: 176.000, Len(game): 176, Training Time: 3.434s, Prediction Time: 2.522s
label PolicyGradient, Reward 41: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 2.603s
label PolicyGradient, Reward 42: 103.000, Len(game): 103, Training Time: 3.434s, Prediction Time: 2.672s
label PolicyGradient, Reward 43: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 2.683s
label PolicyGradient, Reward 44: 34.000, Len(game): 34, Training Time: 3.434s, Prediction Time: 2.706s
label PolicyGradient, Reward 45: 210.000, Len(game): 210, Training Time: 3.434s, Prediction Time: 2.847s
label PolicyGradient, Reward 46: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 2.930s
label PolicyGradient, Reward 47: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.011s
label PolicyGradient, Reward 48: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 3.157s
label PolicyGradient, Reward 49: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 3.182s
label PolicyGradient, Reward 50: 39.000, Len(game): 39, Training Time: 3.434s, Prediction Time: 3.209s
label PolicyGradient, Reward 51: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.295s
label PolicyGradient, Reward 52: 104.000, Len(game): 104, Training Time: 3.434s, Prediction Time: 3.364s
label PolicyGradient, Reward 53: 46.000, Len(game): 46, Training Time: 3.434s, Prediction Time: 3.397s
label PolicyGradient, Reward 54: 18.000, Len(game): 18, Training Time: 3.434s, Prediction Time: 3.409s
label PolicyGradient, Reward 55: 13.000, Len(game): 13, Training Time: 3.434s, Prediction Time: 3.417s
label PolicyGradient, Reward 56: 111.000, Len(game): 111, Training Time: 3.434s, Prediction Time: 3.495s
label PolicyGradient, Reward 57: 130.000, Len(game): 130, Training Time: 3.434s, Prediction Time: 3.582s
label PolicyGradient, Reward 58: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 3.658s
label PolicyGradient, Reward 59: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 3.745s
label PolicyGradient, Reward 60: 122.000, Len(game): 122, Training Time: 3.434s, Prediction Time: 3.825s
label PolicyGradient, Reward 61: 10.000, Len(game): 10, Training Time: 3.434s, Prediction Time: 3.832s
label PolicyGradient, Reward 62: 16.000, Len(game): 16, Training Time: 3.434s, Prediction Time: 3.842s
label PolicyGradient, Reward 63: 20.000, Len(game): 20, Training Time: 3.434s, Prediction Time: 3.855s
label PolicyGradient, Reward 64: 115.000, Len(game): 115, Training Time: 3.434s, Prediction Time: 3.932s
label PolicyGradient, Reward 65: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 3.960s
label PolicyGradient, Reward 66: 148.000, Len(game): 148, Training Time: 3.434s, Prediction Time: 4.060s
label PolicyGradient, Reward 67: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 4.089s
label PolicyGradient, Reward 68: 124.000, Len(game): 124, Training Time: 3.434s, Prediction Time: 4.173s
label PolicyGradient, Reward 69: 63.000, Len(game): 63, Training Time: 3.434s, Prediction Time: 4.214s
label PolicyGradient, Reward 70: 112.000, Len(game): 112, Training Time: 3.434s, Prediction Time: 4.290s
label PolicyGradient, Reward 71: 114.000, Len(game): 114, Training Time: 3.434s, Prediction Time: 4.366s
label PolicyGradient, Reward 72: 17.000, Len(game): 17, Training Time: 3.434s, Prediction Time: 4.378s
label PolicyGradient, Reward 73: 142.000, Len(game): 142, Training Time: 3.434s, Prediction Time: 4.477s
label PolicyGradient, Reward 74: 152.000, Len(game): 152, Training Time: 3.434s, Prediction Time: 4.578s
label PolicyGradient, Reward 75: 224.000, Len(game): 224, Training Time: 3.434s, Prediction Time: 4.730s
label PolicyGradient, Reward 76: 106.000, Len(game): 106, Training Time: 3.434s, Prediction Time: 4.800s
label PolicyGradient, Reward 77: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 4.810s
label PolicyGradient, Reward 78: 149.000, Len(game): 149, Training Time: 3.434s, Prediction Time: 4.908s
label PolicyGradient, Reward 79: 81.000, Len(game): 81, Training Time: 3.434s, Prediction Time: 4.962s
label PolicyGradient, Reward 80: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.035s
label PolicyGradient, Reward 81: 116.000, Len(game): 116, Training Time: 3.434s, Prediction Time: 5.114s
label PolicyGradient, Reward 82: 37.000, Len(game): 37, Training Time: 3.434s, Prediction Time: 5.139s
label PolicyGradient, Reward 83: 219.000, Len(game): 219, Training Time: 3.434s, Prediction Time: 5.286s
label PolicyGradient, Reward 84: 33.000, Len(game): 33, Training Time: 3.434s, Prediction Time: 5.307s
label PolicyGradient, Reward 85: 61.000, Len(game): 61, Training Time: 3.434s, Prediction Time: 5.348s
label PolicyGradient, Reward 86: 21.000, Len(game): 21, Training Time: 3.434s, Prediction Time: 5.362s
label PolicyGradient, Reward 87: 138.000, Len(game): 138, Training Time: 3.434s, Prediction Time: 5.455s
label PolicyGradient, Reward 88: 42.000, Len(game): 42, Training Time: 3.434s, Prediction Time: 5.482s
label PolicyGradient, Reward 89: 109.000, Len(game): 109, Training Time: 3.434s, Prediction Time: 5.555s
label PolicyGradient, Reward 90: 113.000, Len(game): 113, Training Time: 3.434s, Prediction Time: 5.630s
label PolicyGradient, Reward 91: 67.000, Len(game): 67, Training Time: 3.434s, Prediction Time: 5.674s
label PolicyGradient, Reward 92: 51.000, Len(game): 51, Training Time: 3.434s, Prediction Time: 5.708s
label PolicyGradient, Reward 93: 128.000, Len(game): 128, Training Time: 3.434s, Prediction Time: 5.794s
label PolicyGradient, Reward 94: 77.000, Len(game): 77, Training Time: 3.434s, Prediction Time: 5.844s
label PolicyGradient, Reward 95: 43.000, Len(game): 43, Training Time: 3.434s, Prediction Time: 5.873s
label PolicyGradient, Reward 96: 40.000, Len(game): 40, Training Time: 3.434s, Prediction Time: 5.899s
label PolicyGradient, Reward 97: 45.000, Len(game): 45, Training Time: 3.434s, Prediction Time: 5.930s
label PolicyGradient, Reward 98: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 5.941s
label PolicyGradient, Reward 99: 15.000, Len(game): 15, Training Time: 3.434s, Prediction Time: 5.950s
label Controller-based, Reward 0: 112.000, Len(game): 112, Training Time: 0.002s, Prediction Time: 0.001s
label Controller-based, Reward 1: 79.000, Len(game): 79, Training Time: 0.003s, Prediction Time: 0.001s
label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.001s
label Controller-based, Reward 3: 69.000, Len(game): 69, Training Time: 0.021s, Prediction Time: 0.003s
no training
label Controller-based, Reward 4: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.011s
no training
label Controller-based, Reward 5: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.020s
no training
label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.029s
no training
label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.036s
no training
label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.046s
no training
label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.054s
no training
label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.063s
no training
label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.071s
no training
label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.079s
no training
label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.085s
no training
label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.095s
no training
label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.103s
no training
label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.112s
no training
label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.120s
no training
label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.129s
no training
label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.137s
no training
label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.146s
no training
label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.153s
no training
label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.162s
no training
label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.171s
no training
label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.179s
no training
label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.187s
no training
label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.195s
no training
label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.203s
no training
label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.212s
no training
label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.220s
no training
label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.228s
no training
label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.237s
no training
label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.245s
no training
label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.253s
no training
label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.261s
no training
label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.270s
no training
label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.278s
no training
label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.286s
no training
label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.295s
no training
label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.303s
no training
label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.312s
no training
label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.320s
no training
label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.021s, Prediction Time: 0.328s
no training
label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.336s
no training
label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.344s
no training
label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.352s
no training
label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.360s
no training
label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.369s
no training
label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.377s
no training
label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.385s
no training
label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.022s, Prediction Time: 0.393s
no training
label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.401s
no training
label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.409s
no training
label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.418s
no training
label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.425s
no training
label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.433s
no training
label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.442s
no training
label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.450s
no training
label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.458s
no training
label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.466s
no training
label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.475s
no training
label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.483s
no training
label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.023s, Prediction Time: 0.491s
no training
label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.499s
no training
label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.507s
no training
label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.516s
no training
label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.524s
no training
label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.532s
no training
label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.541s
no training
label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.549s
no training
label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.557s
no training
label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.566s
no training
label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.574s
no training
label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.582s
no training
label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.591s
no training
label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.598s
no training
label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.607s
no training
label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.615s
no training
label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.623s
no training
label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.632s
no training
label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.640s
no training
label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.648s
no training
label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.656s
no training
label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.664s
no training
label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.672s
no training
label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.681s
no training
label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.689s
no training
label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.697s
no training
label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.705s
no training
label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.714s
no training
label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.723s
no training
label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.732s
no training
label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.740s
no training
label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.749s
no training
label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.758s
no training
label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.766s
no training
label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.775s
no training
label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.783s
no training
label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.790s
no training
label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.024s, Prediction Time: 0.799s
label KACAgent, Reward 0: 20.000, Len(game): 20, Training Time: 0.004s, Prediction Time: 0.001s
label KACAgent, Reward 1: 40.000, Len(game): 40, Training Time: 0.009s, Prediction Time: 0.007s
label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.010s
label KACAgent, Reward 3: 26.000, Len(game): 26, Training Time: 0.022s, Prediction Time: 0.016s
label KACAgent, Reward 4: 113.000, Len(game): 113, Training Time: 0.037s, Prediction Time: 0.038s
label KACAgent, Reward 5: 97.000, Len(game): 97, Training Time: 0.060s, Prediction Time: 0.061s
label KACAgent, Reward 6: 101.000, Len(game): 101, Training Time: 0.097s, Prediction Time: 0.093s
label KACAgent, Reward 7: 388.000, Len(game): 388, Training Time: 0.207s, Prediction Time: 0.219s
label KACAgent, Reward 8: 284.000, Len(game): 284, Training Time: 0.385s, Prediction Time: 0.376s
label KACAgent, Reward 9: 71.000, Len(game): 71, Training Time: 0.574s, Prediction Time: 0.489s
label KACAgent, Reward 10: 130.000, Len(game): 130, Training Time: 0.809s, Prediction Time: 0.644s
label KACAgent, Reward 11: 118.000, Len(game): 118, Training Time: 1.099s, Prediction Time: 0.830s
label KACAgent, Reward 12: 262.000, Len(game): 262, Training Time: 1.471s, Prediction Time: 1.125s
label KACAgent, Reward 13: 197.000, Len(game): 197, Training Time: 1.927s, Prediction Time: 1.463s
label KACAgent, Reward 14: 217.000, Len(game): 217, Training Time: 2.488s, Prediction Time: 1.863s
label KACAgent, Reward 15: 219.000, Len(game): 219, Training Time: 3.157s, Prediction Time: 2.340s
label KACAgent, Reward 16: 299.000, Len(game): 299, Training Time: 3.157s, Prediction Time: 2.966s
label KACAgent, Reward 17: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 3.123s
label KACAgent, Reward 18: 637.000, Len(game): 637, Training Time: 3.157s, Prediction Time: 3.638s
label KACAgent, Reward 19: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 3.793s
label KACAgent, Reward 20: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 4.595s
label KACAgent, Reward 21: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 4.727s
label KACAgent, Reward 22: 335.000, Len(game): 335, Training Time: 3.157s, Prediction Time: 4.997s
label KACAgent, Reward 23: 557.000, Len(game): 557, Training Time: 3.157s, Prediction Time: 5.442s
label KACAgent, Reward 24: 414.000, Len(game): 414, Training Time: 3.157s, Prediction Time: 5.776s
label KACAgent, Reward 25: 194.000, Len(game): 194, Training Time: 3.157s, Prediction Time: 5.932s
label KACAgent, Reward 26: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 6.739s
label KACAgent, Reward 27: 254.000, Len(game): 254, Training Time: 3.157s, Prediction Time: 6.943s
label KACAgent, Reward 28: 204.000, Len(game): 204, Training Time: 3.157s, Prediction Time: 7.108s
label KACAgent, Reward 29: 223.000, Len(game): 223, Training Time: 3.157s, Prediction Time: 7.285s
label KACAgent, Reward 30: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 7.475s
label KACAgent, Reward 31: 167.000, Len(game): 167, Training Time: 3.157s, Prediction Time: 7.612s
label KACAgent, Reward 32: 316.000, Len(game): 316, Training Time: 3.157s, Prediction Time: 7.864s
label KACAgent, Reward 33: 453.000, Len(game): 453, Training Time: 3.157s, Prediction Time: 8.228s
label KACAgent, Reward 34: 146.000, Len(game): 146, Training Time: 3.157s, Prediction Time: 8.344s
label KACAgent, Reward 35: 199.000, Len(game): 199, Training Time: 3.157s, Prediction Time: 8.503s
label KACAgent, Reward 36: 193.000, Len(game): 193, Training Time: 3.157s, Prediction Time: 8.658s
label KACAgent, Reward 37: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 9.461s
label KACAgent, Reward 38: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 10.260s
label KACAgent, Reward 39: 500.000, Len(game): 500, Training Time: 3.157s, Prediction Time: 10.659s
label KACAgent, Reward 40: 211.000, Len(game): 211, Training Time: 3.157s, Prediction Time: 10.827s
label KACAgent, Reward 41: 216.000, Len(game): 216, Training Time: 3.157s, Prediction Time: 11.004s
label KACAgent, Reward 42: 225.000, Len(game): 225, Training Time: 3.157s, Prediction Time: 11.185s
label KACAgent, Reward 43: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 11.476s
label KACAgent, Reward 44: 152.000, Len(game): 152, Training Time: 3.157s, Prediction Time: 11.597s
label KACAgent, Reward 45: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 11.703s
label KACAgent, Reward 46: 367.000, Len(game): 367, Training Time: 3.157s, Prediction Time: 11.999s
label KACAgent, Reward 47: 232.000, Len(game): 232, Training Time: 3.157s, Prediction Time: 12.187s
label KACAgent, Reward 48: 252.000, Len(game): 252, Training Time: 3.157s, Prediction Time: 12.390s
label KACAgent, Reward 49: 234.000, Len(game): 234, Training Time: 3.157s, Prediction Time: 12.576s
label KACAgent, Reward 50: 311.000, Len(game): 311, Training Time: 3.157s, Prediction Time: 12.826s
label KACAgent, Reward 51: 174.000, Len(game): 174, Training Time: 3.157s, Prediction Time: 12.968s
label KACAgent, Reward 52: 251.000, Len(game): 251, Training Time: 3.157s, Prediction Time: 13.171s
label KACAgent, Reward 53: 212.000, Len(game): 212, Training Time: 3.157s, Prediction Time: 13.341s
label KACAgent, Reward 54: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 14.142s
label KACAgent, Reward 55: 343.000, Len(game): 343, Training Time: 3.157s, Prediction Time: 14.415s
label KACAgent, Reward 56: 476.000, Len(game): 476, Training Time: 3.157s, Prediction Time: 14.795s
label KACAgent, Reward 57: 195.000, Len(game): 195, Training Time: 3.157s, Prediction Time: 14.949s
label KACAgent, Reward 58: 177.000, Len(game): 177, Training Time: 3.157s, Prediction Time: 15.091s
label KACAgent, Reward 59: 207.000, Len(game): 207, Training Time: 3.157s, Prediction Time: 15.256s
label KACAgent, Reward 60: 135.000, Len(game): 135, Training Time: 3.157s, Prediction Time: 15.364s
label KACAgent, Reward 61: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 15.511s
label KACAgent, Reward 62: 201.000, Len(game): 201, Training Time: 3.157s, Prediction Time: 15.672s
label KACAgent, Reward 63: 344.000, Len(game): 344, Training Time: 3.157s, Prediction Time: 15.947s
label KACAgent, Reward 64: 197.000, Len(game): 197, Training Time: 3.157s, Prediction Time: 16.103s
label KACAgent, Reward 65: 265.000, Len(game): 265, Training Time: 3.157s, Prediction Time: 16.320s
label KACAgent, Reward 66: 168.000, Len(game): 168, Training Time: 3.157s, Prediction Time: 16.455s
label KACAgent, Reward 67: 166.000, Len(game): 166, Training Time: 3.157s, Prediction Time: 16.587s
label KACAgent, Reward 68: 191.000, Len(game): 191, Training Time: 3.157s, Prediction Time: 16.742s
label KACAgent, Reward 69: 257.000, Len(game): 257, Training Time: 3.157s, Prediction Time: 16.951s
label KACAgent, Reward 70: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 17.127s
label KACAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.157s, Prediction Time: 17.254s
label KACAgent, Reward 72: 1000.000, Len(game): 1000, Training Time: 3.157s, Prediction Time: 18.057s
label KACAgent, Reward 73: 315.000, Len(game): 315, Training Time: 3.157s, Prediction Time: 18.309s
label KACAgent, Reward 74: 229.000, Len(game): 229, Training Time: 3.157s, Prediction Time: 18.493s
label KACAgent, Reward 75: 830.000, Len(game): 830, Training Time: 3.157s, Prediction Time: 19.158s
label KACAgent, Reward 76: 206.000, Len(game): 206, Training Time: 3.157s, Prediction Time: 19.324s
label KACAgent, Reward 77: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 19.610s
label KACAgent, Reward 78: 184.000, Len(game): 184, Training Time: 3.157s, Prediction Time: 19.765s
label KACAgent, Reward 79: 186.000, Len(game): 186, Training Time: 3.157s, Prediction Time: 19.912s
label KACAgent, Reward 80: 387.000, Len(game): 387, Training Time: 3.157s, Prediction Time: 20.222s
label KACAgent, Reward 81: 189.000, Len(game): 189, Training Time: 3.157s, Prediction Time: 20.371s
label KACAgent, Reward 82: 185.000, Len(game): 185, Training Time: 3.157s, Prediction Time: 20.520s
label KACAgent, Reward 83: 328.000, Len(game): 328, Training Time: 3.157s, Prediction Time: 20.782s
label KACAgent, Reward 84: 353.000, Len(game): 353, Training Time: 3.157s, Prediction Time: 21.065s
label KACAgent, Reward 85: 264.000, Len(game): 264, Training Time: 3.157s, Prediction Time: 21.276s
label KACAgent, Reward 86: 221.000, Len(game): 221, Training Time: 3.157s, Prediction Time: 21.453s
label KACAgent, Reward 87: 180.000, Len(game): 180, Training Time: 3.157s, Prediction Time: 21.596s
label KACAgent, Reward 88: 310.000, Len(game): 310, Training Time: 3.157s, Prediction Time: 21.846s
label KACAgent, Reward 89: 365.000, Len(game): 365, Training Time: 3.157s, Prediction Time: 22.138s
label KACAgent, Reward 90: 248.000, Len(game): 248, Training Time: 3.157s, Prediction Time: 22.336s
label KACAgent, Reward 91: 688.000, Len(game): 688, Training Time: 3.157s, Prediction Time: 22.885s
label KACAgent, Reward 92: 162.000, Len(game): 162, Training Time: 3.157s, Prediction Time: 23.014s
label KACAgent, Reward 93: 173.000, Len(game): 173, Training Time: 3.157s, Prediction Time: 23.153s
label KACAgent, Reward 94: 165.000, Len(game): 165, Training Time: 3.157s, Prediction Time: 23.284s
label KACAgent, Reward 95: 226.000, Len(game): 226, Training Time: 3.157s, Prediction Time: 23.464s
label KACAgent, Reward 96: 200.000, Len(game): 200, Training Time: 3.157s, Prediction Time: 23.624s
label KACAgent, Reward 97: 334.000, Len(game): 334, Training Time: 3.157s, Prediction Time: 23.894s
label KACAgent, Reward 98: 310.000, Len(game): 310, Training Time: 3.157s, Prediction Time: 24.143s
label KACAgent, Reward 99: 364.000, Len(game): 364, Training Time: 3.157s, Prediction Time: 24.436s
label DQNAgent, Reward 0: 33.000, Len(game): 33, Training Time: 0.000s, Prediction Time: 0.000s
label DQNAgent, Reward 1: 17.000, Len(game): 17, Training Time: 0.000s, Prediction Time: 0.000s
label DQNAgent, Reward 2: 36.000, Len(game): 36, Training Time: 0.026s, Prediction Time: 0.001s
label DQNAgent, Reward 3: 11.000, Len(game): 11, Training Time: 0.038s, Prediction Time: 0.002s
label DQNAgent, Reward 4: 30.000, Len(game): 30, Training Time: 0.076s, Prediction Time: 0.003s
label DQNAgent, Reward 5: 28.000, Len(game): 28, Training Time: 0.107s, Prediction Time: 0.004s
label DQNAgent, Reward 6: 17.000, Len(game): 17, Training Time: 0.128s, Prediction Time: 0.005s
label DQNAgent, Reward 7: 20.000, Len(game): 20, Training Time: 0.154s, Prediction Time: 0.006s
label DQNAgent, Reward 8: 34.000, Len(game): 34, Training Time: 0.192s, Prediction Time: 0.006s
label DQNAgent, Reward 9: 17.000, Len(game): 17, Training Time: 0.210s, Prediction Time: 0.007s
label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.225s, Prediction Time: 0.008s
label DQNAgent, Reward 11: 14.000, Len(game): 14, Training Time: 0.241s, Prediction Time: 0.008s
label DQNAgent, Reward 12: 17.000, Len(game): 17, Training Time: 0.260s, Prediction Time: 0.009s
label DQNAgent, Reward 13: 31.000, Len(game): 31, Training Time: 0.294s, Prediction Time: 0.010s
label DQNAgent, Reward 14: 11.000, Len(game): 11, Training Time: 0.307s, Prediction Time: 0.010s
label DQNAgent, Reward 15: 10.000, Len(game): 10, Training Time: 0.319s, Prediction Time: 0.010s
label DQNAgent, Reward 16: 21.000, Len(game): 21, Training Time: 0.343s, Prediction Time: 0.010s
label DQNAgent, Reward 17: 27.000, Len(game): 27, Training Time: 0.373s, Prediction Time: 0.011s
label DQNAgent, Reward 18: 25.000, Len(game): 25, Training Time: 0.401s, Prediction Time: 0.012s
label DQNAgent, Reward 19: 14.000, Len(game): 14, Training Time: 0.417s, Prediction Time: 0.012s
label DQNAgent, Reward 20: 15.000, Len(game): 15, Training Time: 0.434s, Prediction Time: 0.012s
label DQNAgent, Reward 21: 17.000, Len(game): 17, Training Time: 0.452s, Prediction Time: 0.013s
label DQNAgent, Reward 22: 14.000, Len(game): 14, Training Time: 0.468s, Prediction Time: 0.014s
label DQNAgent, Reward 23: 29.000, Len(game): 29, Training Time: 0.502s, Prediction Time: 0.015s
label DQNAgent, Reward 24: 28.000, Len(game): 28, Training Time: 0.534s, Prediction Time: 0.016s
label DQNAgent, Reward 25: 78.000, Len(game): 78, Training Time: 0.630s, Prediction Time: 0.018s
label DQNAgent, Reward 26: 21.000, Len(game): 21, Training Time: 0.654s, Prediction Time: 0.018s
label DQNAgent, Reward 27: 23.000, Len(game): 23, Training Time: 0.681s, Prediction Time: 0.019s
label DQNAgent, Reward 28: 41.000, Len(game): 41, Training Time: 0.727s, Prediction Time: 0.020s
label DQNAgent, Reward 29: 40.000, Len(game): 40, Training Time: 0.773s, Prediction Time: 0.021s
label DQNAgent, Reward 30: 54.000, Len(game): 54, Training Time: 0.835s, Prediction Time: 0.022s
label DQNAgent, Reward 31: 65.000, Len(game): 65, Training Time: 0.908s, Prediction Time: 0.024s
label DQNAgent, Reward 32: 86.000, Len(game): 86, Training Time: 1.010s, Prediction Time: 0.026s
label DQNAgent, Reward 33: 27.000, Len(game): 27, Training Time: 1.039s, Prediction Time: 0.027s
label DQNAgent, Reward 34: 166.000, Len(game): 166, Training Time: 1.230s, Prediction Time: 0.032s
label DQNAgent, Reward 35: 48.000, Len(game): 48, Training Time: 1.287s, Prediction Time: 0.034s
label DQNAgent, Reward 36: 135.000, Len(game): 135, Training Time: 1.457s, Prediction Time: 0.040s
label DQNAgent, Reward 37: 118.000, Len(game): 118, Training Time: 1.607s, Prediction Time: 0.045s
label DQNAgent, Reward 38: 159.000, Len(game): 159, Training Time: 1.790s, Prediction Time: 0.053s
label DQNAgent, Reward 39: 222.000, Len(game): 222, Training Time: 2.040s, Prediction Time: 0.061s
label DQNAgent, Reward 40: 219.000, Len(game): 219, Training Time: 2.294s, Prediction Time: 0.070s
label DQNAgent, Reward 41: 180.000, Len(game): 180, Training Time: 2.503s, Prediction Time: 0.077s
label DQNAgent, Reward 42: 178.000, Len(game): 178, Training Time: 2.706s, Prediction Time: 0.084s
label DQNAgent, Reward 43: 176.000, Len(game): 176, Training Time: 2.900s, Prediction Time: 0.092s
label DQNAgent, Reward 44: 247.000, Len(game): 247, Training Time: 3.171s, Prediction Time: 0.102s
label DQNAgent, Reward 45: 195.000, Len(game): 195, Training Time: 3.171s, Prediction Time: 0.111s
label DQNAgent, Reward 46: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.118s
label DQNAgent, Reward 47: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.128s
label DQNAgent, Reward 48: 232.000, Len(game): 232, Training Time: 3.171s, Prediction Time: 0.139s
label DQNAgent, Reward 49: 236.000, Len(game): 236, Training Time: 3.171s, Prediction Time: 0.149s
label DQNAgent, Reward 50: 216.000, Len(game): 216, Training Time: 3.171s, Prediction Time: 0.158s
label DQNAgent, Reward 51: 178.000, Len(game): 178, Training Time: 3.171s, Prediction Time: 0.166s
label DQNAgent, Reward 52: 188.000, Len(game): 188, Training Time: 3.171s, Prediction Time: 0.175s
label DQNAgent, Reward 53: 205.000, Len(game): 205, Training Time: 3.171s, Prediction Time: 0.183s
label DQNAgent, Reward 54: 239.000, Len(game): 239, Training Time: 3.171s, Prediction Time: 0.193s
label DQNAgent, Reward 55: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.204s
label DQNAgent, Reward 56: 160.000, Len(game): 160, Training Time: 3.171s, Prediction Time: 0.212s
label DQNAgent, Reward 57: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.219s
label DQNAgent, Reward 58: 196.000, Len(game): 196, Training Time: 3.171s, Prediction Time: 0.227s
label DQNAgent, Reward 59: 171.000, Len(game): 171, Training Time: 3.171s, Prediction Time: 0.234s
label DQNAgent, Reward 60: 168.000, Len(game): 168, Training Time: 3.171s, Prediction Time: 0.241s
label DQNAgent, Reward 61: 223.000, Len(game): 223, Training Time: 3.171s, Prediction Time: 0.250s
label DQNAgent, Reward 62: 215.000, Len(game): 215, Training Time: 3.171s, Prediction Time: 0.260s
label DQNAgent, Reward 63: 182.000, Len(game): 182, Training Time: 3.171s, Prediction Time: 0.267s
label DQNAgent, Reward 64: 171.000, Len(game): 171, Training Time: 3.171s, Prediction Time: 0.274s
label DQNAgent, Reward 65: 268.000, Len(game): 268, Training Time: 3.171s, Prediction Time: 0.285s
label DQNAgent, Reward 66: 244.000, Len(game): 244, Training Time: 3.171s, Prediction Time: 0.295s
label DQNAgent, Reward 67: 162.000, Len(game): 162, Training Time: 3.171s, Prediction Time: 0.301s
label DQNAgent, Reward 68: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.308s
label DQNAgent, Reward 69: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.318s
label DQNAgent, Reward 70: 173.000, Len(game): 173, Training Time: 3.171s, Prediction Time: 0.325s
label DQNAgent, Reward 71: 161.000, Len(game): 161, Training Time: 3.171s, Prediction Time: 0.332s
label DQNAgent, Reward 72: 300.000, Len(game): 300, Training Time: 3.171s, Prediction Time: 0.344s
label DQNAgent, Reward 73: 228.000, Len(game): 228, Training Time: 3.171s, Prediction Time: 0.353s
label DQNAgent, Reward 74: 172.000, Len(game): 172, Training Time: 3.171s, Prediction Time: 0.360s
label DQNAgent, Reward 75: 260.000, Len(game): 260, Training Time: 3.171s, Prediction Time: 0.371s
label DQNAgent, Reward 76: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.379s
label DQNAgent, Reward 77: 251.000, Len(game): 251, Training Time: 3.171s, Prediction Time: 0.389s
label DQNAgent, Reward 78: 186.000, Len(game): 186, Training Time: 3.171s, Prediction Time: 0.397s
label DQNAgent, Reward 79: 243.000, Len(game): 243, Training Time: 3.171s, Prediction Time: 0.405s
label DQNAgent, Reward 80: 226.000, Len(game): 226, Training Time: 3.171s, Prediction Time: 0.415s
label DQNAgent, Reward 81: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.426s
label DQNAgent, Reward 82: 184.000, Len(game): 184, Training Time: 3.171s, Prediction Time: 0.433s
label DQNAgent, Reward 83: 240.000, Len(game): 240, Training Time: 3.171s, Prediction Time: 0.443s
label DQNAgent, Reward 84: 211.000, Len(game): 211, Training Time: 3.171s, Prediction Time: 0.451s
label DQNAgent, Reward 85: 255.000, Len(game): 255, Training Time: 3.171s, Prediction Time: 0.461s
label DQNAgent, Reward 86: 283.000, Len(game): 283, Training Time: 3.171s, Prediction Time: 0.473s
label DQNAgent, Reward 87: 189.000, Len(game): 189, Training Time: 3.171s, Prediction Time: 0.480s
label DQNAgent, Reward 88: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.489s
label DQNAgent, Reward 89: 282.000, Len(game): 282, Training Time: 3.171s, Prediction Time: 0.501s
label DQNAgent, Reward 90: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.509s
label DQNAgent, Reward 91: 164.000, Len(game): 164, Training Time: 3.171s, Prediction Time: 0.517s
label DQNAgent, Reward 92: 235.000, Len(game): 235, Training Time: 3.171s, Prediction Time: 0.525s
label DQNAgent, Reward 93: 191.000, Len(game): 191, Training Time: 3.171s, Prediction Time: 0.534s
label DQNAgent, Reward 94: 214.000, Len(game): 214, Training Time: 3.171s, Prediction Time: 0.543s
label DQNAgent, Reward 95: 229.000, Len(game): 229, Training Time: 3.171s, Prediction Time: 0.552s
label DQNAgent, Reward 96: 199.000, Len(game): 199, Training Time: 3.171s, Prediction Time: 0.560s
label DQNAgent, Reward 97: 190.000, Len(game): 190, Training Time: 3.171s, Prediction Time: 0.568s
label DQNAgent, Reward 98: 289.000, Len(game): 289, Training Time: 3.171s, Prediction Time: 0.579s
label DQNAgent, Reward 99: 212.000, Len(game): 212, Training Time: 3.171s, Prediction Time: 0.588s
Computed global error Bellman mean:  1.1717717636598608e-07  iter:  3
label KQLearningHJBCP, Reward 0: 22.000, Len(game): 22, Training Time: 0.009s, Prediction Time: 0.000s
Computed global error Bellman mean:  1.1552543799808745e-07  iter:  6
label KQLearningHJBCP, Reward 1: 78.000, Len(game): 78, Training Time: 0.059s, Prediction Time: 0.011s
Computed global error Bellman mean:  3.0488343749310424e-07  iter:  8
label KQLearningHJBCP, Reward 2: 71.000, Len(game): 71, Training Time: 0.203s, Prediction Time: 0.023s
Computed global error Bellman mean:  2.2164816109864766e-07  iter:  5
label KQLearningHJBCP, Reward 3: 82.000, Len(game): 82, Training Time: 0.392s, Prediction Time: 0.042s
Computed global error Bellman mean:  2.685937866324866e-07  iter:  5
label KQLearningHJBCP, Reward 4: 76.000, Len(game): 76, Training Time: 0.717s, Prediction Time: 0.063s
Computed global error Bellman mean:  2.2103691660173973e-07  iter:  5
label KQLearningHJBCP, Reward 5: 83.000, Len(game): 83, Training Time: 1.269s, Prediction Time: 0.088s
Computed global error Bellman mean:  2.3048877059193143e-07  iter:  6
label KQLearningHJBCP, Reward 6: 195.000, Len(game): 195, Training Time: 2.550s, Prediction Time: 0.156s
Computed global error Bellman mean:  0.10596339659857741  iter:  10
label KQLearningHJBCP, Reward 7: 217.000, Len(game): 217, Training Time: 5.986s, Prediction Time: 0.239s
label KQLearningHJBCP, Reward 8: 149.000, Len(game): 149, Training Time: 5.986s, Prediction Time: 0.312s
label KQLearningHJBCP, Reward 9: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 0.358s
label KQLearningHJBCP, Reward 10: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.400s
label KQLearningHJBCP, Reward 11: 96.000, Len(game): 96, Training Time: 5.986s, Prediction Time: 0.448s
label KQLearningHJBCP, Reward 12: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 0.510s
label KQLearningHJBCP, Reward 13: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 0.566s
label KQLearningHJBCP, Reward 14: 92.000, Len(game): 92, Training Time: 5.986s, Prediction Time: 0.612s
label KQLearningHJBCP, Reward 15: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 0.662s
label KQLearningHJBCP, Reward 16: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 0.710s
label KQLearningHJBCP, Reward 17: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 0.769s
label KQLearningHJBCP, Reward 18: 154.000, Len(game): 154, Training Time: 5.986s, Prediction Time: 0.847s
label KQLearningHJBCP, Reward 19: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 0.911s
label KQLearningHJBCP, Reward 20: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 0.959s
label KQLearningHJBCP, Reward 21: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 1.014s
label KQLearningHJBCP, Reward 22: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 1.059s
label KQLearningHJBCP, Reward 23: 130.000, Len(game): 130, Training Time: 5.986s, Prediction Time: 1.123s
label KQLearningHJBCP, Reward 24: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 1.174s
label KQLearningHJBCP, Reward 25: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.228s
label KQLearningHJBCP, Reward 26: 68.000, Len(game): 68, Training Time: 5.986s, Prediction Time: 1.261s
label KQLearningHJBCP, Reward 27: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 1.315s
label KQLearningHJBCP, Reward 28: 144.000, Len(game): 144, Training Time: 5.986s, Prediction Time: 1.386s
label KQLearningHJBCP, Reward 29: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 1.432s
label KQLearningHJBCP, Reward 30: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 1.481s
label KQLearningHJBCP, Reward 31: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 1.541s
label KQLearningHJBCP, Reward 32: 136.000, Len(game): 136, Training Time: 5.986s, Prediction Time: 1.609s
label KQLearningHJBCP, Reward 33: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.659s
label KQLearningHJBCP, Reward 34: 126.000, Len(game): 126, Training Time: 5.986s, Prediction Time: 1.721s
label KQLearningHJBCP, Reward 35: 78.000, Len(game): 78, Training Time: 5.986s, Prediction Time: 1.761s
label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 5.986s, Prediction Time: 1.813s
label KQLearningHJBCP, Reward 37: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 1.863s
label KQLearningHJBCP, Reward 38: 176.000, Len(game): 176, Training Time: 5.986s, Prediction Time: 1.950s
label KQLearningHJBCP, Reward 39: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 2.003s
label KQLearningHJBCP, Reward 40: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 2.053s
label KQLearningHJBCP, Reward 41: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.116s
label KQLearningHJBCP, Reward 42: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.191s
label KQLearningHJBCP, Reward 43: 99.000, Len(game): 99, Training Time: 5.986s, Prediction Time: 2.240s
label KQLearningHJBCP, Reward 44: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 2.293s
label KQLearningHJBCP, Reward 45: 164.000, Len(game): 164, Training Time: 5.986s, Prediction Time: 2.375s
label KQLearningHJBCP, Reward 46: 127.000, Len(game): 127, Training Time: 5.986s, Prediction Time: 2.439s
label KQLearningHJBCP, Reward 47: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.486s
label KQLearningHJBCP, Reward 48: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 2.532s
label KQLearningHJBCP, Reward 49: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 2.585s
label KQLearningHJBCP, Reward 50: 119.000, Len(game): 119, Training Time: 5.986s, Prediction Time: 2.645s
label KQLearningHJBCP, Reward 51: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 2.689s
label KQLearningHJBCP, Reward 52: 117.000, Len(game): 117, Training Time: 5.986s, Prediction Time: 2.747s
label KQLearningHJBCP, Reward 53: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 2.794s
label KQLearningHJBCP, Reward 54: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 2.843s
label KQLearningHJBCP, Reward 55: 148.000, Len(game): 148, Training Time: 5.986s, Prediction Time: 2.917s
label KQLearningHJBCP, Reward 56: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 2.968s
label KQLearningHJBCP, Reward 57: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.012s
label KQLearningHJBCP, Reward 58: 90.000, Len(game): 90, Training Time: 5.986s, Prediction Time: 3.056s
label KQLearningHJBCP, Reward 59: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 3.111s
label KQLearningHJBCP, Reward 60: 166.000, Len(game): 166, Training Time: 5.986s, Prediction Time: 3.193s
label KQLearningHJBCP, Reward 61: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.246s
label KQLearningHJBCP, Reward 62: 124.000, Len(game): 124, Training Time: 5.986s, Prediction Time: 3.310s
label KQLearningHJBCP, Reward 63: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 3.359s
label KQLearningHJBCP, Reward 64: 93.000, Len(game): 93, Training Time: 5.986s, Prediction Time: 3.406s
label KQLearningHJBCP, Reward 65: 122.000, Len(game): 122, Training Time: 5.986s, Prediction Time: 3.465s
label KQLearningHJBCP, Reward 66: 105.000, Len(game): 105, Training Time: 5.986s, Prediction Time: 3.517s
label KQLearningHJBCP, Reward 67: 163.000, Len(game): 163, Training Time: 5.986s, Prediction Time: 3.596s
label KQLearningHJBCP, Reward 68: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 3.645s
label KQLearningHJBCP, Reward 69: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 3.702s
label KQLearningHJBCP, Reward 70: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 3.757s
label KQLearningHJBCP, Reward 71: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.807s
label KQLearningHJBCP, Reward 72: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 3.857s
label KQLearningHJBCP, Reward 73: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 3.905s
label KQLearningHJBCP, Reward 74: 101.000, Len(game): 101, Training Time: 5.986s, Prediction Time: 3.955s
label KQLearningHJBCP, Reward 75: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.010s
label KQLearningHJBCP, Reward 76: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.054s
label KQLearningHJBCP, Reward 77: 94.000, Len(game): 94, Training Time: 5.986s, Prediction Time: 4.100s
label KQLearningHJBCP, Reward 78: 89.000, Len(game): 89, Training Time: 5.986s, Prediction Time: 4.145s
label KQLearningHJBCP, Reward 79: 123.000, Len(game): 123, Training Time: 5.986s, Prediction Time: 4.206s
label KQLearningHJBCP, Reward 80: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 4.259s
label KQLearningHJBCP, Reward 81: 114.000, Len(game): 114, Training Time: 5.986s, Prediction Time: 4.316s
label KQLearningHJBCP, Reward 82: 110.000, Len(game): 110, Training Time: 5.986s, Prediction Time: 4.370s
label KQLearningHJBCP, Reward 83: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.421s
label KQLearningHJBCP, Reward 84: 102.000, Len(game): 102, Training Time: 5.986s, Prediction Time: 4.470s
label KQLearningHJBCP, Reward 85: 120.000, Len(game): 120, Training Time: 5.986s, Prediction Time: 4.532s
label KQLearningHJBCP, Reward 86: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 4.587s
label KQLearningHJBCP, Reward 87: 116.000, Len(game): 116, Training Time: 5.986s, Prediction Time: 4.644s
label KQLearningHJBCP, Reward 88: 83.000, Len(game): 83, Training Time: 5.986s, Prediction Time: 4.685s
label KQLearningHJBCP, Reward 89: 95.000, Len(game): 95, Training Time: 5.986s, Prediction Time: 4.732s
label KQLearningHJBCP, Reward 90: 107.000, Len(game): 107, Training Time: 5.986s, Prediction Time: 4.785s
label KQLearningHJBCP, Reward 91: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.834s
label KQLearningHJBCP, Reward 92: 98.000, Len(game): 98, Training Time: 5.986s, Prediction Time: 4.883s
label KQLearningHJBCP, Reward 93: 97.000, Len(game): 97, Training Time: 5.986s, Prediction Time: 4.931s
label KQLearningHJBCP, Reward 94: 108.000, Len(game): 108, Training Time: 5.986s, Prediction Time: 4.984s
label KQLearningHJBCP, Reward 95: 115.000, Len(game): 115, Training Time: 5.986s, Prediction Time: 5.041s
label KQLearningHJBCP, Reward 96: 106.000, Len(game): 106, Training Time: 5.986s, Prediction Time: 5.094s
label KQLearningHJBCP, Reward 97: 111.000, Len(game): 111, Training Time: 5.986s, Prediction Time: 5.149s
label KQLearningHJBCP, Reward 98: 104.000, Len(game): 104, Training Time: 5.986s, Prediction Time: 5.202s
label KQLearningHJBCP, Reward 99: 81.000, Len(game): 81, Training Time: 5.986s, Prediction Time: 5.244s
Computed global error Bellman mean:  1.0343403334293693  iter:  0
label KQLearning, Reward 0: 20.000, Len(game): 20, Training Time: 0.003s, Prediction Time: 0.000s
Computed global error Bellman mean:  0.11291406057087777  iter:  5
label KQLearning, Reward 1: 191.000, Len(game): 191, Training Time: 0.102s, Prediction Time: 0.028s
Computed global error Bellman mean:  0.019494447560319525  iter:  5
label KQLearning, Reward 2: 32.000, Len(game): 32, Training Time: 0.206s, Prediction Time: 0.036s
Computed global error Bellman mean:  0.05866904492151179  iter:  5
label KQLearning, Reward 3: 123.000, Len(game): 123, Training Time: 0.498s, Prediction Time: 0.069s
Computed global error Bellman mean:  0.007963787895601543  iter:  5
label KQLearning, Reward 4: 162.000, Len(game): 162, Training Time: 1.013s, Prediction Time: 0.128s
Computed global error Bellman mean:  0.7506316675407901  iter:  5
label KQLearning, Reward 5: 109.000, Len(game): 109, Training Time: 1.777s, Prediction Time: 0.168s
Computed global error Bellman mean:  0.10648151504465554  iter:  5
label KQLearning, Reward 6: 264.000, Len(game): 264, Training Time: 3.193s, Prediction Time: 0.276s
label KQLearning, Reward 7: 191.000, Len(game): 191, Training Time: 3.193s, Prediction Time: 0.377s
label KQLearning, Reward 8: 233.000, Len(game): 233, Training Time: 3.193s, Prediction Time: 0.499s
label KQLearning, Reward 9: 247.000, Len(game): 247, Training Time: 3.193s, Prediction Time: 0.629s
label KQLearning, Reward 10: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 0.738s
label KQLearning, Reward 11: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.262s
label KQLearning, Reward 12: 155.000, Len(game): 155, Training Time: 3.193s, Prediction Time: 1.343s
label KQLearning, Reward 13: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 1.864s
label KQLearning, Reward 14: 266.000, Len(game): 266, Training Time: 3.193s, Prediction Time: 2.003s
label KQLearning, Reward 15: 224.000, Len(game): 224, Training Time: 3.193s, Prediction Time: 2.120s
label KQLearning, Reward 16: 237.000, Len(game): 237, Training Time: 3.193s, Prediction Time: 2.243s
label KQLearning, Reward 17: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 2.277s
label KQLearning, Reward 18: 255.000, Len(game): 255, Training Time: 3.193s, Prediction Time: 2.410s
label KQLearning, Reward 19: 78.000, Len(game): 78, Training Time: 3.193s, Prediction Time: 2.450s
label KQLearning, Reward 20: 229.000, Len(game): 229, Training Time: 3.193s, Prediction Time: 2.569s
label KQLearning, Reward 21: 228.000, Len(game): 228, Training Time: 3.193s, Prediction Time: 2.688s
label KQLearning, Reward 22: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 2.806s
label KQLearning, Reward 23: 311.000, Len(game): 311, Training Time: 3.193s, Prediction Time: 2.967s
label KQLearning, Reward 24: 265.000, Len(game): 265, Training Time: 3.193s, Prediction Time: 3.105s
label KQLearning, Reward 25: 209.000, Len(game): 209, Training Time: 3.193s, Prediction Time: 3.213s
label KQLearning, Reward 26: 67.000, Len(game): 67, Training Time: 3.193s, Prediction Time: 3.247s
label KQLearning, Reward 27: 235.000, Len(game): 235, Training Time: 3.193s, Prediction Time: 3.368s
label KQLearning, Reward 28: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 3.467s
label KQLearning, Reward 29: 215.000, Len(game): 215, Training Time: 3.193s, Prediction Time: 3.581s
label KQLearning, Reward 30: 256.000, Len(game): 256, Training Time: 3.193s, Prediction Time: 3.717s
label KQLearning, Reward 31: 259.000, Len(game): 259, Training Time: 3.193s, Prediction Time: 3.852s
label KQLearning, Reward 32: 241.000, Len(game): 241, Training Time: 3.193s, Prediction Time: 3.978s
label KQLearning, Reward 33: 271.000, Len(game): 271, Training Time: 3.193s, Prediction Time: 4.122s
label KQLearning, Reward 34: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 4.227s
label KQLearning, Reward 35: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 4.345s
label KQLearning, Reward 36: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 4.468s
label KQLearning, Reward 37: 189.000, Len(game): 189, Training Time: 3.193s, Prediction Time: 4.566s
label KQLearning, Reward 38: 72.000, Len(game): 72, Training Time: 3.193s, Prediction Time: 4.604s
label KQLearning, Reward 39: 201.000, Len(game): 201, Training Time: 3.193s, Prediction Time: 4.709s
label KQLearning, Reward 40: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 5.232s
label KQLearning, Reward 41: 187.000, Len(game): 187, Training Time: 3.193s, Prediction Time: 5.330s
label KQLearning, Reward 42: 193.000, Len(game): 193, Training Time: 3.193s, Prediction Time: 5.432s
label KQLearning, Reward 43: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 5.559s
label KQLearning, Reward 44: 226.000, Len(game): 226, Training Time: 3.193s, Prediction Time: 5.676s
label KQLearning, Reward 45: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 5.786s
label KQLearning, Reward 46: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 5.921s
label KQLearning, Reward 47: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 6.039s
label KQLearning, Reward 48: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 6.566s
label KQLearning, Reward 49: 203.000, Len(game): 203, Training Time: 3.193s, Prediction Time: 6.673s
label KQLearning, Reward 50: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 6.802s
label KQLearning, Reward 51: 275.000, Len(game): 275, Training Time: 3.193s, Prediction Time: 6.949s
label KQLearning, Reward 52: 257.000, Len(game): 257, Training Time: 3.193s, Prediction Time: 7.083s
label KQLearning, Reward 53: 77.000, Len(game): 77, Training Time: 3.193s, Prediction Time: 7.124s
label KQLearning, Reward 54: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 7.256s
label KQLearning, Reward 55: 214.000, Len(game): 214, Training Time: 3.193s, Prediction Time: 7.368s
label KQLearning, Reward 56: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 7.454s
label KQLearning, Reward 57: 231.000, Len(game): 231, Training Time: 3.193s, Prediction Time: 7.574s
label KQLearning, Reward 58: 178.000, Len(game): 178, Training Time: 3.193s, Prediction Time: 7.665s
label KQLearning, Reward 59: 239.000, Len(game): 239, Training Time: 3.193s, Prediction Time: 7.794s
label KQLearning, Reward 60: 161.000, Len(game): 161, Training Time: 3.193s, Prediction Time: 7.879s
label KQLearning, Reward 61: 93.000, Len(game): 93, Training Time: 3.193s, Prediction Time: 7.928s
label KQLearning, Reward 62: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 8.034s
label KQLearning, Reward 63: 199.000, Len(game): 199, Training Time: 3.193s, Prediction Time: 8.139s
label KQLearning, Reward 64: 258.000, Len(game): 258, Training Time: 3.193s, Prediction Time: 8.277s
label KQLearning, Reward 65: 168.000, Len(game): 168, Training Time: 3.193s, Prediction Time: 8.365s
label KQLearning, Reward 66: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 8.477s
label KQLearning, Reward 67: 71.000, Len(game): 71, Training Time: 3.193s, Prediction Time: 8.513s
label KQLearning, Reward 68: 70.000, Len(game): 70, Training Time: 3.193s, Prediction Time: 8.550s
label KQLearning, Reward 69: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 9.076s
label KQLearning, Reward 70: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.190s
label KQLearning, Reward 71: 262.000, Len(game): 262, Training Time: 3.193s, Prediction Time: 9.327s
label KQLearning, Reward 72: 234.000, Len(game): 234, Training Time: 3.193s, Prediction Time: 9.449s
label KQLearning, Reward 73: 65.000, Len(game): 65, Training Time: 3.193s, Prediction Time: 9.484s
label KQLearning, Reward 74: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.597s
label KQLearning, Reward 75: 238.000, Len(game): 238, Training Time: 3.193s, Prediction Time: 9.720s
label KQLearning, Reward 76: 243.000, Len(game): 243, Training Time: 3.193s, Prediction Time: 9.848s
label KQLearning, Reward 77: 217.000, Len(game): 217, Training Time: 3.193s, Prediction Time: 9.964s
label KQLearning, Reward 78: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 10.080s
label KQLearning, Reward 79: 252.000, Len(game): 252, Training Time: 3.193s, Prediction Time: 10.213s
label KQLearning, Reward 80: 207.000, Len(game): 207, Training Time: 3.193s, Prediction Time: 10.322s
label KQLearning, Reward 81: 100.000, Len(game): 100, Training Time: 3.193s, Prediction Time: 10.373s
label KQLearning, Reward 82: 225.000, Len(game): 225, Training Time: 3.193s, Prediction Time: 10.492s
label KQLearning, Reward 83: 153.000, Len(game): 153, Training Time: 3.193s, Prediction Time: 10.573s
label KQLearning, Reward 84: 333.000, Len(game): 333, Training Time: 3.193s, Prediction Time: 10.749s
label KQLearning, Reward 85: 246.000, Len(game): 246, Training Time: 3.193s, Prediction Time: 10.880s
label KQLearning, Reward 86: 307.000, Len(game): 307, Training Time: 3.193s, Prediction Time: 11.040s
label KQLearning, Reward 87: 245.000, Len(game): 245, Training Time: 3.193s, Prediction Time: 11.169s
label KQLearning, Reward 88: 269.000, Len(game): 269, Training Time: 3.193s, Prediction Time: 11.309s
label KQLearning, Reward 89: 206.000, Len(game): 206, Training Time: 3.193s, Prediction Time: 11.417s
label KQLearning, Reward 90: 173.000, Len(game): 173, Training Time: 3.193s, Prediction Time: 11.507s
label KQLearning, Reward 91: 165.000, Len(game): 165, Training Time: 3.193s, Prediction Time: 11.593s
label KQLearning, Reward 92: 211.000, Len(game): 211, Training Time: 3.193s, Prediction Time: 11.701s
label KQLearning, Reward 93: 248.000, Len(game): 248, Training Time: 3.193s, Prediction Time: 11.829s
label KQLearning, Reward 94: 219.000, Len(game): 219, Training Time: 3.193s, Prediction Time: 11.946s
label KQLearning, Reward 95: 200.000, Len(game): 200, Training Time: 3.193s, Prediction Time: 12.049s
label KQLearning, Reward 96: 226.000, Len(game): 226, Training Time: 3.193s, Prediction Time: 12.169s
label KQLearning, Reward 97: 232.000, Len(game): 232, Training Time: 3.193s, Prediction Time: 12.288s
label KQLearning, Reward 98: 230.000, Len(game): 230, Training Time: 3.193s, Prediction Time: 12.407s
label KQLearning, Reward 99: 1000.000, Len(game): 1000, Training Time: 3.193s, Prediction Time: 12.936s
0
label PPOAgent, Reward 0: 12.000, Len(game): 12, Training Time: 0.003s, Prediction Time: 0.003s
label PPOAgent, Reward 1: 23.000, Len(game): 23, Training Time: 0.007s, Prediction Time: 0.007s
label PPOAgent, Reward 2: 12.000, Len(game): 12, Training Time: 0.010s, Prediction Time: 0.010s
label PPOAgent, Reward 3: 14.000, Len(game): 14, Training Time: 0.013s, Prediction Time: 0.013s
label PPOAgent, Reward 4: 24.000, Len(game): 24, Training Time: 0.018s, Prediction Time: 0.018s
label PPOAgent, Reward 5: 63.000, Len(game): 63, Training Time: 0.030s, Prediction Time: 0.030s
label PPOAgent, Reward 6: 35.000, Len(game): 35, Training Time: 0.037s, Prediction Time: 0.037s
label PPOAgent, Reward 7: 21.000, Len(game): 21, Training Time: 0.040s, Prediction Time: 0.040s
label PPOAgent, Reward 8: 14.000, Len(game): 14, Training Time: 0.043s, Prediction Time: 0.043s
label PPOAgent, Reward 9: 11.000, Len(game): 11, Training Time: 0.045s, Prediction Time: 0.045s
label PPOAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.048s, Prediction Time: 0.048s
label PPOAgent, Reward 11: 15.000, Len(game): 15, Training Time: 0.050s, Prediction Time: 0.050s
label PPOAgent, Reward 12: 15.000, Len(game): 15, Training Time: 0.053s, Prediction Time: 0.053s
label PPOAgent, Reward 13: 16.000, Len(game): 16, Training Time: 0.057s, Prediction Time: 0.057s
label PPOAgent, Reward 14: 12.000, Len(game): 12, Training Time: 0.059s, Prediction Time: 0.059s
label PPOAgent, Reward 15: 14.000, Len(game): 14, Training Time: 0.062s, Prediction Time: 0.062s
label PPOAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.064s, Prediction Time: 0.064s
label PPOAgent, Reward 17: 21.000, Len(game): 21, Training Time: 0.068s, Prediction Time: 0.068s
label PPOAgent, Reward 18: 15.000, Len(game): 15, Training Time: 0.071s, Prediction Time: 0.071s
label PPOAgent, Reward 19: 31.000, Len(game): 31, Training Time: 0.077s, Prediction Time: 0.077s
label PPOAgent, Reward 20: 11.000, Len(game): 11, Training Time: 0.078s, Prediction Time: 0.078s
label PPOAgent, Reward 21: 16.000, Len(game): 16, Training Time: 0.081s, Prediction Time: 0.081s
label PPOAgent, Reward 22: 10.000, Len(game): 10, Training Time: 0.082s, Prediction Time: 0.082s
label PPOAgent, Reward 23: 43.000, Len(game): 43, Training Time: 0.091s, Prediction Time: 0.091s
label PPOAgent, Reward 24: 22.000, Len(game): 22, Training Time: 0.095s, Prediction Time: 0.095s
label PPOAgent, Reward 25: 27.000, Len(game): 27, Training Time: 0.100s, Prediction Time: 0.100s
label PPOAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.103s, Prediction Time: 0.103s
label PPOAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s
label PPOAgent, Reward 28: 27.000, Len(game): 27, Training Time: 0.112s, Prediction Time: 0.112s
label PPOAgent, Reward 29: 37.000, Len(game): 37, Training Time: 0.119s, Prediction Time: 0.119s
label PPOAgent, Reward 30: 15.000, Len(game): 15, Training Time: 0.121s, Prediction Time: 0.121s
label PPOAgent, Reward 31: 18.000, Len(game): 18, Training Time: 0.124s, Prediction Time: 0.124s
label PPOAgent, Reward 32: 14.000, Len(game): 14, Training Time: 0.127s, Prediction Time: 0.127s
label PPOAgent, Reward 33: 22.000, Len(game): 22, Training Time: 0.130s, Prediction Time: 0.130s
label PPOAgent, Reward 34: 8.000, Len(game): 8, Training Time: 0.133s, Prediction Time: 0.133s
label PPOAgent, Reward 35: 44.000, Len(game): 44, Training Time: 0.140s, Prediction Time: 0.140s
label PPOAgent, Reward 36: 13.000, Len(game): 13, Training Time: 0.143s, Prediction Time: 0.143s
label PPOAgent, Reward 37: 17.000, Len(game): 17, Training Time: 0.146s, Prediction Time: 0.146s
label PPOAgent, Reward 38: 20.000, Len(game): 20, Training Time: 0.150s, Prediction Time: 0.150s
label PPOAgent, Reward 39: 53.000, Len(game): 53, Training Time: 0.160s, Prediction Time: 0.160s
label PPOAgent, Reward 40: 22.000, Len(game): 22, Training Time: 0.164s, Prediction Time: 0.164s
label PPOAgent, Reward 41: 47.000, Len(game): 47, Training Time: 0.174s, Prediction Time: 0.174s
label PPOAgent, Reward 42: 36.000, Len(game): 36, Training Time: 0.181s, Prediction Time: 0.181s
label PPOAgent, Reward 43: 19.000, Len(game): 19, Training Time: 0.184s, Prediction Time: 0.184s
label PPOAgent, Reward 44: 11.000, Len(game): 11, Training Time: 0.186s, Prediction Time: 0.186s
label PPOAgent, Reward 45: 12.000, Len(game): 12, Training Time: 0.189s, Prediction Time: 0.189s
label PPOAgent, Reward 46: 38.000, Len(game): 38, Training Time: 0.196s, Prediction Time: 0.196s
label PPOAgent, Reward 47: 37.000, Len(game): 37, Training Time: 0.203s, Prediction Time: 0.203s
label PPOAgent, Reward 48: 17.000, Len(game): 17, Training Time: 0.205s, Prediction Time: 0.205s
label PPOAgent, Reward 49: 46.000, Len(game): 46, Training Time: 0.212s, Prediction Time: 0.212s
label PPOAgent, Reward 50: 16.000, Len(game): 16, Training Time: 0.215s, Prediction Time: 0.215s
label PPOAgent, Reward 51: 24.000, Len(game): 24, Training Time: 0.219s, Prediction Time: 0.219s
label PPOAgent, Reward 52: 18.000, Len(game): 18, Training Time: 0.221s, Prediction Time: 0.221s
label PPOAgent, Reward 53: 25.000, Len(game): 25, Training Time: 0.265s, Prediction Time: 0.265s
label PPOAgent, Reward 54: 16.000, Len(game): 16, Training Time: 0.268s, Prediction Time: 0.268s
label PPOAgent, Reward 55: 37.000, Len(game): 37, Training Time: 0.275s, Prediction Time: 0.275s
label PPOAgent, Reward 56: 20.000, Len(game): 20, Training Time: 0.279s, Prediction Time: 0.279s
label PPOAgent, Reward 57: 28.000, Len(game): 28, Training Time: 0.284s, Prediction Time: 0.284s
label PPOAgent, Reward 58: 15.000, Len(game): 15, Training Time: 0.287s, Prediction Time: 0.287s
label PPOAgent, Reward 59: 11.000, Len(game): 11, Training Time: 0.289s, Prediction Time: 0.289s
label PPOAgent, Reward 60: 17.000, Len(game): 17, Training Time: 0.293s, Prediction Time: 0.293s
label PPOAgent, Reward 61: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.295s
label PPOAgent, Reward 62: 24.000, Len(game): 24, Training Time: 0.299s, Prediction Time: 0.299s
label PPOAgent, Reward 63: 15.000, Len(game): 15, Training Time: 0.301s, Prediction Time: 0.301s
label PPOAgent, Reward 64: 14.000, Len(game): 14, Training Time: 0.303s, Prediction Time: 0.303s
label PPOAgent, Reward 65: 10.000, Len(game): 10, Training Time: 0.304s, Prediction Time: 0.304s
label PPOAgent, Reward 66: 15.000, Len(game): 15, Training Time: 0.308s, Prediction Time: 0.308s
label PPOAgent, Reward 67: 9.000, Len(game): 9, Training Time: 0.309s, Prediction Time: 0.309s
label PPOAgent, Reward 68: 16.000, Len(game): 16, Training Time: 0.312s, Prediction Time: 0.312s
label PPOAgent, Reward 69: 18.000, Len(game): 18, Training Time: 0.315s, Prediction Time: 0.315s
label PPOAgent, Reward 70: 15.000, Len(game): 15, Training Time: 0.319s, Prediction Time: 0.319s
label PPOAgent, Reward 71: 24.000, Len(game): 24, Training Time: 0.325s, Prediction Time: 0.325s
label PPOAgent, Reward 72: 51.000, Len(game): 51, Training Time: 0.335s, Prediction Time: 0.335s
label PPOAgent, Reward 73: 14.000, Len(game): 14, Training Time: 0.338s, Prediction Time: 0.338s
label PPOAgent, Reward 74: 32.000, Len(game): 32, Training Time: 0.344s, Prediction Time: 0.344s
label PPOAgent, Reward 75: 16.000, Len(game): 16, Training Time: 0.347s, Prediction Time: 0.347s
label PPOAgent, Reward 76: 89.000, Len(game): 89, Training Time: 0.366s, Prediction Time: 0.366s
label PPOAgent, Reward 77: 17.000, Len(game): 17, Training Time: 0.369s, Prediction Time: 0.369s
label PPOAgent, Reward 78: 21.000, Len(game): 21, Training Time: 0.373s, Prediction Time: 0.373s
label PPOAgent, Reward 79: 17.000, Len(game): 17, Training Time: 0.376s, Prediction Time: 0.376s
label PPOAgent, Reward 80: 26.000, Len(game): 26, Training Time: 0.381s, Prediction Time: 0.381s
label PPOAgent, Reward 81: 31.000, Len(game): 31, Training Time: 0.388s, Prediction Time: 0.388s
label PPOAgent, Reward 82: 20.000, Len(game): 20, Training Time: 0.391s, Prediction Time: 0.391s
label PPOAgent, Reward 83: 14.000, Len(game): 14, Training Time: 0.394s, Prediction Time: 0.394s
label PPOAgent, Reward 84: 13.000, Len(game): 13, Training Time: 0.397s, Prediction Time: 0.397s
label PPOAgent, Reward 85: 15.000, Len(game): 15, Training Time: 0.400s, Prediction Time: 0.400s
label PPOAgent, Reward 86: 16.000, Len(game): 16, Training Time: 0.403s, Prediction Time: 0.403s
label PPOAgent, Reward 87: 19.000, Len(game): 19, Training Time: 0.407s, Prediction Time: 0.407s
label PPOAgent, Reward 88: 13.000, Len(game): 13, Training Time: 0.409s, Prediction Time: 0.409s
label PPOAgent, Reward 89: 12.000, Len(game): 12, Training Time: 0.412s, Prediction Time: 0.412s
label PPOAgent, Reward 90: 20.000, Len(game): 20, Training Time: 0.415s, Prediction Time: 0.415s
label PPOAgent, Reward 91: 20.000, Len(game): 20, Training Time: 0.420s, Prediction Time: 0.420s
label PPOAgent, Reward 92: 56.000, Len(game): 56, Training Time: 0.431s, Prediction Time: 0.431s
label PPOAgent, Reward 93: 11.000, Len(game): 11, Training Time: 0.434s, Prediction Time: 0.434s
label PPOAgent, Reward 94: 26.000, Len(game): 26, Training Time: 0.438s, Prediction Time: 0.438s
label PPOAgent, Reward 95: 11.000, Len(game): 11, Training Time: 0.440s, Prediction Time: 0.440s
label PPOAgent, Reward 96: 21.000, Len(game): 21, Training Time: 0.444s, Prediction Time: 0.444s
label PPOAgent, Reward 97: 36.000, Len(game): 36, Training Time: 0.451s, Prediction Time: 0.451s
label PPOAgent, Reward 98: 26.000, Len(game): 26, Training Time: 0.455s, Prediction Time: 0.455s
label PPOAgent, Reward 99: 15.000, Len(game): 15, Training Time: 0.458s, Prediction Time: 0.458s
label PolicyGradient, Reward 0: 10.000, Len(game): 10, Training Time: 0.004s, Prediction Time: 0.000s
label PolicyGradient, Reward 1: 13.000, Len(game): 13, Training Time: 0.007s, Prediction Time: 0.003s
label PolicyGradient, Reward 2: 13.000, Len(game): 13, Training Time: 0.011s, Prediction Time: 0.006s
label PolicyGradient, Reward 3: 27.000, Len(game): 27, Training Time: 0.017s, Prediction Time: 0.011s
label PolicyGradient, Reward 4: 10.000, Len(game): 10, Training Time: 0.023s, Prediction Time: 0.013s
label PolicyGradient, Reward 5: 15.000, Len(game): 15, Training Time: 0.032s, Prediction Time: 0.016s
label PolicyGradient, Reward 6: 12.000, Len(game): 12, Training Time: 0.040s, Prediction Time: 0.018s
label PolicyGradient, Reward 7: 12.000, Len(game): 12, Training Time: 0.047s, Prediction Time: 0.021s
label PolicyGradient, Reward 8: 17.000, Len(game): 17, Training Time: 0.056s, Prediction Time: 0.026s
label PolicyGradient, Reward 9: 17.000, Len(game): 17, Training Time: 0.065s, Prediction Time: 0.031s
label PolicyGradient, Reward 10: 15.000, Len(game): 15, Training Time: 0.075s, Prediction Time: 0.036s
label PolicyGradient, Reward 11: 23.000, Len(game): 23, Training Time: 0.087s, Prediction Time: 0.043s
label PolicyGradient, Reward 12: 59.000, Len(game): 59, Training Time: 0.103s, Prediction Time: 0.058s
label PolicyGradient, Reward 13: 37.000, Len(game): 37, Training Time: 0.124s, Prediction Time: 0.070s
label PolicyGradient, Reward 14: 21.000, Len(game): 21, Training Time: 0.146s, Prediction Time: 0.079s
label PolicyGradient, Reward 15: 42.000, Len(game): 42, Training Time: 0.171s, Prediction Time: 0.095s
label PolicyGradient, Reward 16: 15.000, Len(game): 15, Training Time: 0.197s, Prediction Time: 0.106s
label PolicyGradient, Reward 17: 19.000, Len(game): 19, Training Time: 0.230s, Prediction Time: 0.118s
label PolicyGradient, Reward 18: 63.000, Len(game): 63, Training Time: 0.274s, Prediction Time: 0.148s
label PolicyGradient, Reward 19: 30.000, Len(game): 30, Training Time: 0.324s, Prediction Time: 0.169s
label PolicyGradient, Reward 20: 74.000, Len(game): 74, Training Time: 0.392s, Prediction Time: 0.206s
label PolicyGradient, Reward 21: 28.000, Len(game): 28, Training Time: 0.457s, Prediction Time: 0.235s
label PolicyGradient, Reward 22: 96.000, Len(game): 96, Training Time: 0.547s, Prediction Time: 0.292s
label PolicyGradient, Reward 23: 27.000, Len(game): 27, Training Time: 0.633s, Prediction Time: 0.330s
label PolicyGradient, Reward 24: 16.000, Len(game): 16, Training Time: 0.731s, Prediction Time: 0.367s
label PolicyGradient, Reward 25: 33.000, Len(game): 33, Training Time: 0.831s, Prediction Time: 0.414s
label PolicyGradient, Reward 26: 13.000, Len(game): 13, Training Time: 0.942s, Prediction Time: 0.459s
label PolicyGradient, Reward 27: 47.000, Len(game): 47, Training Time: 1.070s, Prediction Time: 0.521s
label PolicyGradient, Reward 28: 92.000, Len(game): 92, Training Time: 1.212s, Prediction Time: 0.606s
label PolicyGradient, Reward 29: 34.000, Len(game): 34, Training Time: 1.364s, Prediction Time: 0.670s
label PolicyGradient, Reward 30: 128.000, Len(game): 128, Training Time: 1.567s, Prediction Time: 0.790s
label PolicyGradient, Reward 31: 49.000, Len(game): 49, Training Time: 1.766s, Prediction Time: 0.890s
label PolicyGradient, Reward 32: 124.000, Len(game): 124, Training Time: 2.011s, Prediction Time: 1.034s
label PolicyGradient, Reward 33: 45.000, Len(game): 45, Training Time: 2.287s, Prediction Time: 1.158s
label PolicyGradient, Reward 34: 124.000, Len(game): 124, Training Time: 2.597s, Prediction Time: 1.329s
label PolicyGradient, Reward 35: 25.000, Len(game): 25, Training Time: 2.924s, Prediction Time: 1.462s
label PolicyGradient, Reward 36: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 1.612s
label PolicyGradient, Reward 37: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.772s
label PolicyGradient, Reward 38: 48.000, Len(game): 48, Training Time: 3.234s, Prediction Time: 1.801s
label PolicyGradient, Reward 39: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 1.832s
label PolicyGradient, Reward 40: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 1.845s
label PolicyGradient, Reward 41: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 1.861s
label PolicyGradient, Reward 42: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 1.884s
label PolicyGradient, Reward 43: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.907s
label PolicyGradient, Reward 44: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.924s
label PolicyGradient, Reward 45: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 1.938s
label PolicyGradient, Reward 46: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 1.962s
label PolicyGradient, Reward 47: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 1.981s
label PolicyGradient, Reward 48: 32.000, Len(game): 32, Training Time: 3.234s, Prediction Time: 2.001s
label PolicyGradient, Reward 49: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.020s
label PolicyGradient, Reward 50: 50.000, Len(game): 50, Training Time: 3.234s, Prediction Time: 2.051s
label PolicyGradient, Reward 51: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.072s
label PolicyGradient, Reward 52: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.097s
label PolicyGradient, Reward 53: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.117s
label PolicyGradient, Reward 54: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.138s
label PolicyGradient, Reward 55: 28.000, Len(game): 28, Training Time: 3.234s, Prediction Time: 2.156s
label PolicyGradient, Reward 56: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.176s
label PolicyGradient, Reward 57: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.197s
label PolicyGradient, Reward 58: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.213s
label PolicyGradient, Reward 59: 25.000, Len(game): 25, Training Time: 3.234s, Prediction Time: 2.228s
label PolicyGradient, Reward 60: 29.000, Len(game): 29, Training Time: 3.234s, Prediction Time: 2.246s
label PolicyGradient, Reward 61: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.266s
label PolicyGradient, Reward 62: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.286s
label PolicyGradient, Reward 63: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.311s
label PolicyGradient, Reward 64: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.336s
label PolicyGradient, Reward 65: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.359s
label PolicyGradient, Reward 66: 22.000, Len(game): 22, Training Time: 3.234s, Prediction Time: 2.373s
label PolicyGradient, Reward 67: 17.000, Len(game): 17, Training Time: 3.234s, Prediction Time: 2.383s
label PolicyGradient, Reward 68: 16.000, Len(game): 16, Training Time: 3.234s, Prediction Time: 2.393s
label PolicyGradient, Reward 69: 21.000, Len(game): 21, Training Time: 3.234s, Prediction Time: 2.406s
label PolicyGradient, Reward 70: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.432s
label PolicyGradient, Reward 71: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 2.445s
label PolicyGradient, Reward 72: 41.000, Len(game): 41, Training Time: 3.234s, Prediction Time: 2.471s
label PolicyGradient, Reward 73: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.490s
label PolicyGradient, Reward 74: 31.000, Len(game): 31, Training Time: 3.234s, Prediction Time: 2.510s
label PolicyGradient, Reward 75: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.530s
label PolicyGradient, Reward 76: 36.000, Len(game): 36, Training Time: 3.234s, Prediction Time: 2.552s
label PolicyGradient, Reward 77: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.569s
label PolicyGradient, Reward 78: 30.000, Len(game): 30, Training Time: 3.234s, Prediction Time: 2.587s
label PolicyGradient, Reward 79: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.603s
label PolicyGradient, Reward 80: 59.000, Len(game): 59, Training Time: 3.234s, Prediction Time: 2.639s
label PolicyGradient, Reward 81: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.664s
label PolicyGradient, Reward 82: 33.000, Len(game): 33, Training Time: 3.234s, Prediction Time: 2.686s
label PolicyGradient, Reward 83: 37.000, Len(game): 37, Training Time: 3.234s, Prediction Time: 2.708s
label PolicyGradient, Reward 84: 44.000, Len(game): 44, Training Time: 3.234s, Prediction Time: 2.735s
label PolicyGradient, Reward 85: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 2.757s
label PolicyGradient, Reward 86: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.781s
label PolicyGradient, Reward 87: 38.000, Len(game): 38, Training Time: 3.234s, Prediction Time: 2.805s
label PolicyGradient, Reward 88: 20.000, Len(game): 20, Training Time: 3.234s, Prediction Time: 2.818s
label PolicyGradient, Reward 89: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 2.840s
label PolicyGradient, Reward 90: 40.000, Len(game): 40, Training Time: 3.234s, Prediction Time: 2.866s
label PolicyGradient, Reward 91: 47.000, Len(game): 47, Training Time: 3.234s, Prediction Time: 2.896s
label PolicyGradient, Reward 92: 27.000, Len(game): 27, Training Time: 3.234s, Prediction Time: 2.912s
label PolicyGradient, Reward 93: 42.000, Len(game): 42, Training Time: 3.234s, Prediction Time: 2.938s
label PolicyGradient, Reward 94: 39.000, Len(game): 39, Training Time: 3.234s, Prediction Time: 2.962s
label PolicyGradient, Reward 95: 26.000, Len(game): 26, Training Time: 3.234s, Prediction Time: 2.978s
label PolicyGradient, Reward 96: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.000s
label PolicyGradient, Reward 97: 23.000, Len(game): 23, Training Time: 3.234s, Prediction Time: 3.014s
label PolicyGradient, Reward 98: 34.000, Len(game): 34, Training Time: 3.234s, Prediction Time: 3.035s
label PolicyGradient, Reward 99: 35.000, Len(game): 35, Training Time: 3.234s, Prediction Time: 3.057s
label Controller-based, Reward 0: 138.000, Len(game): 138, Training Time: 0.003s, Prediction Time: 0.001s
label Controller-based, Reward 1: 230.000, Len(game): 230, Training Time: 0.006s, Prediction Time: 0.003s
label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.018s, Prediction Time: 0.003s
label Controller-based, Reward 3: 148.000, Len(game): 148, Training Time: 0.028s, Prediction Time: 0.005s
label Controller-based, Reward 4: 9.000, Len(game): 9, Training Time: 0.037s, Prediction Time: 0.005s
label Controller-based, Reward 5: 263.000, Len(game): 263, Training Time: 0.049s, Prediction Time: 0.006s
label Controller-based, Reward 6: 206.000, Len(game): 206, Training Time: 0.061s, Prediction Time: 0.008s
label Controller-based, Reward 7: 249.000, Len(game): 249, Training Time: 0.073s, Prediction Time: 0.010s
label Controller-based, Reward 8: 281.000, Len(game): 281, Training Time: 0.086s, Prediction Time: 0.012s
label Controller-based, Reward 9: 153.000, Len(game): 153, Training Time: 0.097s, Prediction Time: 0.013s
no training
label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.097s, Prediction Time: 0.021s
label Controller-based, Reward 11: 750.000, Len(game): 750, Training Time: 0.114s, Prediction Time: 0.028s
no training
label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.037s
no training
label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.045s
no training
label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.053s
no training
label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.061s
no training
label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.070s
no training
label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.078s
no training
label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.086s
no training
label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.095s
no training
label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.103s
no training
label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.113s
no training
label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.121s
no training
label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.128s
no training
label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.137s
no training
label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.146s
no training
label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.154s
no training
label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.161s
no training
label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.170s
no training
label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.179s
no training
label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.187s
no training
label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.195s
no training
label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.203s
no training
label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.210s
no training
label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.217s
no training
label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.225s
no training
label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.234s
no training
label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.242s
no training
label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.250s
no training
label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.259s
no training
label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.267s
no training
label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.275s
no training
label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.284s
no training
label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.292s
no training
label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.301s
no training
label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.308s
no training
label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.317s
no training
label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.326s
no training
label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.334s
no training
label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.342s
no training
label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.351s
no training
label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.358s
no training
label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.366s
no training
label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.374s
no training
label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.382s
no training
label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.391s
no training
label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.399s
no training
label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.114s, Prediction Time: 0.407s
no training
label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.415s
no training
label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.423s
no training
label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.431s
no training
label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.439s
no training
label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.447s
no training
label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.455s
no training
label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.463s
no training
label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.471s
no training
label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.480s
no training
label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.488s
no training
label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.496s
no training
label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.505s
no training
label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.513s
no training
label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.522s
no training
label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.530s
no training
label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.538s
no training
label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.546s
no training
label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.555s
no training
label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.563s
no training
label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.572s
no training
label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.581s
no training
label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.589s
no training
label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.597s
no training
label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.605s
no training
label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.613s
no training
label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.621s
no training
label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.630s
no training
label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.638s
no training
label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.647s
no training
label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.655s
no training
label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.664s
no training
label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.671s
no training
label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.681s
no training
label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.689s
no training
label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.698s
no training
label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.707s
no training
label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.715s
no training
label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.722s
no training
label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.731s
no training
label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.740s
no training
label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.749s
no training
label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.115s, Prediction Time: 0.756s
label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s
label KACAgent, Reward 1: 48.000, Len(game): 48, Training Time: 0.008s, Prediction Time: 0.009s
label KACAgent, Reward 2: 15.000, Len(game): 15, Training Time: 0.015s, Prediction Time: 0.012s
label KACAgent, Reward 3: 70.000, Len(game): 70, Training Time: 0.024s, Prediction Time: 0.026s
label KACAgent, Reward 4: 60.000, Len(game): 60, Training Time: 0.038s, Prediction Time: 0.040s
label KACAgent, Reward 5: 105.000, Len(game): 105, Training Time: 0.063s, Prediction Time: 0.064s
label KACAgent, Reward 6: 136.000, Len(game): 136, Training Time: 0.106s, Prediction Time: 0.105s
label KACAgent, Reward 7: 71.000, Len(game): 71, Training Time: 0.158s, Prediction Time: 0.139s
label KACAgent, Reward 8: 97.000, Len(game): 97, Training Time: 0.226s, Prediction Time: 0.190s
label KACAgent, Reward 9: 191.000, Len(game): 191, Training Time: 0.337s, Prediction Time: 0.283s
label KACAgent, Reward 10: 78.000, Len(game): 78, Training Time: 0.465s, Prediction Time: 0.363s
label KACAgent, Reward 11: 142.000, Len(game): 142, Training Time: 0.629s, Prediction Time: 0.475s
label KACAgent, Reward 12: 149.000, Len(game): 149, Training Time: 0.833s, Prediction Time: 0.633s
label KACAgent, Reward 13: 362.000, Len(game): 362, Training Time: 1.147s, Prediction Time: 0.915s
label KACAgent, Reward 14: 209.000, Len(game): 209, Training Time: 1.565s, Prediction Time: 1.223s
label KACAgent, Reward 15: 286.000, Len(game): 286, Training Time: 2.114s, Prediction Time: 1.648s
label KACAgent, Reward 16: 297.000, Len(game): 297, Training Time: 2.809s, Prediction Time: 2.164s
label KACAgent, Reward 17: 366.000, Len(game): 366, Training Time: 3.681s, Prediction Time: 2.861s
label KACAgent, Reward 18: 273.000, Len(game): 273, Training Time: 3.681s, Prediction Time: 3.597s
label KACAgent, Reward 19: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 3.916s
label KACAgent, Reward 20: 268.000, Len(game): 268, Training Time: 3.681s, Prediction Time: 4.126s
label KACAgent, Reward 21: 345.000, Len(game): 345, Training Time: 3.681s, Prediction Time: 4.396s
label KACAgent, Reward 22: 311.000, Len(game): 311, Training Time: 3.681s, Prediction Time: 4.638s
label KACAgent, Reward 23: 369.000, Len(game): 369, Training Time: 3.681s, Prediction Time: 4.926s
label KACAgent, Reward 24: 357.000, Len(game): 357, Training Time: 3.681s, Prediction Time: 5.203s
label KACAgent, Reward 25: 391.000, Len(game): 391, Training Time: 3.681s, Prediction Time: 5.510s
label KACAgent, Reward 26: 340.000, Len(game): 340, Training Time: 3.681s, Prediction Time: 5.775s
label KACAgent, Reward 27: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 6.037s
label KACAgent, Reward 28: 354.000, Len(game): 354, Training Time: 3.681s, Prediction Time: 6.316s
label KACAgent, Reward 29: 290.000, Len(game): 290, Training Time: 3.681s, Prediction Time: 6.541s
label KACAgent, Reward 30: 112.000, Len(game): 112, Training Time: 3.681s, Prediction Time: 6.627s
label KACAgent, Reward 31: 390.000, Len(game): 390, Training Time: 3.681s, Prediction Time: 6.930s
label KACAgent, Reward 32: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.156s
label KACAgent, Reward 33: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 7.383s
label KACAgent, Reward 34: 287.000, Len(game): 287, Training Time: 3.681s, Prediction Time: 7.608s
label KACAgent, Reward 35: 341.000, Len(game): 341, Training Time: 3.681s, Prediction Time: 7.880s
label KACAgent, Reward 36: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 8.162s
label KACAgent, Reward 37: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 8.394s
label KACAgent, Reward 38: 434.000, Len(game): 434, Training Time: 3.681s, Prediction Time: 8.737s
label KACAgent, Reward 39: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 8.961s
label KACAgent, Reward 40: 356.000, Len(game): 356, Training Time: 3.681s, Prediction Time: 9.239s
label KACAgent, Reward 41: 309.000, Len(game): 309, Training Time: 3.681s, Prediction Time: 9.480s
label KACAgent, Reward 42: 384.000, Len(game): 384, Training Time: 3.681s, Prediction Time: 9.777s
label KACAgent, Reward 43: 439.000, Len(game): 439, Training Time: 3.681s, Prediction Time: 10.119s
label KACAgent, Reward 44: 414.000, Len(game): 414, Training Time: 3.681s, Prediction Time: 10.441s
label KACAgent, Reward 45: 379.000, Len(game): 379, Training Time: 3.681s, Prediction Time: 10.737s
label KACAgent, Reward 46: 283.000, Len(game): 283, Training Time: 3.681s, Prediction Time: 10.956s
label KACAgent, Reward 47: 315.000, Len(game): 315, Training Time: 3.681s, Prediction Time: 11.203s
label KACAgent, Reward 48: 266.000, Len(game): 266, Training Time: 3.681s, Prediction Time: 11.410s
label KACAgent, Reward 49: 306.000, Len(game): 306, Training Time: 3.681s, Prediction Time: 11.649s
label KACAgent, Reward 50: 378.000, Len(game): 378, Training Time: 3.681s, Prediction Time: 11.946s
label KACAgent, Reward 51: 196.000, Len(game): 196, Training Time: 3.681s, Prediction Time: 12.098s
label KACAgent, Reward 52: 508.000, Len(game): 508, Training Time: 3.681s, Prediction Time: 12.494s
label KACAgent, Reward 53: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 12.735s
label KACAgent, Reward 54: 393.000, Len(game): 393, Training Time: 3.681s, Prediction Time: 13.039s
label KACAgent, Reward 55: 276.000, Len(game): 276, Training Time: 3.681s, Prediction Time: 13.255s
label KACAgent, Reward 56: 405.000, Len(game): 405, Training Time: 3.681s, Prediction Time: 13.572s
label KACAgent, Reward 57: 461.000, Len(game): 461, Training Time: 3.681s, Prediction Time: 13.929s
label KACAgent, Reward 58: 294.000, Len(game): 294, Training Time: 3.681s, Prediction Time: 14.161s
label KACAgent, Reward 59: 331.000, Len(game): 331, Training Time: 3.681s, Prediction Time: 14.420s
label KACAgent, Reward 60: 314.000, Len(game): 314, Training Time: 3.681s, Prediction Time: 14.664s
label KACAgent, Reward 61: 281.000, Len(game): 281, Training Time: 3.681s, Prediction Time: 14.883s
label KACAgent, Reward 62: 300.000, Len(game): 300, Training Time: 3.681s, Prediction Time: 15.116s
label KACAgent, Reward 63: 282.000, Len(game): 282, Training Time: 3.681s, Prediction Time: 15.335s
label KACAgent, Reward 64: 304.000, Len(game): 304, Training Time: 3.681s, Prediction Time: 15.571s
label KACAgent, Reward 65: 296.000, Len(game): 296, Training Time: 3.681s, Prediction Time: 15.801s
label KACAgent, Reward 66: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 16.071s
label KACAgent, Reward 67: 318.000, Len(game): 318, Training Time: 3.681s, Prediction Time: 16.318s
label KACAgent, Reward 68: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 16.565s
label KACAgent, Reward 69: 312.000, Len(game): 312, Training Time: 3.681s, Prediction Time: 16.810s
label KACAgent, Reward 70: 186.000, Len(game): 186, Training Time: 3.681s, Prediction Time: 16.955s
label KACAgent, Reward 71: 362.000, Len(game): 362, Training Time: 3.681s, Prediction Time: 17.237s
label KACAgent, Reward 72: 433.000, Len(game): 433, Training Time: 3.681s, Prediction Time: 17.573s
label KACAgent, Reward 73: 285.000, Len(game): 285, Training Time: 3.681s, Prediction Time: 17.795s
label KACAgent, Reward 74: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 18.053s
label KACAgent, Reward 75: 346.000, Len(game): 346, Training Time: 3.681s, Prediction Time: 18.322s
label KACAgent, Reward 76: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 18.609s
label KACAgent, Reward 77: 652.000, Len(game): 652, Training Time: 3.681s, Prediction Time: 19.121s
label KACAgent, Reward 78: 376.000, Len(game): 376, Training Time: 3.681s, Prediction Time: 19.421s
label KACAgent, Reward 79: 620.000, Len(game): 620, Training Time: 3.681s, Prediction Time: 19.907s
label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 3.681s, Prediction Time: 20.175s
label KACAgent, Reward 81: 319.000, Len(game): 319, Training Time: 3.681s, Prediction Time: 20.432s
label KACAgent, Reward 82: 332.000, Len(game): 332, Training Time: 3.681s, Prediction Time: 20.694s
label KACAgent, Reward 83: 302.000, Len(game): 302, Training Time: 3.681s, Prediction Time: 20.934s
label KACAgent, Reward 84: 427.000, Len(game): 427, Training Time: 3.681s, Prediction Time: 21.268s
label KACAgent, Reward 85: 298.000, Len(game): 298, Training Time: 3.681s, Prediction Time: 21.501s
label KACAgent, Reward 86: 308.000, Len(game): 308, Training Time: 3.681s, Prediction Time: 21.749s
label KACAgent, Reward 87: 263.000, Len(game): 263, Training Time: 3.681s, Prediction Time: 21.957s
label KACAgent, Reward 88: 316.000, Len(game): 316, Training Time: 3.681s, Prediction Time: 22.208s
label KACAgent, Reward 89: 373.000, Len(game): 373, Training Time: 3.681s, Prediction Time: 22.514s
label KACAgent, Reward 90: 322.000, Len(game): 322, Training Time: 3.681s, Prediction Time: 22.769s
label KACAgent, Reward 91: 337.000, Len(game): 337, Training Time: 3.681s, Prediction Time: 23.036s
label KACAgent, Reward 92: 328.000, Len(game): 328, Training Time: 3.681s, Prediction Time: 23.293s
label KACAgent, Reward 93: 472.000, Len(game): 472, Training Time: 3.681s, Prediction Time: 23.669s
label KACAgent, Reward 94: 372.000, Len(game): 372, Training Time: 3.681s, Prediction Time: 23.969s
label KACAgent, Reward 95: 293.000, Len(game): 293, Training Time: 3.681s, Prediction Time: 24.202s
label KACAgent, Reward 96: 386.000, Len(game): 386, Training Time: 3.681s, Prediction Time: 24.509s
label KACAgent, Reward 97: 323.000, Len(game): 323, Training Time: 3.681s, Prediction Time: 24.765s
label KACAgent, Reward 98: 364.000, Len(game): 364, Training Time: 3.681s, Prediction Time: 25.065s
label KACAgent, Reward 99: 334.000, Len(game): 334, Training Time: 3.681s, Prediction Time: 25.328s
label DQNAgent, Reward 0: 31.000, Len(game): 31, Training Time: 0.001s, Prediction Time: 0.000s
label DQNAgent, Reward 1: 37.000, Len(game): 37, Training Time: 0.007s, Prediction Time: 0.000s
label DQNAgent, Reward 2: 29.000, Len(game): 29, Training Time: 0.043s, Prediction Time: 0.000s
label DQNAgent, Reward 3: 17.000, Len(game): 17, Training Time: 0.061s, Prediction Time: 0.000s
label DQNAgent, Reward 4: 16.000, Len(game): 16, Training Time: 0.078s, Prediction Time: 0.001s
label DQNAgent, Reward 5: 11.000, Len(game): 11, Training Time: 0.091s, Prediction Time: 0.001s
label DQNAgent, Reward 6: 23.000, Len(game): 23, Training Time: 0.118s, Prediction Time: 0.001s
label DQNAgent, Reward 7: 12.000, Len(game): 12, Training Time: 0.132s, Prediction Time: 0.001s
label DQNAgent, Reward 8: 19.000, Len(game): 19, Training Time: 0.155s, Prediction Time: 0.002s
label DQNAgent, Reward 9: 13.000, Len(game): 13, Training Time: 0.172s, Prediction Time: 0.003s
label DQNAgent, Reward 10: 14.000, Len(game): 14, Training Time: 0.191s, Prediction Time: 0.003s
label DQNAgent, Reward 11: 21.000, Len(game): 21, Training Time: 0.213s, Prediction Time: 0.004s
label DQNAgent, Reward 12: 9.000, Len(game): 9, Training Time: 0.223s, Prediction Time: 0.005s
label DQNAgent, Reward 13: 11.000, Len(game): 11, Training Time: 0.235s, Prediction Time: 0.006s
label DQNAgent, Reward 14: 18.000, Len(game): 18, Training Time: 0.256s, Prediction Time: 0.006s
label DQNAgent, Reward 15: 9.000, Len(game): 9, Training Time: 0.266s, Prediction Time: 0.007s
label DQNAgent, Reward 16: 14.000, Len(game): 14, Training Time: 0.281s, Prediction Time: 0.007s
label DQNAgent, Reward 17: 13.000, Len(game): 13, Training Time: 0.295s, Prediction Time: 0.008s
label DQNAgent, Reward 18: 13.000, Len(game): 13, Training Time: 0.310s, Prediction Time: 0.008s
label DQNAgent, Reward 19: 9.000, Len(game): 9, Training Time: 0.320s, Prediction Time: 0.008s
label DQNAgent, Reward 20: 10.000, Len(game): 10, Training Time: 0.331s, Prediction Time: 0.009s
label DQNAgent, Reward 21: 26.000, Len(game): 26, Training Time: 0.359s, Prediction Time: 0.011s
label DQNAgent, Reward 22: 16.000, Len(game): 16, Training Time: 0.378s, Prediction Time: 0.011s
label DQNAgent, Reward 23: 28.000, Len(game): 28, Training Time: 0.409s, Prediction Time: 0.011s
label DQNAgent, Reward 24: 11.000, Len(game): 11, Training Time: 0.422s, Prediction Time: 0.012s
label DQNAgent, Reward 25: 18.000, Len(game): 18, Training Time: 0.442s, Prediction Time: 0.013s
label DQNAgent, Reward 26: 27.000, Len(game): 27, Training Time: 0.475s, Prediction Time: 0.013s
label DQNAgent, Reward 27: 16.000, Len(game): 16, Training Time: 0.492s, Prediction Time: 0.013s
label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.505s, Prediction Time: 0.013s
label DQNAgent, Reward 29: 11.000, Len(game): 11, Training Time: 0.516s, Prediction Time: 0.014s
label DQNAgent, Reward 30: 19.000, Len(game): 19, Training Time: 0.540s, Prediction Time: 0.015s
label DQNAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.562s, Prediction Time: 0.016s
label DQNAgent, Reward 32: 18.000, Len(game): 18, Training Time: 0.582s, Prediction Time: 0.017s
label DQNAgent, Reward 33: 12.000, Len(game): 12, Training Time: 0.594s, Prediction Time: 0.017s
label DQNAgent, Reward 34: 15.000, Len(game): 15, Training Time: 0.610s, Prediction Time: 0.018s
label DQNAgent, Reward 35: 33.000, Len(game): 33, Training Time: 0.646s, Prediction Time: 0.019s
label DQNAgent, Reward 36: 34.000, Len(game): 34, Training Time: 0.685s, Prediction Time: 0.020s
label DQNAgent, Reward 37: 25.000, Len(game): 25, Training Time: 0.712s, Prediction Time: 0.020s
label DQNAgent, Reward 38: 11.000, Len(game): 11, Training Time: 0.724s, Prediction Time: 0.022s
label DQNAgent, Reward 39: 20.000, Len(game): 20, Training Time: 0.746s, Prediction Time: 0.023s
label DQNAgent, Reward 40: 13.000, Len(game): 13, Training Time: 0.760s, Prediction Time: 0.023s
label DQNAgent, Reward 41: 38.000, Len(game): 38, Training Time: 0.802s, Prediction Time: 0.024s
label DQNAgent, Reward 42: 20.000, Len(game): 20, Training Time: 0.825s, Prediction Time: 0.025s
label DQNAgent, Reward 43: 14.000, Len(game): 14, Training Time: 0.840s, Prediction Time: 0.025s
label DQNAgent, Reward 44: 28.000, Len(game): 28, Training Time: 0.871s, Prediction Time: 0.026s
label DQNAgent, Reward 45: 24.000, Len(game): 24, Training Time: 0.899s, Prediction Time: 0.027s
label DQNAgent, Reward 46: 33.000, Len(game): 33, Training Time: 0.936s, Prediction Time: 0.028s
label DQNAgent, Reward 47: 19.000, Len(game): 19, Training Time: 0.957s, Prediction Time: 0.029s
label DQNAgent, Reward 48: 23.000, Len(game): 23, Training Time: 0.981s, Prediction Time: 0.029s
label DQNAgent, Reward 49: 36.000, Len(game): 36, Training Time: 1.020s, Prediction Time: 0.030s
label DQNAgent, Reward 50: 111.000, Len(game): 111, Training Time: 1.144s, Prediction Time: 0.035s
label DQNAgent, Reward 51: 176.000, Len(game): 176, Training Time: 1.343s, Prediction Time: 0.041s
label DQNAgent, Reward 52: 202.000, Len(game): 202, Training Time: 1.569s, Prediction Time: 0.048s
label DQNAgent, Reward 53: 151.000, Len(game): 151, Training Time: 1.736s, Prediction Time: 0.053s
label DQNAgent, Reward 54: 163.000, Len(game): 163, Training Time: 1.936s, Prediction Time: 0.060s
label DQNAgent, Reward 55: 178.000, Len(game): 178, Training Time: 2.147s, Prediction Time: 0.067s
label DQNAgent, Reward 56: 443.000, Len(game): 443, Training Time: 2.657s, Prediction Time: 0.084s
label DQNAgent, Reward 57: 234.000, Len(game): 234, Training Time: 2.931s, Prediction Time: 0.094s
label DQNAgent, Reward 58: 179.000, Len(game): 179, Training Time: 3.131s, Prediction Time: 0.102s
label DQNAgent, Reward 59: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.110s
label DQNAgent, Reward 60: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.123s
label DQNAgent, Reward 61: 221.000, Len(game): 221, Training Time: 3.131s, Prediction Time: 0.132s
label DQNAgent, Reward 62: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.140s
label DQNAgent, Reward 63: 186.000, Len(game): 186, Training Time: 3.131s, Prediction Time: 0.147s
label DQNAgent, Reward 64: 285.000, Len(game): 285, Training Time: 3.131s, Prediction Time: 0.160s
label DQNAgent, Reward 65: 342.000, Len(game): 342, Training Time: 3.131s, Prediction Time: 0.173s
label DQNAgent, Reward 66: 205.000, Len(game): 205, Training Time: 3.131s, Prediction Time: 0.182s
label DQNAgent, Reward 67: 150.000, Len(game): 150, Training Time: 3.131s, Prediction Time: 0.188s
label DQNAgent, Reward 68: 215.000, Len(game): 215, Training Time: 3.131s, Prediction Time: 0.198s
label DQNAgent, Reward 69: 163.000, Len(game): 163, Training Time: 3.131s, Prediction Time: 0.204s
label DQNAgent, Reward 70: 252.000, Len(game): 252, Training Time: 3.131s, Prediction Time: 0.214s
label DQNAgent, Reward 71: 316.000, Len(game): 316, Training Time: 3.131s, Prediction Time: 0.228s
label DQNAgent, Reward 72: 218.000, Len(game): 218, Training Time: 3.131s, Prediction Time: 0.237s
label DQNAgent, Reward 73: 225.000, Len(game): 225, Training Time: 3.131s, Prediction Time: 0.246s
label DQNAgent, Reward 74: 305.000, Len(game): 305, Training Time: 3.131s, Prediction Time: 0.258s
label DQNAgent, Reward 75: 254.000, Len(game): 254, Training Time: 3.131s, Prediction Time: 0.269s
label DQNAgent, Reward 76: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.277s
label DQNAgent, Reward 77: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.287s
label DQNAgent, Reward 78: 212.000, Len(game): 212, Training Time: 3.131s, Prediction Time: 0.296s
label DQNAgent, Reward 79: 334.000, Len(game): 334, Training Time: 3.131s, Prediction Time: 0.310s
label DQNAgent, Reward 80: 236.000, Len(game): 236, Training Time: 3.131s, Prediction Time: 0.321s
label DQNAgent, Reward 81: 185.000, Len(game): 185, Training Time: 3.131s, Prediction Time: 0.329s
label DQNAgent, Reward 82: 166.000, Len(game): 166, Training Time: 3.131s, Prediction Time: 0.335s
label DQNAgent, Reward 83: 325.000, Len(game): 325, Training Time: 3.131s, Prediction Time: 0.348s
label DQNAgent, Reward 84: 140.000, Len(game): 140, Training Time: 3.131s, Prediction Time: 0.353s
label DQNAgent, Reward 85: 346.000, Len(game): 346, Training Time: 3.131s, Prediction Time: 0.367s
label DQNAgent, Reward 86: 246.000, Len(game): 246, Training Time: 3.131s, Prediction Time: 0.377s
label DQNAgent, Reward 87: 202.000, Len(game): 202, Training Time: 3.131s, Prediction Time: 0.385s
label DQNAgent, Reward 88: 262.000, Len(game): 262, Training Time: 3.131s, Prediction Time: 0.395s
label DQNAgent, Reward 89: 223.000, Len(game): 223, Training Time: 3.131s, Prediction Time: 0.404s
label DQNAgent, Reward 90: 188.000, Len(game): 188, Training Time: 3.131s, Prediction Time: 0.412s
label DQNAgent, Reward 91: 173.000, Len(game): 173, Training Time: 3.131s, Prediction Time: 0.419s
label DQNAgent, Reward 92: 167.000, Len(game): 167, Training Time: 3.131s, Prediction Time: 0.426s
label DQNAgent, Reward 93: 242.000, Len(game): 242, Training Time: 3.131s, Prediction Time: 0.435s
label DQNAgent, Reward 94: 310.000, Len(game): 310, Training Time: 3.131s, Prediction Time: 0.448s
label DQNAgent, Reward 95: 320.000, Len(game): 320, Training Time: 3.131s, Prediction Time: 0.461s
label DQNAgent, Reward 96: 145.000, Len(game): 145, Training Time: 3.131s, Prediction Time: 0.467s
label DQNAgent, Reward 97: 156.000, Len(game): 156, Training Time: 3.131s, Prediction Time: 0.472s
label DQNAgent, Reward 98: 178.000, Len(game): 178, Training Time: 3.131s, Prediction Time: 0.480s
label DQNAgent, Reward 99: 217.000, Len(game): 217, Training Time: 3.131s, Prediction Time: 0.488s
Computed global error Bellman mean:  6.83823226615183e-08  iter:  3
label KQLearningHJBCP, Reward 0: 35.000, Len(game): 35, Training Time: 0.012s, Prediction Time: 0.000s
Computed global error Bellman mean:  6.153183776724802e-08  iter:  4
label KQLearningHJBCP, Reward 1: 40.000, Len(game): 40, Training Time: 0.036s, Prediction Time: 0.007s
Computed global error Bellman mean:  6.570681726583113e-08  iter:  6
label KQLearningHJBCP, Reward 2: 47.000, Len(game): 47, Training Time: 0.099s, Prediction Time: 0.015s
no training
label KQLearningHJBCP, Reward 3: 1000.000, Len(game): 1000, Training Time: 0.099s, Prediction Time: 0.211s
Computed global error Bellman mean:  1.207237322271833e-07  iter:  9
label KQLearningHJBCP, Reward 4: 529.000, Len(game): 529, Training Time: 2.215s, Prediction Time: 0.318s
no training
label KQLearningHJBCP, Reward 5: 1000.000, Len(game): 1000, Training Time: 2.215s, Prediction Time: 0.748s
Computed global error Bellman mean:  1.9297541858819368e-07  iter:  8
label KQLearningHJBCP, Reward 6: 779.000, Len(game): 779, Training Time: 10.511s, Prediction Time: 1.100s
label KQLearningHJBCP, Reward 7: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 1.790s
label KQLearningHJBCP, Reward 8: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 2.465s
label KQLearningHJBCP, Reward 9: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.151s
label KQLearningHJBCP, Reward 10: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 3.831s
label KQLearningHJBCP, Reward 11: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 4.517s
label KQLearningHJBCP, Reward 12: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.202s
label KQLearningHJBCP, Reward 13: 163.000, Len(game): 163, Training Time: 10.511s, Prediction Time: 5.314s
label KQLearningHJBCP, Reward 14: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 5.998s
label KQLearningHJBCP, Reward 15: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 6.679s
label KQLearningHJBCP, Reward 16: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 7.358s
label KQLearningHJBCP, Reward 17: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.046s
label KQLearningHJBCP, Reward 18: 173.000, Len(game): 173, Training Time: 10.511s, Prediction Time: 8.163s
label KQLearningHJBCP, Reward 19: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 8.845s
label KQLearningHJBCP, Reward 20: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 9.524s
label KQLearningHJBCP, Reward 21: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.206s
label KQLearningHJBCP, Reward 22: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 10.891s
label KQLearningHJBCP, Reward 23: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 11.151s
label KQLearningHJBCP, Reward 24: 459.000, Len(game): 459, Training Time: 10.511s, Prediction Time: 11.466s
label KQLearningHJBCP, Reward 25: 264.000, Len(game): 264, Training Time: 10.511s, Prediction Time: 11.646s
label KQLearningHJBCP, Reward 26: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 12.332s
label KQLearningHJBCP, Reward 27: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 13.027s
label KQLearningHJBCP, Reward 28: 515.000, Len(game): 515, Training Time: 10.511s, Prediction Time: 13.374s
label KQLearningHJBCP, Reward 29: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.055s
label KQLearningHJBCP, Reward 30: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 14.744s
label KQLearningHJBCP, Reward 31: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 15.427s
label KQLearningHJBCP, Reward 32: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.106s
label KQLearningHJBCP, Reward 33: 240.000, Len(game): 240, Training Time: 10.511s, Prediction Time: 16.271s
label KQLearningHJBCP, Reward 34: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 16.953s
label KQLearningHJBCP, Reward 35: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 17.631s
label KQLearningHJBCP, Reward 36: 273.000, Len(game): 273, Training Time: 10.511s, Prediction Time: 17.814s
label KQLearningHJBCP, Reward 37: 688.000, Len(game): 688, Training Time: 10.511s, Prediction Time: 18.282s
label KQLearningHJBCP, Reward 38: 861.000, Len(game): 861, Training Time: 10.511s, Prediction Time: 18.862s
label KQLearningHJBCP, Reward 39: 242.000, Len(game): 242, Training Time: 10.511s, Prediction Time: 19.025s
label KQLearningHJBCP, Reward 40: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 19.712s
label KQLearningHJBCP, Reward 41: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 20.401s
label KQLearningHJBCP, Reward 42: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.084s
label KQLearningHJBCP, Reward 43: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 21.775s
label KQLearningHJBCP, Reward 44: 985.000, Len(game): 985, Training Time: 10.511s, Prediction Time: 22.457s
label KQLearningHJBCP, Reward 45: 530.000, Len(game): 530, Training Time: 10.511s, Prediction Time: 22.820s
label KQLearningHJBCP, Reward 46: 228.000, Len(game): 228, Training Time: 10.511s, Prediction Time: 22.977s
label KQLearningHJBCP, Reward 47: 405.000, Len(game): 405, Training Time: 10.511s, Prediction Time: 23.254s
label KQLearningHJBCP, Reward 48: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 23.943s
label KQLearningHJBCP, Reward 49: 366.000, Len(game): 366, Training Time: 10.511s, Prediction Time: 24.198s
label KQLearningHJBCP, Reward 50: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 24.887s
label KQLearningHJBCP, Reward 51: 208.000, Len(game): 208, Training Time: 10.511s, Prediction Time: 25.035s
label KQLearningHJBCP, Reward 52: 497.000, Len(game): 497, Training Time: 10.511s, Prediction Time: 25.380s
label KQLearningHJBCP, Reward 53: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.065s
label KQLearningHJBCP, Reward 54: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 26.751s
label KQLearningHJBCP, Reward 55: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 27.442s
label KQLearningHJBCP, Reward 56: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.128s
label KQLearningHJBCP, Reward 57: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 28.812s
label KQLearningHJBCP, Reward 58: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 29.494s
label KQLearningHJBCP, Reward 59: 288.000, Len(game): 288, Training Time: 10.511s, Prediction Time: 29.694s
label KQLearningHJBCP, Reward 60: 244.000, Len(game): 244, Training Time: 10.511s, Prediction Time: 29.859s
label KQLearningHJBCP, Reward 61: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 30.550s
label KQLearningHJBCP, Reward 62: 224.000, Len(game): 224, Training Time: 10.511s, Prediction Time: 30.705s
label KQLearningHJBCP, Reward 63: 259.000, Len(game): 259, Training Time: 10.511s, Prediction Time: 30.885s
label KQLearningHJBCP, Reward 64: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 31.574s
label KQLearningHJBCP, Reward 65: 285.000, Len(game): 285, Training Time: 10.511s, Prediction Time: 31.774s
label KQLearningHJBCP, Reward 66: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 32.483s
label KQLearningHJBCP, Reward 67: 268.000, Len(game): 268, Training Time: 10.511s, Prediction Time: 32.672s
label KQLearningHJBCP, Reward 68: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 33.366s
label KQLearningHJBCP, Reward 69: 237.000, Len(game): 237, Training Time: 10.511s, Prediction Time: 33.533s
label KQLearningHJBCP, Reward 70: 252.000, Len(game): 252, Training Time: 10.511s, Prediction Time: 33.706s
label KQLearningHJBCP, Reward 71: 751.000, Len(game): 751, Training Time: 10.511s, Prediction Time: 34.228s
label KQLearningHJBCP, Reward 72: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 34.919s
label KQLearningHJBCP, Reward 73: 170.000, Len(game): 170, Training Time: 10.511s, Prediction Time: 35.037s
label KQLearningHJBCP, Reward 74: 336.000, Len(game): 336, Training Time: 10.511s, Prediction Time: 35.267s
label KQLearningHJBCP, Reward 75: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 35.953s
label KQLearningHJBCP, Reward 76: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 36.639s
label KQLearningHJBCP, Reward 77: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 37.328s
label KQLearningHJBCP, Reward 78: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.012s
label KQLearningHJBCP, Reward 79: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 38.710s
label KQLearningHJBCP, Reward 80: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 38.853s
label KQLearningHJBCP, Reward 81: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 39.536s
label KQLearningHJBCP, Reward 82: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 40.223s
label KQLearningHJBCP, Reward 83: 757.000, Len(game): 757, Training Time: 10.511s, Prediction Time: 40.738s
label KQLearningHJBCP, Reward 84: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 41.428s
label KQLearningHJBCP, Reward 85: 204.000, Len(game): 204, Training Time: 10.511s, Prediction Time: 41.567s
label KQLearningHJBCP, Reward 86: 281.000, Len(game): 281, Training Time: 10.511s, Prediction Time: 41.761s
label KQLearningHJBCP, Reward 87: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 42.444s
label KQLearningHJBCP, Reward 88: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.123s
label KQLearningHJBCP, Reward 89: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 43.802s
label KQLearningHJBCP, Reward 90: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 44.488s
label KQLearningHJBCP, Reward 91: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.172s
label KQLearningHJBCP, Reward 92: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 45.858s
label KQLearningHJBCP, Reward 93: 177.000, Len(game): 177, Training Time: 10.511s, Prediction Time: 45.978s
label KQLearningHJBCP, Reward 94: 563.000, Len(game): 563, Training Time: 10.511s, Prediction Time: 46.358s
label KQLearningHJBCP, Reward 95: 382.000, Len(game): 382, Training Time: 10.511s, Prediction Time: 46.617s
label KQLearningHJBCP, Reward 96: 704.000, Len(game): 704, Training Time: 10.511s, Prediction Time: 47.097s
label KQLearningHJBCP, Reward 97: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 47.782s
label KQLearningHJBCP, Reward 98: 539.000, Len(game): 539, Training Time: 10.511s, Prediction Time: 48.150s
label KQLearningHJBCP, Reward 99: 1000.000, Len(game): 1000, Training Time: 10.511s, Prediction Time: 48.828s
Computed global error Bellman mean:  3.9266451670430215e-08  iter:  2
label KQLearning, Reward 0: 18.000, Len(game): 18, Training Time: 0.005s, Prediction Time: 0.000s
Computed global error Bellman mean:  0.3964756234060676  iter:  5
label KQLearning, Reward 1: 51.000, Len(game): 51, Training Time: 0.022s, Prediction Time: 0.008s
Computed global error Bellman mean:  0.4553239146341465  iter:  5
label KQLearning, Reward 2: 35.000, Len(game): 35, Training Time: 0.052s, Prediction Time: 0.014s
Computed global error Bellman mean:  3.059006677079503e-07  iter:  4
label KQLearning, Reward 3: 53.000, Len(game): 53, Training Time: 0.089s, Prediction Time: 0.024s
Computed global error Bellman mean:  0.02300332025657364  iter:  5
label KQLearning, Reward 4: 63.000, Len(game): 63, Training Time: 0.171s, Prediction Time: 0.036s
Computed global error Bellman mean:  0.12679985761031448  iter:  5
label KQLearning, Reward 5: 45.000, Len(game): 45, Training Time: 0.319s, Prediction Time: 0.048s
Computed global error Bellman mean:  0.052994993189221073  iter:  5
label KQLearning, Reward 6: 31.000, Len(game): 31, Training Time: 0.488s, Prediction Time: 0.058s
Computed global error Bellman mean:  0.02126114540487673  iter:  5
label KQLearning, Reward 7: 50.000, Len(game): 50, Training Time: 0.717s, Prediction Time: 0.072s
Computed global error Bellman mean:  0.0009045258385653113  iter:  5
label KQLearning, Reward 8: 59.000, Len(game): 59, Training Time: 1.065s, Prediction Time: 0.092s
Computed global error Bellman mean:  0.028279770743522326  iter:  5
label KQLearning, Reward 9: 84.000, Len(game): 84, Training Time: 1.535s, Prediction Time: 0.123s
Computed global error Bellman mean:  0.0038382381461582412  iter:  5
label KQLearning, Reward 10: 51.000, Len(game): 51, Training Time: 2.153s, Prediction Time: 0.143s
Computed global error Bellman mean:  0.015333520935118947  iter:  5
label KQLearning, Reward 11: 88.000, Len(game): 88, Training Time: 2.922s, Prediction Time: 0.177s
Computed global error Bellman mean:  0.0067286687677481725  iter:  5
label KQLearning, Reward 12: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 0.205s
label KQLearning, Reward 13: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 0.244s
label KQLearning, Reward 14: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 0.279s
label KQLearning, Reward 15: 153.000, Len(game): 153, Training Time: 3.841s, Prediction Time: 0.351s
label KQLearning, Reward 16: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.382s
label KQLearning, Reward 17: 99.000, Len(game): 99, Training Time: 3.841s, Prediction Time: 0.427s
label KQLearning, Reward 18: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 0.477s
label KQLearning, Reward 19: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 0.507s
label KQLearning, Reward 20: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 0.544s
label KQLearning, Reward 21: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 0.576s
label KQLearning, Reward 22: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 0.615s
label KQLearning, Reward 23: 136.000, Len(game): 136, Training Time: 3.841s, Prediction Time: 0.678s
label KQLearning, Reward 24: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 0.727s
label KQLearning, Reward 25: 111.000, Len(game): 111, Training Time: 3.841s, Prediction Time: 0.777s
label KQLearning, Reward 26: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 0.811s
label KQLearning, Reward 27: 352.000, Len(game): 352, Training Time: 3.841s, Prediction Time: 0.974s
label KQLearning, Reward 28: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.014s
label KQLearning, Reward 29: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 1.051s
label KQLearning, Reward 30: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 1.082s
label KQLearning, Reward 31: 1000.000, Len(game): 1000, Training Time: 3.841s, Prediction Time: 1.543s
label KQLearning, Reward 32: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.581s
label KQLearning, Reward 33: 103.000, Len(game): 103, Training Time: 3.841s, Prediction Time: 1.628s
label KQLearning, Reward 34: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.660s
label KQLearning, Reward 35: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 1.695s
label KQLearning, Reward 36: 88.000, Len(game): 88, Training Time: 3.841s, Prediction Time: 1.734s
label KQLearning, Reward 37: 84.000, Len(game): 84, Training Time: 3.841s, Prediction Time: 1.772s
label KQLearning, Reward 38: 60.000, Len(game): 60, Training Time: 3.841s, Prediction Time: 1.798s
label KQLearning, Reward 39: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 1.827s
label KQLearning, Reward 40: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.861s
label KQLearning, Reward 41: 74.000, Len(game): 74, Training Time: 3.841s, Prediction Time: 1.894s
label KQLearning, Reward 42: 72.000, Len(game): 72, Training Time: 3.841s, Prediction Time: 1.927s
label KQLearning, Reward 43: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 1.965s
label KQLearning, Reward 44: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 2.005s
label KQLearning, Reward 45: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 2.039s
label KQLearning, Reward 46: 83.000, Len(game): 83, Training Time: 3.841s, Prediction Time: 2.076s
label KQLearning, Reward 47: 101.000, Len(game): 101, Training Time: 3.841s, Prediction Time: 2.122s
label KQLearning, Reward 48: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 2.156s
label KQLearning, Reward 49: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.196s
label KQLearning, Reward 50: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.235s
label KQLearning, Reward 51: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 2.265s
label KQLearning, Reward 52: 187.000, Len(game): 187, Training Time: 3.841s, Prediction Time: 2.354s
label KQLearning, Reward 53: 118.000, Len(game): 118, Training Time: 3.841s, Prediction Time: 2.408s
label KQLearning, Reward 54: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 2.451s
label KQLearning, Reward 55: 64.000, Len(game): 64, Training Time: 3.841s, Prediction Time: 2.480s
label KQLearning, Reward 56: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 2.523s
label KQLearning, Reward 57: 220.000, Len(game): 220, Training Time: 3.841s, Prediction Time: 2.625s
label KQLearning, Reward 58: 78.000, Len(game): 78, Training Time: 3.841s, Prediction Time: 2.660s
label KQLearning, Reward 59: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 2.698s
label KQLearning, Reward 60: 113.000, Len(game): 113, Training Time: 3.841s, Prediction Time: 2.750s
label KQLearning, Reward 61: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 2.796s
label KQLearning, Reward 62: 85.000, Len(game): 85, Training Time: 3.841s, Prediction Time: 2.837s
label KQLearning, Reward 63: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.871s
label KQLearning, Reward 64: 73.000, Len(game): 73, Training Time: 3.841s, Prediction Time: 2.904s
label KQLearning, Reward 65: 62.000, Len(game): 62, Training Time: 3.841s, Prediction Time: 2.932s
label KQLearning, Reward 66: 68.000, Len(game): 68, Training Time: 3.841s, Prediction Time: 2.963s
label KQLearning, Reward 67: 91.000, Len(game): 91, Training Time: 3.841s, Prediction Time: 3.006s
label KQLearning, Reward 68: 105.000, Len(game): 105, Training Time: 3.841s, Prediction Time: 3.055s
label KQLearning, Reward 69: 77.000, Len(game): 77, Training Time: 3.841s, Prediction Time: 3.090s
label KQLearning, Reward 70: 67.000, Len(game): 67, Training Time: 3.841s, Prediction Time: 3.121s
label KQLearning, Reward 71: 65.000, Len(game): 65, Training Time: 3.841s, Prediction Time: 3.151s
label KQLearning, Reward 72: 151.000, Len(game): 151, Training Time: 3.841s, Prediction Time: 3.220s
label KQLearning, Reward 73: 106.000, Len(game): 106, Training Time: 3.841s, Prediction Time: 3.269s
label KQLearning, Reward 74: 96.000, Len(game): 96, Training Time: 3.841s, Prediction Time: 3.312s
label KQLearning, Reward 75: 89.000, Len(game): 89, Training Time: 3.841s, Prediction Time: 3.353s
label KQLearning, Reward 76: 110.000, Len(game): 110, Training Time: 3.841s, Prediction Time: 3.405s
label KQLearning, Reward 77: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 3.440s
label KQLearning, Reward 78: 59.000, Len(game): 59, Training Time: 3.841s, Prediction Time: 3.466s
label KQLearning, Reward 79: 117.000, Len(game): 117, Training Time: 3.841s, Prediction Time: 3.519s
label KQLearning, Reward 80: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 3.559s
label KQLearning, Reward 81: 92.000, Len(game): 92, Training Time: 3.841s, Prediction Time: 3.600s
label KQLearning, Reward 82: 108.000, Len(game): 108, Training Time: 3.841s, Prediction Time: 3.652s
label KQLearning, Reward 83: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 3.689s
label KQLearning, Reward 84: 93.000, Len(game): 93, Training Time: 3.841s, Prediction Time: 3.733s
label KQLearning, Reward 85: 63.000, Len(game): 63, Training Time: 3.841s, Prediction Time: 3.761s
label KQLearning, Reward 86: 80.000, Len(game): 80, Training Time: 3.841s, Prediction Time: 3.798s
label KQLearning, Reward 87: 97.000, Len(game): 97, Training Time: 3.841s, Prediction Time: 3.842s
label KQLearning, Reward 88: 114.000, Len(game): 114, Training Time: 3.841s, Prediction Time: 3.895s
label KQLearning, Reward 89: 87.000, Len(game): 87, Training Time: 3.841s, Prediction Time: 3.935s
label KQLearning, Reward 90: 70.000, Len(game): 70, Training Time: 3.841s, Prediction Time: 3.967s
label KQLearning, Reward 91: 76.000, Len(game): 76, Training Time: 3.841s, Prediction Time: 4.003s
label KQLearning, Reward 92: 86.000, Len(game): 86, Training Time: 3.841s, Prediction Time: 4.043s
label KQLearning, Reward 93: 75.000, Len(game): 75, Training Time: 3.841s, Prediction Time: 4.077s
label KQLearning, Reward 94: 107.000, Len(game): 107, Training Time: 3.841s, Prediction Time: 4.128s
label KQLearning, Reward 95: 79.000, Len(game): 79, Training Time: 3.841s, Prediction Time: 4.166s
label KQLearning, Reward 96: 104.000, Len(game): 104, Training Time: 3.841s, Prediction Time: 4.214s
label KQLearning, Reward 97: 90.000, Len(game): 90, Training Time: 3.841s, Prediction Time: 4.256s
label KQLearning, Reward 98: 81.000, Len(game): 81, Training Time: 3.841s, Prediction Time: 4.294s
label KQLearning, Reward 99: 95.000, Len(game): 95, Training Time: 3.841s, Prediction Time: 4.338s
1
label PPOAgent, Reward 0: 21.000, Len(game): 21, Training Time: 0.004s, Prediction Time: 0.004s
label PPOAgent, Reward 1: 53.000, Len(game): 53, Training Time: 0.014s, Prediction Time: 0.014s
label PPOAgent, Reward 2: 11.000, Len(game): 11, Training Time: 0.016s, Prediction Time: 0.016s
label PPOAgent, Reward 3: 36.000, Len(game): 36, Training Time: 0.023s, Prediction Time: 0.023s
label PPOAgent, Reward 4: 14.000, Len(game): 14, Training Time: 0.025s, Prediction Time: 0.025s
label PPOAgent, Reward 5: 20.000, Len(game): 20, Training Time: 0.029s, Prediction Time: 0.029s
label PPOAgent, Reward 6: 22.000, Len(game): 22, Training Time: 0.033s, Prediction Time: 0.033s
label PPOAgent, Reward 7: 57.000, Len(game): 57, Training Time: 0.044s, Prediction Time: 0.044s
label PPOAgent, Reward 8: 17.000, Len(game): 17, Training Time: 0.047s, Prediction Time: 0.047s
label PPOAgent, Reward 9: 19.000, Len(game): 19, Training Time: 0.050s, Prediction Time: 0.050s
label PPOAgent, Reward 10: 9.000, Len(game): 9, Training Time: 0.053s, Prediction Time: 0.053s
label PPOAgent, Reward 11: 16.000, Len(game): 16, Training Time: 0.055s, Prediction Time: 0.055s
label PPOAgent, Reward 12: 19.000, Len(game): 19, Training Time: 0.058s, Prediction Time: 0.058s
label PPOAgent, Reward 13: 12.000, Len(game): 12, Training Time: 0.061s, Prediction Time: 0.061s
label PPOAgent, Reward 14: 39.000, Len(game): 39, Training Time: 0.069s, Prediction Time: 0.069s
label PPOAgent, Reward 15: 16.000, Len(game): 16, Training Time: 0.071s, Prediction Time: 0.071s
label PPOAgent, Reward 16: 19.000, Len(game): 19, Training Time: 0.075s, Prediction Time: 0.075s
label PPOAgent, Reward 17: 17.000, Len(game): 17, Training Time: 0.078s, Prediction Time: 0.078s
label PPOAgent, Reward 18: 31.000, Len(game): 31, Training Time: 0.084s, Prediction Time: 0.084s
label PPOAgent, Reward 19: 17.000, Len(game): 17, Training Time: 0.087s, Prediction Time: 0.087s
label PPOAgent, Reward 20: 13.000, Len(game): 13, Training Time: 0.090s, Prediction Time: 0.090s
label PPOAgent, Reward 21: 18.000, Len(game): 18, Training Time: 0.094s, Prediction Time: 0.094s
label PPOAgent, Reward 22: 20.000, Len(game): 20, Training Time: 0.097s, Prediction Time: 0.097s
label PPOAgent, Reward 23: 31.000, Len(game): 31, Training Time: 0.103s, Prediction Time: 0.103s
label PPOAgent, Reward 24: 16.000, Len(game): 16, Training Time: 0.106s, Prediction Time: 0.106s
label PPOAgent, Reward 25: 41.000, Len(game): 41, Training Time: 0.114s, Prediction Time: 0.114s
label PPOAgent, Reward 26: 12.000, Len(game): 12, Training Time: 0.116s, Prediction Time: 0.116s
label PPOAgent, Reward 27: 29.000, Len(game): 29, Training Time: 0.121s, Prediction Time: 0.121s
label PPOAgent, Reward 28: 31.000, Len(game): 31, Training Time: 0.128s, Prediction Time: 0.128s
label PPOAgent, Reward 29: 27.000, Len(game): 27, Training Time: 0.133s, Prediction Time: 0.133s
label PPOAgent, Reward 30: 18.000, Len(game): 18, Training Time: 0.136s, Prediction Time: 0.136s
label PPOAgent, Reward 31: 16.000, Len(game): 16, Training Time: 0.137s, Prediction Time: 0.137s
label PPOAgent, Reward 32: 33.000, Len(game): 33, Training Time: 0.142s, Prediction Time: 0.142s
label PPOAgent, Reward 33: 41.000, Len(game): 41, Training Time: 0.150s, Prediction Time: 0.150s
label PPOAgent, Reward 34: 22.000, Len(game): 22, Training Time: 0.154s, Prediction Time: 0.154s
label PPOAgent, Reward 35: 13.000, Len(game): 13, Training Time: 0.157s, Prediction Time: 0.157s
label PPOAgent, Reward 36: 19.000, Len(game): 19, Training Time: 0.160s, Prediction Time: 0.160s
label PPOAgent, Reward 37: 15.000, Len(game): 15, Training Time: 0.162s, Prediction Time: 0.162s
label PPOAgent, Reward 38: 30.000, Len(game): 30, Training Time: 0.168s, Prediction Time: 0.168s
label PPOAgent, Reward 39: 49.000, Len(game): 49, Training Time: 0.176s, Prediction Time: 0.176s
label PPOAgent, Reward 40: 21.000, Len(game): 21, Training Time: 0.181s, Prediction Time: 0.181s
label PPOAgent, Reward 41: 21.000, Len(game): 21, Training Time: 0.186s, Prediction Time: 0.186s
label PPOAgent, Reward 42: 49.000, Len(game): 49, Training Time: 0.194s, Prediction Time: 0.194s
label PPOAgent, Reward 43: 23.000, Len(game): 23, Training Time: 0.199s, Prediction Time: 0.199s
label PPOAgent, Reward 44: 48.000, Len(game): 48, Training Time: 0.208s, Prediction Time: 0.208s
label PPOAgent, Reward 45: 54.000, Len(game): 54, Training Time: 0.217s, Prediction Time: 0.217s
label PPOAgent, Reward 46: 55.000, Len(game): 55, Training Time: 0.268s, Prediction Time: 0.268s
label PPOAgent, Reward 47: 22.000, Len(game): 22, Training Time: 0.273s, Prediction Time: 0.273s
label PPOAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.280s, Prediction Time: 0.280s
label PPOAgent, Reward 49: 21.000, Len(game): 21, Training Time: 0.285s, Prediction Time: 0.285s
label PPOAgent, Reward 50: 27.000, Len(game): 27, Training Time: 0.290s, Prediction Time: 0.290s
label PPOAgent, Reward 51: 38.000, Len(game): 38, Training Time: 0.298s, Prediction Time: 0.298s
label PPOAgent, Reward 52: 41.000, Len(game): 41, Training Time: 0.306s, Prediction Time: 0.306s
label PPOAgent, Reward 53: 11.000, Len(game): 11, Training Time: 0.308s, Prediction Time: 0.308s
label PPOAgent, Reward 54: 17.000, Len(game): 17, Training Time: 0.311s, Prediction Time: 0.311s
label PPOAgent, Reward 55: 32.000, Len(game): 32, Training Time: 0.317s, Prediction Time: 0.317s
label PPOAgent, Reward 56: 45.000, Len(game): 45, Training Time: 0.328s, Prediction Time: 0.328s
label PPOAgent, Reward 57: 27.000, Len(game): 27, Training Time: 0.332s, Prediction Time: 0.332s
label PPOAgent, Reward 58: 21.000, Len(game): 21, Training Time: 0.338s, Prediction Time: 0.338s
label PPOAgent, Reward 59: 16.000, Len(game): 16, Training Time: 0.341s, Prediction Time: 0.341s
label PPOAgent, Reward 60: 14.000, Len(game): 14, Training Time: 0.344s, Prediction Time: 0.344s
label PPOAgent, Reward 61: 25.000, Len(game): 25, Training Time: 0.348s, Prediction Time: 0.348s
label PPOAgent, Reward 62: 15.000, Len(game): 15, Training Time: 0.352s, Prediction Time: 0.352s
label PPOAgent, Reward 63: 16.000, Len(game): 16, Training Time: 0.355s, Prediction Time: 0.355s
label PPOAgent, Reward 64: 24.000, Len(game): 24, Training Time: 0.360s, Prediction Time: 0.360s
label PPOAgent, Reward 65: 25.000, Len(game): 25, Training Time: 0.365s, Prediction Time: 0.365s
label PPOAgent, Reward 66: 12.000, Len(game): 12, Training Time: 0.367s, Prediction Time: 0.367s
label PPOAgent, Reward 67: 20.000, Len(game): 20, Training Time: 0.370s, Prediction Time: 0.370s
label PPOAgent, Reward 68: 28.000, Len(game): 28, Training Time: 0.375s, Prediction Time: 0.375s
label PPOAgent, Reward 69: 41.000, Len(game): 41, Training Time: 0.383s, Prediction Time: 0.383s
label PPOAgent, Reward 70: 23.000, Len(game): 23, Training Time: 0.388s, Prediction Time: 0.388s
label PPOAgent, Reward 71: 18.000, Len(game): 18, Training Time: 0.391s, Prediction Time: 0.391s
label PPOAgent, Reward 72: 18.000, Len(game): 18, Training Time: 0.395s, Prediction Time: 0.395s
label PPOAgent, Reward 73: 28.000, Len(game): 28, Training Time: 0.400s, Prediction Time: 0.400s
label PPOAgent, Reward 74: 21.000, Len(game): 21, Training Time: 0.404s, Prediction Time: 0.404s
label PPOAgent, Reward 75: 23.000, Len(game): 23, Training Time: 0.408s, Prediction Time: 0.408s
label PPOAgent, Reward 76: 25.000, Len(game): 25, Training Time: 0.413s, Prediction Time: 0.413s
label PPOAgent, Reward 77: 23.000, Len(game): 23, Training Time: 0.418s, Prediction Time: 0.418s
label PPOAgent, Reward 78: 14.000, Len(game): 14, Training Time: 0.423s, Prediction Time: 0.423s
label PPOAgent, Reward 79: 16.000, Len(game): 16, Training Time: 0.426s, Prediction Time: 0.426s
label PPOAgent, Reward 80: 22.000, Len(game): 22, Training Time: 0.433s, Prediction Time: 0.433s
label PPOAgent, Reward 81: 20.000, Len(game): 20, Training Time: 0.439s, Prediction Time: 0.439s
label PPOAgent, Reward 82: 43.000, Len(game): 43, Training Time: 0.450s, Prediction Time: 0.450s
label PPOAgent, Reward 83: 48.000, Len(game): 48, Training Time: 0.460s, Prediction Time: 0.460s
label PPOAgent, Reward 84: 36.000, Len(game): 36, Training Time: 0.467s, Prediction Time: 0.467s
label PPOAgent, Reward 85: 42.000, Len(game): 42, Training Time: 0.475s, Prediction Time: 0.475s
label PPOAgent, Reward 86: 18.000, Len(game): 18, Training Time: 0.478s, Prediction Time: 0.478s
label PPOAgent, Reward 87: 21.000, Len(game): 21, Training Time: 0.481s, Prediction Time: 0.481s
label PPOAgent, Reward 88: 35.000, Len(game): 35, Training Time: 0.490s, Prediction Time: 0.490s
label PPOAgent, Reward 89: 51.000, Len(game): 51, Training Time: 0.499s, Prediction Time: 0.499s
label PPOAgent, Reward 90: 10.000, Len(game): 10, Training Time: 0.501s, Prediction Time: 0.501s
label PPOAgent, Reward 91: 42.000, Len(game): 42, Training Time: 0.509s, Prediction Time: 0.509s
label PPOAgent, Reward 92: 67.000, Len(game): 67, Training Time: 0.560s, Prediction Time: 0.560s
label PPOAgent, Reward 93: 14.000, Len(game): 14, Training Time: 0.562s, Prediction Time: 0.562s
label PPOAgent, Reward 94: 21.000, Len(game): 21, Training Time: 0.567s, Prediction Time: 0.567s
label PPOAgent, Reward 95: 73.000, Len(game): 73, Training Time: 0.580s, Prediction Time: 0.580s
label PPOAgent, Reward 96: 53.000, Len(game): 53, Training Time: 0.590s, Prediction Time: 0.590s
label PPOAgent, Reward 97: 34.000, Len(game): 34, Training Time: 0.597s, Prediction Time: 0.597s
label PPOAgent, Reward 98: 33.000, Len(game): 33, Training Time: 0.602s, Prediction Time: 0.602s
label PPOAgent, Reward 99: 20.000, Len(game): 20, Training Time: 0.606s, Prediction Time: 0.606s
label PolicyGradient, Reward 0: 15.000, Len(game): 15, Training Time: 0.004s, Prediction Time: 0.001s
label PolicyGradient, Reward 1: 16.000, Len(game): 16, Training Time: 0.007s, Prediction Time: 0.004s
label PolicyGradient, Reward 2: 15.000, Len(game): 15, Training Time: 0.012s, Prediction Time: 0.008s
label PolicyGradient, Reward 3: 45.000, Len(game): 45, Training Time: 0.021s, Prediction Time: 0.019s
label PolicyGradient, Reward 4: 14.000, Len(game): 14, Training Time: 0.030s, Prediction Time: 0.022s
label PolicyGradient, Reward 5: 41.000, Len(game): 41, Training Time: 0.040s, Prediction Time: 0.031s
label PolicyGradient, Reward 6: 51.000, Len(game): 51, Training Time: 0.054s, Prediction Time: 0.043s
label PolicyGradient, Reward 7: 42.000, Len(game): 42, Training Time: 0.069s, Prediction Time: 0.055s
label PolicyGradient, Reward 8: 68.000, Len(game): 68, Training Time: 0.094s, Prediction Time: 0.074s
label PolicyGradient, Reward 9: 30.000, Len(game): 30, Training Time: 0.118s, Prediction Time: 0.087s
label PolicyGradient, Reward 10: 161.000, Len(game): 161, Training Time: 0.170s, Prediction Time: 0.136s
label PolicyGradient, Reward 11: 77.000, Len(game): 77, Training Time: 0.238s, Prediction Time: 0.177s
label PolicyGradient, Reward 12: 105.000, Len(game): 105, Training Time: 0.322s, Prediction Time: 0.237s
label PolicyGradient, Reward 13: 65.000, Len(game): 65, Training Time: 0.429s, Prediction Time: 0.289s
label PolicyGradient, Reward 14: 120.000, Len(game): 120, Training Time: 0.564s, Prediction Time: 0.372s
label PolicyGradient, Reward 15: 48.000, Len(game): 48, Training Time: 0.706s, Prediction Time: 0.443s
label PolicyGradient, Reward 16: 186.000, Len(game): 186, Training Time: 0.925s, Prediction Time: 0.584s
label PolicyGradient, Reward 17: 67.000, Len(game): 67, Training Time: 1.151s, Prediction Time: 0.697s
label PolicyGradient, Reward 18: 234.000, Len(game): 234, Training Time: 1.481s, Prediction Time: 0.913s
label PolicyGradient, Reward 19: 694.000, Len(game): 694, Training Time: 2.184s, Prediction Time: 1.466s
label PolicyGradient, Reward 20: 234.000, Len(game): 234, Training Time: 2.962s, Prediction Time: 1.990s
label PolicyGradient, Reward 21: 939.000, Len(game): 939, Training Time: 4.512s, Prediction Time: 3.131s
label PolicyGradient, Reward 22: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 4.060s
label PolicyGradient, Reward 23: 66.000, Len(game): 66, Training Time: 4.512s, Prediction Time: 4.128s
label PolicyGradient, Reward 24: 78.000, Len(game): 78, Training Time: 4.512s, Prediction Time: 4.199s
label PolicyGradient, Reward 25: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 4.303s
label PolicyGradient, Reward 26: 143.000, Len(game): 143, Training Time: 4.512s, Prediction Time: 4.428s
label PolicyGradient, Reward 27: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 4.494s
label PolicyGradient, Reward 28: 160.000, Len(game): 160, Training Time: 4.512s, Prediction Time: 4.635s
label PolicyGradient, Reward 29: 236.000, Len(game): 236, Training Time: 4.512s, Prediction Time: 4.847s
label PolicyGradient, Reward 30: 142.000, Len(game): 142, Training Time: 4.512s, Prediction Time: 4.974s
label PolicyGradient, Reward 31: 135.000, Len(game): 135, Training Time: 4.512s, Prediction Time: 5.097s
label PolicyGradient, Reward 32: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 5.187s
label PolicyGradient, Reward 33: 219.000, Len(game): 219, Training Time: 4.512s, Prediction Time: 5.381s
label PolicyGradient, Reward 34: 129.000, Len(game): 129, Training Time: 4.512s, Prediction Time: 5.499s
label PolicyGradient, Reward 35: 107.000, Len(game): 107, Training Time: 4.512s, Prediction Time: 5.597s
label PolicyGradient, Reward 36: 155.000, Len(game): 155, Training Time: 4.512s, Prediction Time: 5.735s
label PolicyGradient, Reward 37: 76.000, Len(game): 76, Training Time: 4.512s, Prediction Time: 5.804s
label PolicyGradient, Reward 38: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 5.936s
label PolicyGradient, Reward 39: 113.000, Len(game): 113, Training Time: 4.512s, Prediction Time: 6.037s
label PolicyGradient, Reward 40: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 6.169s
label PolicyGradient, Reward 41: 117.000, Len(game): 117, Training Time: 4.512s, Prediction Time: 6.274s
label PolicyGradient, Reward 42: 118.000, Len(game): 118, Training Time: 4.512s, Prediction Time: 6.378s
label PolicyGradient, Reward 43: 202.000, Len(game): 202, Training Time: 4.512s, Prediction Time: 6.557s
label PolicyGradient, Reward 44: 62.000, Len(game): 62, Training Time: 4.512s, Prediction Time: 6.611s
label PolicyGradient, Reward 45: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 6.716s
label PolicyGradient, Reward 46: 179.000, Len(game): 179, Training Time: 4.512s, Prediction Time: 6.876s
label PolicyGradient, Reward 47: 148.000, Len(game): 148, Training Time: 4.512s, Prediction Time: 7.006s
label PolicyGradient, Reward 48: 165.000, Len(game): 165, Training Time: 4.512s, Prediction Time: 7.153s
label PolicyGradient, Reward 49: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 7.299s
label PolicyGradient, Reward 50: 123.000, Len(game): 123, Training Time: 4.512s, Prediction Time: 7.408s
label PolicyGradient, Reward 51: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 7.482s
label PolicyGradient, Reward 52: 93.000, Len(game): 93, Training Time: 4.512s, Prediction Time: 7.564s
label PolicyGradient, Reward 53: 119.000, Len(game): 119, Training Time: 4.512s, Prediction Time: 7.672s
label PolicyGradient, Reward 54: 89.000, Len(game): 89, Training Time: 4.512s, Prediction Time: 7.753s
label PolicyGradient, Reward 55: 101.000, Len(game): 101, Training Time: 4.512s, Prediction Time: 7.845s
label PolicyGradient, Reward 56: 109.000, Len(game): 109, Training Time: 4.512s, Prediction Time: 7.940s
label PolicyGradient, Reward 57: 162.000, Len(game): 162, Training Time: 4.512s, Prediction Time: 8.084s
label PolicyGradient, Reward 58: 91.000, Len(game): 91, Training Time: 4.512s, Prediction Time: 8.166s
label PolicyGradient, Reward 59: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 8.249s
label PolicyGradient, Reward 60: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 8.364s
label PolicyGradient, Reward 61: 110.000, Len(game): 110, Training Time: 4.512s, Prediction Time: 8.460s
label PolicyGradient, Reward 62: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 8.548s
label PolicyGradient, Reward 63: 122.000, Len(game): 122, Training Time: 4.512s, Prediction Time: 8.662s
label PolicyGradient, Reward 64: 157.000, Len(game): 157, Training Time: 4.512s, Prediction Time: 8.799s
label PolicyGradient, Reward 65: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 8.874s
label PolicyGradient, Reward 66: 111.000, Len(game): 111, Training Time: 4.512s, Prediction Time: 8.972s
label PolicyGradient, Reward 67: 264.000, Len(game): 264, Training Time: 4.512s, Prediction Time: 9.210s
label PolicyGradient, Reward 68: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.294s
label PolicyGradient, Reward 69: 94.000, Len(game): 94, Training Time: 4.512s, Prediction Time: 9.381s
label PolicyGradient, Reward 70: 64.000, Len(game): 64, Training Time: 4.512s, Prediction Time: 9.438s
label PolicyGradient, Reward 71: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 9.565s
label PolicyGradient, Reward 72: 102.000, Len(game): 102, Training Time: 4.512s, Prediction Time: 9.653s
label PolicyGradient, Reward 73: 184.000, Len(game): 184, Training Time: 4.512s, Prediction Time: 9.814s
label PolicyGradient, Reward 74: 141.000, Len(game): 141, Training Time: 4.512s, Prediction Time: 9.936s
label PolicyGradient, Reward 75: 125.000, Len(game): 125, Training Time: 4.512s, Prediction Time: 10.046s
label PolicyGradient, Reward 76: 108.000, Len(game): 108, Training Time: 4.512s, Prediction Time: 10.142s
label PolicyGradient, Reward 77: 147.000, Len(game): 147, Training Time: 4.512s, Prediction Time: 10.273s
label PolicyGradient, Reward 78: 114.000, Len(game): 114, Training Time: 4.512s, Prediction Time: 10.376s
label PolicyGradient, Reward 79: 95.000, Len(game): 95, Training Time: 4.512s, Prediction Time: 10.460s
label PolicyGradient, Reward 80: 100.000, Len(game): 100, Training Time: 4.512s, Prediction Time: 10.551s
label PolicyGradient, Reward 81: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 10.681s
label PolicyGradient, Reward 82: 124.000, Len(game): 124, Training Time: 4.512s, Prediction Time: 10.789s
label PolicyGradient, Reward 83: 128.000, Len(game): 128, Training Time: 4.512s, Prediction Time: 10.901s
label PolicyGradient, Reward 84: 131.000, Len(game): 131, Training Time: 4.512s, Prediction Time: 11.017s
label PolicyGradient, Reward 85: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.091s
label PolicyGradient, Reward 86: 83.000, Len(game): 83, Training Time: 4.512s, Prediction Time: 11.165s
label PolicyGradient, Reward 87: 58.000, Len(game): 58, Training Time: 4.512s, Prediction Time: 11.215s
label PolicyGradient, Reward 88: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 11.329s
label PolicyGradient, Reward 89: 96.000, Len(game): 96, Training Time: 4.512s, Prediction Time: 11.411s
label PolicyGradient, Reward 90: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 11.497s
label PolicyGradient, Reward 91: 167.000, Len(game): 167, Training Time: 4.512s, Prediction Time: 11.645s
label PolicyGradient, Reward 92: 106.000, Len(game): 106, Training Time: 4.512s, Prediction Time: 11.738s
label PolicyGradient, Reward 93: 144.000, Len(game): 144, Training Time: 4.512s, Prediction Time: 11.866s
label PolicyGradient, Reward 94: 85.000, Len(game): 85, Training Time: 4.512s, Prediction Time: 11.939s
label PolicyGradient, Reward 95: 130.000, Len(game): 130, Training Time: 4.512s, Prediction Time: 12.053s
label PolicyGradient, Reward 96: 98.000, Len(game): 98, Training Time: 4.512s, Prediction Time: 12.137s
label PolicyGradient, Reward 97: 150.000, Len(game): 150, Training Time: 4.512s, Prediction Time: 12.268s
label PolicyGradient, Reward 98: 132.000, Len(game): 132, Training Time: 4.512s, Prediction Time: 12.387s
label PolicyGradient, Reward 99: 177.000, Len(game): 177, Training Time: 4.512s, Prediction Time: 12.543s
label Controller-based, Reward 0: 127.000, Len(game): 127, Training Time: 0.002s, Prediction Time: 0.001s
label Controller-based, Reward 1: 75.000, Len(game): 75, Training Time: 0.003s, Prediction Time: 0.002s
label Controller-based, Reward 2: 9.000, Len(game): 9, Training Time: 0.011s, Prediction Time: 0.002s
label Controller-based, Reward 3: 199.000, Len(game): 199, Training Time: 0.022s, Prediction Time: 0.004s
label Controller-based, Reward 4: 286.000, Len(game): 286, Training Time: 0.033s, Prediction Time: 0.007s
label Controller-based, Reward 5: 364.000, Len(game): 364, Training Time: 0.046s, Prediction Time: 0.010s
no training
label Controller-based, Reward 6: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.019s
no training
label Controller-based, Reward 7: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.028s
no training
label Controller-based, Reward 8: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.036s
no training
label Controller-based, Reward 9: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.045s
no training
label Controller-based, Reward 10: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.053s
no training
label Controller-based, Reward 11: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.063s
no training
label Controller-based, Reward 12: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.071s
no training
label Controller-based, Reward 13: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.079s
no training
label Controller-based, Reward 14: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.087s
no training
label Controller-based, Reward 15: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.095s
no training
label Controller-based, Reward 16: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.103s
no training
label Controller-based, Reward 17: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.111s
no training
label Controller-based, Reward 18: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.119s
no training
label Controller-based, Reward 19: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.128s
no training
label Controller-based, Reward 20: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.136s
no training
label Controller-based, Reward 21: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.144s
no training
label Controller-based, Reward 22: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.152s
no training
label Controller-based, Reward 23: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.160s
no training
label Controller-based, Reward 24: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.168s
no training
label Controller-based, Reward 25: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.177s
no training
label Controller-based, Reward 26: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.185s
no training
label Controller-based, Reward 27: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.193s
no training
label Controller-based, Reward 28: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.201s
no training
label Controller-based, Reward 29: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.209s
no training
label Controller-based, Reward 30: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.217s
no training
label Controller-based, Reward 31: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.226s
no training
label Controller-based, Reward 32: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.233s
no training
label Controller-based, Reward 33: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.241s
no training
label Controller-based, Reward 34: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.250s
no training
label Controller-based, Reward 35: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.258s
no training
label Controller-based, Reward 36: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.265s
no training
label Controller-based, Reward 37: 1000.000, Len(game): 1000, Training Time: 0.046s, Prediction Time: 0.273s
no training
label Controller-based, Reward 38: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.280s
no training
label Controller-based, Reward 39: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.288s
no training
label Controller-based, Reward 40: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.296s
no training
label Controller-based, Reward 41: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.305s
no training
label Controller-based, Reward 42: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.313s
no training
label Controller-based, Reward 43: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.321s
no training
label Controller-based, Reward 44: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.329s
no training
label Controller-based, Reward 45: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.338s
no training
label Controller-based, Reward 46: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.346s
no training
label Controller-based, Reward 47: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.354s
no training
label Controller-based, Reward 48: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.362s
no training
label Controller-based, Reward 49: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.371s
no training
label Controller-based, Reward 50: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.379s
no training
label Controller-based, Reward 51: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.387s
no training
label Controller-based, Reward 52: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.396s
no training
label Controller-based, Reward 53: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.403s
no training
label Controller-based, Reward 54: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.412s
no training
label Controller-based, Reward 55: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.420s
no training
label Controller-based, Reward 56: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.428s
no training
label Controller-based, Reward 57: 1000.000, Len(game): 1000, Training Time: 0.047s, Prediction Time: 0.436s
no training
label Controller-based, Reward 58: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.443s
no training
label Controller-based, Reward 59: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.451s
no training
label Controller-based, Reward 60: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.459s
no training
label Controller-based, Reward 61: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.466s
no training
label Controller-based, Reward 62: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.474s
no training
label Controller-based, Reward 63: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.483s
no training
label Controller-based, Reward 64: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.491s
no training
label Controller-based, Reward 65: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.500s
no training
label Controller-based, Reward 66: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.507s
no training
label Controller-based, Reward 67: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.516s
no training
label Controller-based, Reward 68: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.524s
no training
label Controller-based, Reward 69: 1000.000, Len(game): 1000, Training Time: 0.048s, Prediction Time: 0.533s
no training
label Controller-based, Reward 70: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.541s
no training
label Controller-based, Reward 71: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.548s
no training
label Controller-based, Reward 72: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.556s
no training
label Controller-based, Reward 73: 1000.000, Len(game): 1000, Training Time: 0.049s, Prediction Time: 0.564s
no training
label Controller-based, Reward 74: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.572s
no training
label Controller-based, Reward 75: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.578s
no training
label Controller-based, Reward 76: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.587s
no training
label Controller-based, Reward 77: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.596s
no training
label Controller-based, Reward 78: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.604s
no training
label Controller-based, Reward 79: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.612s
no training
label Controller-based, Reward 80: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.621s
no training
label Controller-based, Reward 81: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.629s
no training
label Controller-based, Reward 82: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.638s
no training
label Controller-based, Reward 83: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.645s
no training
label Controller-based, Reward 84: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.653s
no training
label Controller-based, Reward 85: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.662s
no training
label Controller-based, Reward 86: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.671s
no training
label Controller-based, Reward 87: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.679s
no training
label Controller-based, Reward 88: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.687s
no training
label Controller-based, Reward 89: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.696s
no training
label Controller-based, Reward 90: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.704s
no training
label Controller-based, Reward 91: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.712s
no training
label Controller-based, Reward 92: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.720s
no training
label Controller-based, Reward 93: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.729s
no training
label Controller-based, Reward 94: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.736s
no training
label Controller-based, Reward 95: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.745s
no training
label Controller-based, Reward 96: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.753s
no training
label Controller-based, Reward 97: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.761s
no training
label Controller-based, Reward 98: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.770s
no training
label Controller-based, Reward 99: 1000.000, Len(game): 1000, Training Time: 0.050s, Prediction Time: 0.777s
label KACAgent, Reward 0: 14.000, Len(game): 14, Training Time: 0.004s, Prediction Time: 0.000s
label KACAgent, Reward 1: 39.000, Len(game): 39, Training Time: 0.009s, Prediction Time: 0.007s
label KACAgent, Reward 2: 65.000, Len(game): 65, Training Time: 0.024s, Prediction Time: 0.019s
label KACAgent, Reward 3: 91.000, Len(game): 91, Training Time: 0.040s, Prediction Time: 0.038s
label KACAgent, Reward 4: 80.000, Len(game): 80, Training Time: 0.063s, Prediction Time: 0.056s
label KACAgent, Reward 5: 117.000, Len(game): 117, Training Time: 0.102s, Prediction Time: 0.091s
label KACAgent, Reward 6: 69.000, Len(game): 69, Training Time: 0.148s, Prediction Time: 0.123s
label KACAgent, Reward 7: 164.000, Len(game): 164, Training Time: 0.217s, Prediction Time: 0.189s
label KACAgent, Reward 8: 103.000, Len(game): 103, Training Time: 0.309s, Prediction Time: 0.255s
label KACAgent, Reward 9: 278.000, Len(game): 278, Training Time: 0.478s, Prediction Time: 0.404s
label KACAgent, Reward 10: 114.000, Len(game): 114, Training Time: 0.670s, Prediction Time: 0.527s
label KACAgent, Reward 11: 984.000, Len(game): 984, Training Time: 1.226s, Prediction Time: 1.141s
label KACAgent, Reward 12: 412.000, Len(game): 412, Training Time: 2.074s, Prediction Time: 1.789s
label KACAgent, Reward 13: 111.000, Len(game): 111, Training Time: 2.990s, Prediction Time: 2.340s
label KACAgent, Reward 14: 202.000, Len(game): 202, Training Time: 4.040s, Prediction Time: 3.047s
label KACAgent, Reward 15: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 3.935s
label KACAgent, Reward 16: 250.000, Len(game): 250, Training Time: 4.040s, Prediction Time: 4.140s
label KACAgent, Reward 17: 205.000, Len(game): 205, Training Time: 4.040s, Prediction Time: 4.307s
label KACAgent, Reward 18: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 4.577s
label KACAgent, Reward 19: 345.000, Len(game): 345, Training Time: 4.040s, Prediction Time: 4.856s
label KACAgent, Reward 20: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 5.084s
label KACAgent, Reward 21: 299.000, Len(game): 299, Training Time: 4.040s, Prediction Time: 5.330s
label KACAgent, Reward 22: 473.000, Len(game): 473, Training Time: 4.040s, Prediction Time: 5.721s
label KACAgent, Reward 23: 280.000, Len(game): 280, Training Time: 4.040s, Prediction Time: 5.955s
label KACAgent, Reward 24: 304.000, Len(game): 304, Training Time: 4.040s, Prediction Time: 6.200s
label KACAgent, Reward 25: 195.000, Len(game): 195, Training Time: 4.040s, Prediction Time: 6.359s
label KACAgent, Reward 26: 240.000, Len(game): 240, Training Time: 4.040s, Prediction Time: 6.558s
label KACAgent, Reward 27: 168.000, Len(game): 168, Training Time: 4.040s, Prediction Time: 6.694s
label KACAgent, Reward 28: 177.000, Len(game): 177, Training Time: 4.040s, Prediction Time: 6.835s
label KACAgent, Reward 29: 282.000, Len(game): 282, Training Time: 4.040s, Prediction Time: 7.061s
label KACAgent, Reward 30: 300.000, Len(game): 300, Training Time: 4.040s, Prediction Time: 7.304s
label KACAgent, Reward 31: 243.000, Len(game): 243, Training Time: 4.040s, Prediction Time: 7.497s
label KACAgent, Reward 32: 467.000, Len(game): 467, Training Time: 4.040s, Prediction Time: 7.880s
label KACAgent, Reward 33: 158.000, Len(game): 158, Training Time: 4.040s, Prediction Time: 8.013s
label KACAgent, Reward 34: 301.000, Len(game): 301, Training Time: 4.040s, Prediction Time: 8.261s
label KACAgent, Reward 35: 154.000, Len(game): 154, Training Time: 4.040s, Prediction Time: 8.385s
label KACAgent, Reward 36: 262.000, Len(game): 262, Training Time: 4.040s, Prediction Time: 8.597s
label KACAgent, Reward 37: 283.000, Len(game): 283, Training Time: 4.040s, Prediction Time: 8.826s
label KACAgent, Reward 38: 586.000, Len(game): 586, Training Time: 4.040s, Prediction Time: 9.296s
label KACAgent, Reward 39: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 9.549s
label KACAgent, Reward 40: 275.000, Len(game): 275, Training Time: 4.040s, Prediction Time: 9.774s
label KACAgent, Reward 41: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 9.927s
label KACAgent, Reward 42: 223.000, Len(game): 223, Training Time: 4.040s, Prediction Time: 10.107s
label KACAgent, Reward 43: 323.000, Len(game): 323, Training Time: 4.040s, Prediction Time: 10.365s
label KACAgent, Reward 44: 205.000, Len(game): 205, Training Time: 4.040s, Prediction Time: 10.530s
label KACAgent, Reward 45: 242.000, Len(game): 242, Training Time: 4.040s, Prediction Time: 10.724s
label KACAgent, Reward 46: 164.000, Len(game): 164, Training Time: 4.040s, Prediction Time: 10.854s
label KACAgent, Reward 47: 314.000, Len(game): 314, Training Time: 4.040s, Prediction Time: 11.104s
label KACAgent, Reward 48: 320.000, Len(game): 320, Training Time: 4.040s, Prediction Time: 11.360s
label KACAgent, Reward 49: 145.000, Len(game): 145, Training Time: 4.040s, Prediction Time: 11.477s
label KACAgent, Reward 50: 325.000, Len(game): 325, Training Time: 4.040s, Prediction Time: 11.736s
label KACAgent, Reward 51: 219.000, Len(game): 219, Training Time: 4.040s, Prediction Time: 11.912s
label KACAgent, Reward 52: 178.000, Len(game): 178, Training Time: 4.040s, Prediction Time: 12.058s
label KACAgent, Reward 53: 232.000, Len(game): 232, Training Time: 4.040s, Prediction Time: 12.250s
label KACAgent, Reward 54: 333.000, Len(game): 333, Training Time: 4.040s, Prediction Time: 12.521s
label KACAgent, Reward 55: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 12.726s
label KACAgent, Reward 56: 302.000, Len(game): 302, Training Time: 4.040s, Prediction Time: 12.970s
label KACAgent, Reward 57: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 13.216s
label KACAgent, Reward 58: 344.000, Len(game): 344, Training Time: 4.040s, Prediction Time: 13.494s
label KACAgent, Reward 59: 216.000, Len(game): 216, Training Time: 4.040s, Prediction Time: 13.669s
label KACAgent, Reward 60: 419.000, Len(game): 419, Training Time: 4.040s, Prediction Time: 14.008s
label KACAgent, Reward 61: 264.000, Len(game): 264, Training Time: 4.040s, Prediction Time: 14.219s
label KACAgent, Reward 62: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 14.415s
label KACAgent, Reward 63: 185.000, Len(game): 185, Training Time: 4.040s, Prediction Time: 14.564s
label KACAgent, Reward 64: 252.000, Len(game): 252, Training Time: 4.040s, Prediction Time: 14.768s
label KACAgent, Reward 65: 324.000, Len(game): 324, Training Time: 4.040s, Prediction Time: 15.034s
label KACAgent, Reward 66: 192.000, Len(game): 192, Training Time: 4.040s, Prediction Time: 15.188s
label KACAgent, Reward 67: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 15.342s
label KACAgent, Reward 68: 342.000, Len(game): 342, Training Time: 4.040s, Prediction Time: 15.617s
label KACAgent, Reward 69: 309.000, Len(game): 309, Training Time: 4.040s, Prediction Time: 15.868s
label KACAgent, Reward 70: 327.000, Len(game): 327, Training Time: 4.040s, Prediction Time: 16.134s
label KACAgent, Reward 71: 215.000, Len(game): 215, Training Time: 4.040s, Prediction Time: 16.310s
label KACAgent, Reward 72: 277.000, Len(game): 277, Training Time: 4.040s, Prediction Time: 16.536s
label KACAgent, Reward 73: 188.000, Len(game): 188, Training Time: 4.040s, Prediction Time: 16.687s
label KACAgent, Reward 74: 211.000, Len(game): 211, Training Time: 4.040s, Prediction Time: 16.858s
label KACAgent, Reward 75: 313.000, Len(game): 313, Training Time: 4.040s, Prediction Time: 17.113s
label KACAgent, Reward 76: 193.000, Len(game): 193, Training Time: 4.040s, Prediction Time: 17.268s
label KACAgent, Reward 77: 191.000, Len(game): 191, Training Time: 4.040s, Prediction Time: 17.420s
label KACAgent, Reward 78: 356.000, Len(game): 356, Training Time: 4.040s, Prediction Time: 17.708s
label KACAgent, Reward 79: 348.000, Len(game): 348, Training Time: 4.040s, Prediction Time: 17.989s
label KACAgent, Reward 80: 338.000, Len(game): 338, Training Time: 4.040s, Prediction Time: 18.260s
label KACAgent, Reward 81: 194.000, Len(game): 194, Training Time: 4.040s, Prediction Time: 18.416s
label KACAgent, Reward 82: 189.000, Len(game): 189, Training Time: 4.040s, Prediction Time: 18.568s
label KACAgent, Reward 83: 332.000, Len(game): 332, Training Time: 4.040s, Prediction Time: 18.838s
label KACAgent, Reward 84: 136.000, Len(game): 136, Training Time: 4.040s, Prediction Time: 18.949s
label KACAgent, Reward 85: 276.000, Len(game): 276, Training Time: 4.040s, Prediction Time: 19.174s
label KACAgent, Reward 86: 207.000, Len(game): 207, Training Time: 4.040s, Prediction Time: 19.342s
label KACAgent, Reward 87: 306.000, Len(game): 306, Training Time: 4.040s, Prediction Time: 19.588s
label KACAgent, Reward 88: 303.000, Len(game): 303, Training Time: 4.040s, Prediction Time: 19.830s
label KACAgent, Reward 89: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 20.057s
label KACAgent, Reward 90: 311.000, Len(game): 311, Training Time: 4.040s, Prediction Time: 20.307s
label KACAgent, Reward 91: 247.000, Len(game): 247, Training Time: 4.040s, Prediction Time: 20.504s
label KACAgent, Reward 92: 221.000, Len(game): 221, Training Time: 4.040s, Prediction Time: 20.680s
label KACAgent, Reward 93: 190.000, Len(game): 190, Training Time: 4.040s, Prediction Time: 20.832s
label KACAgent, Reward 94: 266.000, Len(game): 266, Training Time: 4.040s, Prediction Time: 21.046s
label KACAgent, Reward 95: 290.000, Len(game): 290, Training Time: 4.040s, Prediction Time: 21.284s
label KACAgent, Reward 96: 286.000, Len(game): 286, Training Time: 4.040s, Prediction Time: 21.521s
label KACAgent, Reward 97: 256.000, Len(game): 256, Training Time: 4.040s, Prediction Time: 21.731s
label KACAgent, Reward 98: 238.000, Len(game): 238, Training Time: 4.040s, Prediction Time: 21.924s
label KACAgent, Reward 99: 132.000, Len(game): 132, Training Time: 4.040s, Prediction Time: 22.030s
label DQNAgent, Reward 0: 22.000, Len(game): 22, Training Time: 0.000s, Prediction Time: 0.000s
label DQNAgent, Reward 1: 14.000, Len(game): 14, Training Time: 0.000s, Prediction Time: 0.000s
label DQNAgent, Reward 2: 16.000, Len(game): 16, Training Time: 0.000s, Prediction Time: 0.000s
label DQNAgent, Reward 3: 23.000, Len(game): 23, Training Time: 0.019s, Prediction Time: 0.001s
label DQNAgent, Reward 4: 22.000, Len(game): 22, Training Time: 0.043s, Prediction Time: 0.002s
label DQNAgent, Reward 5: 17.000, Len(game): 17, Training Time: 0.062s, Prediction Time: 0.002s
label DQNAgent, Reward 6: 14.000, Len(game): 14, Training Time: 0.077s, Prediction Time: 0.002s
label DQNAgent, Reward 7: 11.000, Len(game): 11, Training Time: 0.088s, Prediction Time: 0.003s
label DQNAgent, Reward 8: 12.000, Len(game): 12, Training Time: 0.100s, Prediction Time: 0.004s
label DQNAgent, Reward 9: 18.000, Len(game): 18, Training Time: 0.120s, Prediction Time: 0.004s
label DQNAgent, Reward 10: 24.000, Len(game): 24, Training Time: 0.149s, Prediction Time: 0.004s
label DQNAgent, Reward 11: 30.000, Len(game): 30, Training Time: 0.183s, Prediction Time: 0.004s
label DQNAgent, Reward 12: 24.000, Len(game): 24, Training Time: 0.209s, Prediction Time: 0.004s
label DQNAgent, Reward 13: 14.000, Len(game): 14, Training Time: 0.223s, Prediction Time: 0.004s
label DQNAgent, Reward 14: 23.000, Len(game): 23, Training Time: 0.252s, Prediction Time: 0.005s
label DQNAgent, Reward 15: 13.000, Len(game): 13, Training Time: 0.270s, Prediction Time: 0.006s
label DQNAgent, Reward 16: 12.000, Len(game): 12, Training Time: 0.287s, Prediction Time: 0.007s
label DQNAgent, Reward 17: 16.000, Len(game): 16, Training Time: 0.308s, Prediction Time: 0.007s
label DQNAgent, Reward 18: 22.000, Len(game): 22, Training Time: 0.339s, Prediction Time: 0.007s
label DQNAgent, Reward 19: 38.000, Len(game): 38, Training Time: 0.389s, Prediction Time: 0.008s
label DQNAgent, Reward 20: 16.000, Len(game): 16, Training Time: 0.408s, Prediction Time: 0.008s
label DQNAgent, Reward 21: 25.000, Len(game): 25, Training Time: 0.435s, Prediction Time: 0.009s
label DQNAgent, Reward 22: 13.000, Len(game): 13, Training Time: 0.448s, Prediction Time: 0.010s
label DQNAgent, Reward 23: 22.000, Len(game): 22, Training Time: 0.473s, Prediction Time: 0.011s
label DQNAgent, Reward 24: 15.000, Len(game): 15, Training Time: 0.490s, Prediction Time: 0.011s
label DQNAgent, Reward 25: 28.000, Len(game): 28, Training Time: 0.521s, Prediction Time: 0.012s
label DQNAgent, Reward 26: 19.000, Len(game): 19, Training Time: 0.542s, Prediction Time: 0.013s
label DQNAgent, Reward 27: 24.000, Len(game): 24, Training Time: 0.569s, Prediction Time: 0.014s
label DQNAgent, Reward 28: 11.000, Len(game): 11, Training Time: 0.582s, Prediction Time: 0.014s
label DQNAgent, Reward 29: 12.000, Len(game): 12, Training Time: 0.596s, Prediction Time: 0.014s
label DQNAgent, Reward 30: 17.000, Len(game): 17, Training Time: 0.619s, Prediction Time: 0.014s
label DQNAgent, Reward 31: 41.000, Len(game): 41, Training Time: 0.675s, Prediction Time: 0.016s
label DQNAgent, Reward 32: 23.000, Len(game): 23, Training Time: 0.707s, Prediction Time: 0.016s
label DQNAgent, Reward 33: 10.000, Len(game): 10, Training Time: 0.718s, Prediction Time: 0.017s
label DQNAgent, Reward 34: 9.000, Len(game): 9, Training Time: 0.729s, Prediction Time: 0.018s
label DQNAgent, Reward 35: 11.000, Len(game): 11, Training Time: 0.745s, Prediction Time: 0.018s
label DQNAgent, Reward 36: 10.000, Len(game): 10, Training Time: 0.759s, Prediction Time: 0.018s
label DQNAgent, Reward 37: 14.000, Len(game): 14, Training Time: 0.776s, Prediction Time: 0.019s
label DQNAgent, Reward 38: 14.000, Len(game): 14, Training Time: 0.792s, Prediction Time: 0.020s
label DQNAgent, Reward 39: 12.000, Len(game): 12, Training Time: 0.805s, Prediction Time: 0.021s
label DQNAgent, Reward 40: 14.000, Len(game): 14, Training Time: 0.822s, Prediction Time: 0.022s
label DQNAgent, Reward 41: 13.000, Len(game): 13, Training Time: 0.837s, Prediction Time: 0.022s
label DQNAgent, Reward 42: 12.000, Len(game): 12, Training Time: 0.851s, Prediction Time: 0.022s
label DQNAgent, Reward 43: 33.000, Len(game): 33, Training Time: 0.890s, Prediction Time: 0.023s
label DQNAgent, Reward 44: 10.000, Len(game): 10, Training Time: 0.902s, Prediction Time: 0.023s
label DQNAgent, Reward 45: 16.000, Len(game): 16, Training Time: 0.919s, Prediction Time: 0.023s
label DQNAgent, Reward 46: 13.000, Len(game): 13, Training Time: 0.934s, Prediction Time: 0.024s
label DQNAgent, Reward 47: 18.000, Len(game): 18, Training Time: 0.955s, Prediction Time: 0.024s
label DQNAgent, Reward 48: 34.000, Len(game): 34, Training Time: 0.996s, Prediction Time: 0.025s
label DQNAgent, Reward 49: 15.000, Len(game): 15, Training Time: 1.012s, Prediction Time: 0.025s
label DQNAgent, Reward 50: 16.000, Len(game): 16, Training Time: 1.030s, Prediction Time: 0.026s
label DQNAgent, Reward 51: 28.000, Len(game): 28, Training Time: 1.060s, Prediction Time: 0.027s
label DQNAgent, Reward 52: 15.000, Len(game): 15, Training Time: 1.075s, Prediction Time: 0.028s
label DQNAgent, Reward 53: 26.000, Len(game): 26, Training Time: 1.103s, Prediction Time: 0.030s
label DQNAgent, Reward 54: 32.000, Len(game): 32, Training Time: 1.136s, Prediction Time: 0.031s
label DQNAgent, Reward 55: 24.000, Len(game): 24, Training Time: 1.161s, Prediction Time: 0.032s
label DQNAgent, Reward 56: 61.000, Len(game): 61, Training Time: 1.226s, Prediction Time: 0.035s
label DQNAgent, Reward 57: 12.000, Len(game): 12, Training Time: 1.242s, Prediction Time: 0.035s
label DQNAgent, Reward 58: 59.000, Len(game): 59, Training Time: 1.316s, Prediction Time: 0.037s
label DQNAgent, Reward 59: 199.000, Len(game): 199, Training Time: 1.537s, Prediction Time: 0.043s
label DQNAgent, Reward 60: 117.000, Len(game): 117, Training Time: 1.667s, Prediction Time: 0.047s
label DQNAgent, Reward 61: 139.000, Len(game): 139, Training Time: 1.831s, Prediction Time: 0.051s
label DQNAgent, Reward 62: 91.000, Len(game): 91, Training Time: 1.929s, Prediction Time: 0.054s
label DQNAgent, Reward 63: 124.000, Len(game): 124, Training Time: 2.076s, Prediction Time: 0.059s
label DQNAgent, Reward 64: 110.000, Len(game): 110, Training Time: 2.212s, Prediction Time: 0.064s
label DQNAgent, Reward 65: 143.000, Len(game): 143, Training Time: 2.511s, Prediction Time: 0.071s
label DQNAgent, Reward 66: 84.000, Len(game): 84, Training Time: 2.620s, Prediction Time: 0.074s
label DQNAgent, Reward 67: 120.000, Len(game): 120, Training Time: 2.761s, Prediction Time: 0.080s
label DQNAgent, Reward 68: 77.000, Len(game): 77, Training Time: 2.844s, Prediction Time: 0.084s
label DQNAgent, Reward 69: 118.000, Len(game): 118, Training Time: 2.972s, Prediction Time: 0.089s
label DQNAgent, Reward 70: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.095s
label DQNAgent, Reward 71: 166.000, Len(game): 166, Training Time: 3.101s, Prediction Time: 0.101s
label DQNAgent, Reward 72: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.109s
label DQNAgent, Reward 73: 230.000, Len(game): 230, Training Time: 3.101s, Prediction Time: 0.119s
label DQNAgent, Reward 74: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.124s
label DQNAgent, Reward 75: 109.000, Len(game): 109, Training Time: 3.101s, Prediction Time: 0.129s
label DQNAgent, Reward 76: 124.000, Len(game): 124, Training Time: 3.101s, Prediction Time: 0.135s
label DQNAgent, Reward 77: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.139s
label DQNAgent, Reward 78: 130.000, Len(game): 130, Training Time: 3.101s, Prediction Time: 0.144s
label DQNAgent, Reward 79: 119.000, Len(game): 119, Training Time: 3.101s, Prediction Time: 0.149s
label DQNAgent, Reward 80: 162.000, Len(game): 162, Training Time: 3.101s, Prediction Time: 0.156s
label DQNAgent, Reward 81: 110.000, Len(game): 110, Training Time: 3.101s, Prediction Time: 0.160s
label DQNAgent, Reward 82: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.165s
label DQNAgent, Reward 83: 136.000, Len(game): 136, Training Time: 3.101s, Prediction Time: 0.170s
label DQNAgent, Reward 84: 142.000, Len(game): 142, Training Time: 3.101s, Prediction Time: 0.176s
label DQNAgent, Reward 85: 128.000, Len(game): 128, Training Time: 3.101s, Prediction Time: 0.180s
label DQNAgent, Reward 86: 152.000, Len(game): 152, Training Time: 3.101s, Prediction Time: 0.187s
label DQNAgent, Reward 87: 121.000, Len(game): 121, Training Time: 3.101s, Prediction Time: 0.192s
label DQNAgent, Reward 88: 137.000, Len(game): 137, Training Time: 3.101s, Prediction Time: 0.197s
label DQNAgent, Reward 89: 181.000, Len(game): 181, Training Time: 3.101s, Prediction Time: 0.205s
label DQNAgent, Reward 90: 122.000, Len(game): 122, Training Time: 3.101s, Prediction Time: 0.209s
label DQNAgent, Reward 91: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.216s
label DQNAgent, Reward 92: 148.000, Len(game): 148, Training Time: 3.101s, Prediction Time: 0.223s
label DQNAgent, Reward 93: 118.000, Len(game): 118, Training Time: 3.101s, Prediction Time: 0.227s
label DQNAgent, Reward 94: 106.000, Len(game): 106, Training Time: 3.101s, Prediction Time: 0.232s
label DQNAgent, Reward 95: 126.000, Len(game): 126, Training Time: 3.101s, Prediction Time: 0.237s
label DQNAgent, Reward 96: 179.000, Len(game): 179, Training Time: 3.101s, Prediction Time: 0.245s
label DQNAgent, Reward 97: 200.000, Len(game): 200, Training Time: 3.101s, Prediction Time: 0.253s
label DQNAgent, Reward 98: 117.000, Len(game): 117, Training Time: 3.101s, Prediction Time: 0.258s
label DQNAgent, Reward 99: 144.000, Len(game): 144, Training Time: 3.101s, Prediction Time: 0.265s
Computed global error Bellman mean:  2.429265355294616e-07  iter:  2
label KQLearningHJBCP, Reward 0: 27.000, Len(game): 27, Training Time: 0.011s, Prediction Time: 0.001s
Computed global error Bellman mean:  1.1135033661321919e-07  iter:  6
label KQLearningHJBCP, Reward 1: 44.000, Len(game): 44, Training Time: 0.043s, Prediction Time: 0.007s
Computed global error Bellman mean:  3.422438720370735e-07  iter:  8
label KQLearningHJBCP, Reward 2: 54.000, Len(game): 54, Training Time: 0.122s, Prediction Time: 0.017s
Computed global error Bellman mean:  3.9577951882171255e-07  iter:  4
label KQLearningHJBCP, Reward 3: 58.000, Len(game): 58, Training Time: 0.203s, Prediction Time: 0.029s
Computed global error Bellman mean:  3.432339072491923e-07  iter:  6
label KQLearningHJBCP, Reward 4: 58.000, Len(game): 58, Training Time: 0.415s, Prediction Time: 0.043s
Computed global error Bellman mean:  5.280399794638235e-07  iter:  5
label KQLearningHJBCP, Reward 5: 63.000, Len(game): 63, Training Time: 0.666s, Prediction Time: 0.060s
Computed global error Bellman mean:  1.3332333149421225e-06  iter:  10
label KQLearningHJBCP, Reward 6: 65.000, Len(game): 65, Training Time: 1.528s, Prediction Time: 0.080s
Computed global error Bellman mean:  4.910292080454003e-07  iter:  6
label KQLearningHJBCP, Reward 7: 48.000, Len(game): 48, Training Time: 2.194s, Prediction Time: 0.096s
Computed global error Bellman mean:  4.686085707314039e-07  iter:  5
label KQLearningHJBCP, Reward 8: 73.000, Len(game): 73, Training Time: 2.916s, Prediction Time: 0.121s
Computed global error Bellman mean:  4.164509857890951e-07  iter:  6
label KQLearningHJBCP, Reward 9: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.149s
label KQLearningHJBCP, Reward 10: 33.000, Len(game): 33, Training Time: 4.082s, Prediction Time: 0.162s
label KQLearningHJBCP, Reward 11: 104.000, Len(game): 104, Training Time: 4.082s, Prediction Time: 0.199s
label KQLearningHJBCP, Reward 12: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.229s
label KQLearningHJBCP, Reward 13: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 0.256s
label KQLearningHJBCP, Reward 14: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 0.284s
label KQLearningHJBCP, Reward 15: 54.000, Len(game): 54, Training Time: 4.082s, Prediction Time: 0.304s
label KQLearningHJBCP, Reward 16: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 0.321s
label KQLearningHJBCP, Reward 17: 93.000, Len(game): 93, Training Time: 4.082s, Prediction Time: 0.358s
label KQLearningHJBCP, Reward 18: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.391s
label KQLearningHJBCP, Reward 19: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.423s
label KQLearningHJBCP, Reward 20: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 0.451s
label KQLearningHJBCP, Reward 21: 53.000, Len(game): 53, Training Time: 4.082s, Prediction Time: 0.472s
label KQLearningHJBCP, Reward 22: 52.000, Len(game): 52, Training Time: 4.082s, Prediction Time: 0.493s
label KQLearningHJBCP, Reward 23: 49.000, Len(game): 49, Training Time: 4.082s, Prediction Time: 0.513s
label KQLearningHJBCP, Reward 24: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 0.543s
label KQLearningHJBCP, Reward 25: 113.000, Len(game): 113, Training Time: 4.082s, Prediction Time: 0.589s
label KQLearningHJBCP, Reward 26: 110.000, Len(game): 110, Training Time: 4.082s, Prediction Time: 0.633s
label KQLearningHJBCP, Reward 27: 43.000, Len(game): 43, Training Time: 4.082s, Prediction Time: 0.649s
label KQLearningHJBCP, Reward 28: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 0.677s
label KQLearningHJBCP, Reward 29: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 0.709s
label KQLearningHJBCP, Reward 30: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 0.736s
label KQLearningHJBCP, Reward 31: 61.000, Len(game): 61, Training Time: 4.082s, Prediction Time: 0.760s
label KQLearningHJBCP, Reward 32: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.792s
label KQLearningHJBCP, Reward 33: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 0.824s
label KQLearningHJBCP, Reward 34: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 0.858s
label KQLearningHJBCP, Reward 35: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 0.897s
label KQLearningHJBCP, Reward 36: 100.000, Len(game): 100, Training Time: 4.082s, Prediction Time: 0.937s
label KQLearningHJBCP, Reward 37: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 0.970s
label KQLearningHJBCP, Reward 38: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 0.999s
label KQLearningHJBCP, Reward 39: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 1.033s
label KQLearningHJBCP, Reward 40: 75.000, Len(game): 75, Training Time: 4.082s, Prediction Time: 1.063s
label KQLearningHJBCP, Reward 41: 56.000, Len(game): 56, Training Time: 4.082s, Prediction Time: 1.087s
label KQLearningHJBCP, Reward 42: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.121s
label KQLearningHJBCP, Reward 43: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.149s
label KQLearningHJBCP, Reward 44: 88.000, Len(game): 88, Training Time: 4.082s, Prediction Time: 1.187s
label KQLearningHJBCP, Reward 45: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.216s
label KQLearningHJBCP, Reward 46: 46.000, Len(game): 46, Training Time: 4.082s, Prediction Time: 1.234s
label KQLearningHJBCP, Reward 47: 106.000, Len(game): 106, Training Time: 4.082s, Prediction Time: 1.277s
label KQLearningHJBCP, Reward 48: 83.000, Len(game): 83, Training Time: 4.082s, Prediction Time: 1.312s
label KQLearningHJBCP, Reward 49: 85.000, Len(game): 85, Training Time: 4.082s, Prediction Time: 1.346s
label KQLearningHJBCP, Reward 50: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.374s
label KQLearningHJBCP, Reward 51: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.402s
label KQLearningHJBCP, Reward 52: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.436s
label KQLearningHJBCP, Reward 53: 94.000, Len(game): 94, Training Time: 4.082s, Prediction Time: 1.474s
label KQLearningHJBCP, Reward 54: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.503s
label KQLearningHJBCP, Reward 55: 86.000, Len(game): 86, Training Time: 4.082s, Prediction Time: 1.538s
label KQLearningHJBCP, Reward 56: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.568s
label KQLearningHJBCP, Reward 57: 68.000, Len(game): 68, Training Time: 4.082s, Prediction Time: 1.596s
label KQLearningHJBCP, Reward 58: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 1.617s
label KQLearningHJBCP, Reward 59: 45.000, Len(game): 45, Training Time: 4.082s, Prediction Time: 1.634s
label KQLearningHJBCP, Reward 60: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.666s
label KQLearningHJBCP, Reward 61: 84.000, Len(game): 84, Training Time: 4.082s, Prediction Time: 1.700s
label KQLearningHJBCP, Reward 62: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 1.732s
label KQLearningHJBCP, Reward 63: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 1.760s
label KQLearningHJBCP, Reward 64: 101.000, Len(game): 101, Training Time: 4.082s, Prediction Time: 1.801s
label KQLearningHJBCP, Reward 65: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 1.832s
label KQLearningHJBCP, Reward 66: 90.000, Len(game): 90, Training Time: 4.082s, Prediction Time: 1.869s
label KQLearningHJBCP, Reward 67: 107.000, Len(game): 107, Training Time: 4.082s, Prediction Time: 1.912s
label KQLearningHJBCP, Reward 68: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 1.943s
label KQLearningHJBCP, Reward 69: 67.000, Len(game): 67, Training Time: 4.082s, Prediction Time: 1.970s
label KQLearningHJBCP, Reward 70: 64.000, Len(game): 64, Training Time: 4.082s, Prediction Time: 1.996s
label KQLearningHJBCP, Reward 71: 69.000, Len(game): 69, Training Time: 4.082s, Prediction Time: 2.024s
label KQLearningHJBCP, Reward 72: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.055s
label KQLearningHJBCP, Reward 73: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.083s
label KQLearningHJBCP, Reward 74: 78.000, Len(game): 78, Training Time: 4.082s, Prediction Time: 2.116s
label KQLearningHJBCP, Reward 75: 70.000, Len(game): 70, Training Time: 4.082s, Prediction Time: 2.144s
label KQLearningHJBCP, Reward 76: 94.000, Len(game): 94, Training Time: 4.082s, Prediction Time: 2.183s
label KQLearningHJBCP, Reward 77: 50.000, Len(game): 50, Training Time: 4.082s, Prediction Time: 2.203s
label KQLearningHJBCP, Reward 78: 79.000, Len(game): 79, Training Time: 4.082s, Prediction Time: 2.236s
label KQLearningHJBCP, Reward 79: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.266s
label KQLearningHJBCP, Reward 80: 96.000, Len(game): 96, Training Time: 4.082s, Prediction Time: 2.305s
label KQLearningHJBCP, Reward 81: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.337s
label KQLearningHJBCP, Reward 82: 74.000, Len(game): 74, Training Time: 4.082s, Prediction Time: 2.367s
label KQLearningHJBCP, Reward 83: 42.000, Len(game): 42, Training Time: 4.082s, Prediction Time: 2.384s
label KQLearningHJBCP, Reward 84: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.415s
label KQLearningHJBCP, Reward 85: 51.000, Len(game): 51, Training Time: 4.082s, Prediction Time: 2.436s
label KQLearningHJBCP, Reward 86: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.467s
label KQLearningHJBCP, Reward 87: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.496s
label KQLearningHJBCP, Reward 88: 73.000, Len(game): 73, Training Time: 4.082s, Prediction Time: 2.527s
label KQLearningHJBCP, Reward 89: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.557s
label KQLearningHJBCP, Reward 90: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.576s
label KQLearningHJBCP, Reward 91: 76.000, Len(game): 76, Training Time: 4.082s, Prediction Time: 2.607s
label KQLearningHJBCP, Reward 92: 47.000, Len(game): 47, Training Time: 4.082s, Prediction Time: 2.627s
label KQLearningHJBCP, Reward 93: 58.000, Len(game): 58, Training Time: 4.082s, Prediction Time: 2.653s
label KQLearningHJBCP, Reward 94: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.685s
label KQLearningHJBCP, Reward 95: 77.000, Len(game): 77, Training Time: 4.082s, Prediction Time: 2.716s
label KQLearningHJBCP, Reward 96: 66.000, Len(game): 66, Training Time: 4.082s, Prediction Time: 2.743s
label KQLearningHJBCP, Reward 97: 71.000, Len(game): 71, Training Time: 4.082s, Prediction Time: 2.773s
label KQLearningHJBCP, Reward 98: 80.000, Len(game): 80, Training Time: 4.082s, Prediction Time: 2.806s
label KQLearningHJBCP, Reward 99: 72.000, Len(game): 72, Training Time: 4.082s, Prediction Time: 2.835s
Computed global error Bellman mean:  2.2538068875251957e-08  iter:  2
label KQLearning, Reward 0: 17.000, Len(game): 17, Training Time: 0.005s, Prediction Time: 0.001s
Computed global error Bellman mean:  2.1038873556891403e-08  iter:  3
label KQLearning, Reward 1: 19.000, Len(game): 19, Training Time: 0.011s, Prediction Time: 0.006s
Computed global error Bellman mean:  3.3796859718234394e-08  iter:  4
label KQLearning, Reward 2: 36.000, Len(game): 36, Training Time: 0.027s, Prediction Time: 0.013s
Computed global error Bellman mean:  0.005292865730735803  iter:  5
label KQLearning, Reward 3: 41.000, Len(game): 41, Training Time: 0.056s, Prediction Time: 0.021s
Computed global error Bellman mean:  3.536763350335074e-08  iter:  5
label KQLearning, Reward 4: 22.000, Len(game): 22, Training Time: 0.099s, Prediction Time: 0.026s
Computed global error Bellman mean:  7.233596841768994e-08  iter:  5
label KQLearning, Reward 5: 23.000, Len(game): 23, Training Time: 0.148s, Prediction Time: 0.031s
Computed global error Bellman mean:  6.697590247687889e-05  iter:  5
label KQLearning, Reward 6: 57.000, Len(game): 57, Training Time: 0.224s, Prediction Time: 0.044s
Computed global error Bellman mean:  6.971595679015014e-08  iter:  5
label KQLearning, Reward 7: 59.000, Len(game): 59, Training Time: 0.363s, Prediction Time: 0.059s
Computed global error Bellman mean:  1.0071149827691969e-07  iter:  5
label KQLearning, Reward 8: 97.000, Len(game): 97, Training Time: 0.654s, Prediction Time: 0.087s
Computed global error Bellman mean:  9.729623505213322e-08  iter:  5
label KQLearning, Reward 9: 71.000, Len(game): 71, Training Time: 1.035s, Prediction Time: 0.111s
Computed global error Bellman mean:  1.228719115186594e-05  iter:  5
label KQLearning, Reward 10: 143.000, Len(game): 143, Training Time: 1.667s, Prediction Time: 0.163s
Computed global error Bellman mean:  0.22382836200452905  iter:  5
label KQLearning, Reward 11: 198.000, Len(game): 198, Training Time: 2.817s, Prediction Time: 0.238s
Computed global error Bellman mean:  0.09699429032149978  iter:  5
label KQLearning, Reward 12: 305.000, Len(game): 305, Training Time: 4.855s, Prediction Time: 0.387s
label KQLearning, Reward 13: 370.000, Len(game): 370, Training Time: 4.855s, Prediction Time: 0.593s
label KQLearning, Reward 14: 533.000, Len(game): 533, Training Time: 4.855s, Prediction Time: 0.895s
label KQLearning, Reward 15: 168.000, Len(game): 168, Training Time: 4.855s, Prediction Time: 0.989s
label KQLearning, Reward 16: 150.000, Len(game): 150, Training Time: 4.855s, Prediction Time: 1.072s
label KQLearning, Reward 17: 222.000, Len(game): 222, Training Time: 4.855s, Prediction Time: 1.197s
label KQLearning, Reward 18: 253.000, Len(game): 253, Training Time: 4.855s, Prediction Time: 1.341s
label KQLearning, Reward 19: 217.000, Len(game): 217, Training Time: 4.855s, Prediction Time: 1.463s
label KQLearning, Reward 20: 221.000, Len(game): 221, Training Time: 4.855s, Prediction Time: 1.587s
label KQLearning, Reward 21: 147.000, Len(game): 147, Training Time: 4.855s, Prediction Time: 1.671s
label KQLearning, Reward 22: 170.000, Len(game): 170, Training Time: 4.855s, Prediction Time: 1.768s
label KQLearning, Reward 23: 155.000, Len(game): 155, Training Time: 4.855s, Prediction Time: 1.854s
label KQLearning, Reward 24: 295.000, Len(game): 295, Training Time: 4.855s, Prediction Time: 2.020s
label KQLearning, Reward 25: 153.000, Len(game): 153, Training Time: 4.855s, Prediction Time: 2.107s
label KQLearning, Reward 26: 309.000, Len(game): 309, Training Time: 4.855s, Prediction Time: 2.281s
label KQLearning, Reward 27: 157.000, Len(game): 157, Training Time: 4.855s, Prediction Time: 2.370s
label KQLearning, Reward 28: 263.000, Len(game): 263, Training Time: 4.855s, Prediction Time: 2.520s
label KQLearning, Reward 29: 175.000, Len(game): 175, Training Time: 4.855s, Prediction Time: 2.622s
label KQLearning, Reward 30: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 2.744s
label KQLearning, Reward 31: 177.000, Len(game): 177, Training Time: 4.856s, Prediction Time: 2.846s
label KQLearning, Reward 32: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 2.934s
label KQLearning, Reward 33: 174.000, Len(game): 174, Training Time: 4.856s, Prediction Time: 3.034s
label KQLearning, Reward 34: 198.000, Len(game): 198, Training Time: 4.856s, Prediction Time: 3.146s
label KQLearning, Reward 35: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.247s
label KQLearning, Reward 36: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 3.344s
label KQLearning, Reward 37: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 3.444s
label KQLearning, Reward 38: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.516s
label KQLearning, Reward 39: 207.000, Len(game): 207, Training Time: 4.856s, Prediction Time: 3.635s
label KQLearning, Reward 40: 209.000, Len(game): 209, Training Time: 4.856s, Prediction Time: 3.754s
label KQLearning, Reward 41: 127.000, Len(game): 127, Training Time: 4.856s, Prediction Time: 3.829s
label KQLearning, Reward 42: 134.000, Len(game): 134, Training Time: 4.856s, Prediction Time: 3.909s
label KQLearning, Reward 43: 115.000, Len(game): 115, Training Time: 4.856s, Prediction Time: 3.977s
label KQLearning, Reward 44: 150.000, Len(game): 150, Training Time: 4.856s, Prediction Time: 4.066s
label KQLearning, Reward 45: 197.000, Len(game): 197, Training Time: 4.856s, Prediction Time: 4.181s
label KQLearning, Reward 46: 176.000, Len(game): 176, Training Time: 4.856s, Prediction Time: 4.282s
label KQLearning, Reward 47: 233.000, Len(game): 233, Training Time: 4.856s, Prediction Time: 4.414s
label KQLearning, Reward 48: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 4.509s
label KQLearning, Reward 49: 181.000, Len(game): 181, Training Time: 4.856s, Prediction Time: 4.614s
label KQLearning, Reward 50: 192.000, Len(game): 192, Training Time: 4.856s, Prediction Time: 4.721s
label KQLearning, Reward 51: 225.000, Len(game): 225, Training Time: 4.856s, Prediction Time: 4.853s
label KQLearning, Reward 52: 654.000, Len(game): 654, Training Time: 4.856s, Prediction Time: 5.228s
label KQLearning, Reward 53: 217.000, Len(game): 217, Training Time: 4.856s, Prediction Time: 5.363s
label KQLearning, Reward 54: 240.000, Len(game): 240, Training Time: 4.856s, Prediction Time: 5.515s
label KQLearning, Reward 55: 232.000, Len(game): 232, Training Time: 4.856s, Prediction Time: 5.658s
label KQLearning, Reward 56: 314.000, Len(game): 314, Training Time: 4.856s, Prediction Time: 5.855s
label KQLearning, Reward 57: 219.000, Len(game): 219, Training Time: 4.856s, Prediction Time: 5.984s
label KQLearning, Reward 58: 365.000, Len(game): 365, Training Time: 4.856s, Prediction Time: 6.192s
label KQLearning, Reward 59: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 6.283s
label KQLearning, Reward 60: 223.000, Len(game): 223, Training Time: 4.856s, Prediction Time: 6.409s
label KQLearning, Reward 61: 383.000, Len(game): 383, Training Time: 4.856s, Prediction Time: 6.631s
label KQLearning, Reward 62: 259.000, Len(game): 259, Training Time: 4.856s, Prediction Time: 6.780s
label KQLearning, Reward 63: 212.000, Len(game): 212, Training Time: 4.856s, Prediction Time: 6.903s
label KQLearning, Reward 64: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 6.986s
label KQLearning, Reward 65: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.076s
label KQLearning, Reward 66: 181.000, Len(game): 181, Training Time: 4.856s, Prediction Time: 7.178s
label KQLearning, Reward 67: 243.000, Len(game): 243, Training Time: 4.856s, Prediction Time: 7.318s
label KQLearning, Reward 68: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 7.401s
label KQLearning, Reward 69: 157.000, Len(game): 157, Training Time: 4.856s, Prediction Time: 7.490s
label KQLearning, Reward 70: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 7.649s
label KQLearning, Reward 71: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 7.738s
label KQLearning, Reward 72: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 7.830s
label KQLearning, Reward 73: 142.000, Len(game): 142, Training Time: 4.856s, Prediction Time: 7.910s
label KQLearning, Reward 74: 298.000, Len(game): 298, Training Time: 4.856s, Prediction Time: 8.080s
label KQLearning, Reward 75: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 8.180s
label KQLearning, Reward 76: 194.000, Len(game): 194, Training Time: 4.856s, Prediction Time: 8.290s
label KQLearning, Reward 77: 147.000, Len(game): 147, Training Time: 4.856s, Prediction Time: 8.373s
label KQLearning, Reward 78: 165.000, Len(game): 165, Training Time: 4.856s, Prediction Time: 8.466s
label KQLearning, Reward 79: 155.000, Len(game): 155, Training Time: 4.856s, Prediction Time: 8.553s
label KQLearning, Reward 80: 202.000, Len(game): 202, Training Time: 4.856s, Prediction Time: 8.665s
label KQLearning, Reward 81: 158.000, Len(game): 158, Training Time: 4.856s, Prediction Time: 8.756s
label KQLearning, Reward 82: 279.000, Len(game): 279, Training Time: 4.856s, Prediction Time: 8.915s
label KQLearning, Reward 83: 218.000, Len(game): 218, Training Time: 4.856s, Prediction Time: 9.039s
label KQLearning, Reward 84: 200.000, Len(game): 200, Training Time: 4.856s, Prediction Time: 9.152s
label KQLearning, Reward 85: 210.000, Len(game): 210, Training Time: 4.856s, Prediction Time: 9.270s
label KQLearning, Reward 86: 171.000, Len(game): 171, Training Time: 4.856s, Prediction Time: 9.374s
label KQLearning, Reward 87: 187.000, Len(game): 187, Training Time: 4.856s, Prediction Time: 9.482s
label KQLearning, Reward 88: 144.000, Len(game): 144, Training Time: 4.856s, Prediction Time: 9.564s
label KQLearning, Reward 89: 185.000, Len(game): 185, Training Time: 4.856s, Prediction Time: 9.669s
label KQLearning, Reward 90: 133.000, Len(game): 133, Training Time: 4.856s, Prediction Time: 9.744s
label KQLearning, Reward 91: 160.000, Len(game): 160, Training Time: 4.856s, Prediction Time: 9.834s
label KQLearning, Reward 92: 161.000, Len(game): 161, Training Time: 4.856s, Prediction Time: 9.926s
label KQLearning, Reward 93: 211.000, Len(game): 211, Training Time: 4.856s, Prediction Time: 10.046s
label KQLearning, Reward 94: 164.000, Len(game): 164, Training Time: 4.856s, Prediction Time: 10.137s
label KQLearning, Reward 95: 339.000, Len(game): 339, Training Time: 4.856s, Prediction Time: 10.328s
label KQLearning, Reward 96: 175.000, Len(game): 175, Training Time: 4.856s, Prediction Time: 10.426s
label KQLearning, Reward 97: 138.000, Len(game): 138, Training Time: 4.856s, Prediction Time: 10.504s
label KQLearning, Reward 98: 151.000, Len(game): 151, Training Time: 4.856s, Prediction Time: 10.589s
label KQLearning, Reward 99: 167.000, Len(game): 167, Training Time: 4.856s, Prediction Time: 10.684s
2

Total running time of the script: (4 minutes 8.002 seconds)

Gallery generated by Sphinx-Gallery