OpenAI Gym MultiNEAT – GALLUS GALLUS ROBOTICUS

ok also, just saw this: https://gym.openai.com/evaluations/eval_a0YXWDc4SKeJjyTH7IrHBg/‘
it doesn’t work apparrently, but could be salvaged into something,
possibly written by this guy https://blog.otoro.net/
https://attentionagent.github.io/ “ there is no conscious perception of the visual world without attention to it“
# Using ES-HyperNEAT to try to solve the Bipedal walker.
# This attempt was not successful. Adjustment of hyperparameters is likely needed.

# A neural network is trained using NeuroEvolution of Augmenting Topologies
# The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies"
# This gist is using MultiNEAT (http://multineat.com/)

import logging
import numpy as np
import pickle

import gym

import MultiNEAT as NEAT

# NEAT setup
params = NEAT.Parameters()
params.PopulationSize = 200;

params.DynamicCompatibility = True;
params.CompatTreshold = 2.0;
params.YoungAgeTreshold = 15;
params.SpeciesMaxStagnation = 100;
params.OldAgeTreshold = 35;
params.MinSpecies = 5;
params.MaxSpecies = 10;
params.RouletteWheelSelection = False;

params.MutateRemLinkProb = 0.02;
params.RecurrentProb = 0;
params.OverallMutationRate = 0.15;
params.MutateAddLinkProb = 0.08;
params.MutateAddNeuronProb = 0.01;
params.MutateWeightsProb = 0.90;
params.MaxWeight = 8.0;
params.WeightMutationMaxPower = 0.2;
params.WeightReplacementMaxPower = 1.0;

params.MutateActivationAProb = 0.0;
params.ActivationAMutationMaxPower = 0.5;
params.MinActivationA = 0.05;
params.MaxActivationA = 6.0;

params.MutateNeuronActivationTypeProb = 0.03;

params.ActivationFunction_SignedSigmoid_Prob = 0.0;
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0;
params.ActivationFunction_Tanh_Prob = 1.0;
params.ActivationFunction_TanhCubic_Prob = 0.0;
params.ActivationFunction_SignedStep_Prob = 1.0;
params.ActivationFunction_UnsignedStep_Prob = 0.0;
params.ActivationFunction_SignedGauss_Prob = 1.0;
params.ActivationFunction_UnsignedGauss_Prob = 0.0;
params.ActivationFunction_Abs_Prob = 0.0;
params.ActivationFunction_SignedSine_Prob = 1.0;
params.ActivationFunction_UnsignedSine_Prob = 0.0;
params.ActivationFunction_Linear_Prob = 1.0;

params.DivisionThreshold = 0.5;
params.VarianceThreshold = 0.03;
params.BandThreshold = 0.3;
params.InitialDepth = 2;
params.MaxDepth = 3;
params.IterationLevel = 1;
params.Leo = False;
params.GeometrySeed = False;
params.LeoSeed = False;
params.LeoThreshold = 0.3;
params.CPPN_Bias = -1.0;
params.Qtree_X = 0.0;
params.Qtree_Y = 0.0;
params.Width = 1.;
params.Height = 1.;
params.Elitism = 0.1;

rng = NEAT.RNG()
rng.TimeSeed()

list = []

for i in range(0,14):
	list.append((-1. +(2.*i/13.), -1., 0.))

for i in range(0,10):
	list.append((-1. +(2.*i/9), -0.5, 0))


substrate = NEAT.Substrate(list,
                           [],
                           [(-1., 1., 0.), (-0.5, 1., 0.), (0.5, 1., 0.), (1., 1., 0.)])

substrate.m_allow_input_hidden_links = False;
substrate.m_allow_input_output_links = False;
substrate.m_allow_hidden_hidden_links = False;
substrate.m_allow_hidden_output_links = False;
substrate.m_allow_output_hidden_links = False;
substrate.m_allow_output_output_links = False;
substrate.m_allow_looped_hidden_links = True;
substrate.m_allow_looped_output_links = False;

substrate.m_allow_input_hidden_links = True;
substrate.m_allow_input_output_links = False;
substrate.m_allow_hidden_output_links = True;
substrate.m_allow_hidden_hidden_links = True;

substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID;
substrate.m_output_nodes_activation = NEAT.ActivationFunction.UNSIGNED_SIGMOID;

substrate.m_with_distance = False;

substrate.m_max_weight_and_bias = 8.0;


def trainNetwork(env, seed):
    # Training parameters
    generationSize = 50
    episode_count = 10
    max_steps = 1000
    # Max reward for environments that reward 1 for each succesfull step (e.g. CartPole-v0)
    max_reward = episode_count * max_steps

    def evaluate(genome):
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)

        cum_reward = 0

        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()

            for j in xrange(max_steps):
                # get next action
                net.Input(ob)
                net.Activate()
                o = net.Output()
                action = np.clip(o,-1,1)
                ob, reward, done, _ = env.step(action)
                cum_reward += reward
                if done:
                    break

        return cum_reward

    # Create initial genome
    g = NEAT.Genome(0, 24, 0, 4, False, 
                    NEAT.ActivationFunction.TANH, NEAT.ActivationFunction.TANH, 0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)

    current_best = None

    for generation in range(generationSize):
        for i_episode, genome in enumerate(NEAT.GetGenomeList(pop)):
            reward = evaluate(genome)

            if reward == max_reward:
                return pickle.dumps(genome)

            genome.SetFitness(reward)

        print('Generation: {}, max fitness: {}'.format(generation,
                            max((x.GetFitness() for x in NEAT.GetGenomeList(pop)))))
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()


    return current_best

env_name = "BipedalWalker"

if __name__ == '__main__':
    # Test the algorithm multiple times
    for test_case in xrange(0, 1):
        # setup logger, environment and monitor
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        env = gym.make("%s-v2" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        env.monitor.start(outdir, force=True)

        # Train network
        learned = trainNetwork(env, test_case)

        # Test trained network on 1000 episodes
        learned_genome = pickle.loads(learned)
        net = NEAT.NeuralNetwork()
        learned_genome.BuildESHyperNEATPhenotype( net,substrate, params)

        episode_count = 1000
        max_steps = 1000

        for i in xrange(episode_count):
            ob = env.reset()
            net.Flush()

            for j in xrange(max_steps):
                # get next action
                net.Input(ob)
                net.Activate()
                o = net.Output()
                action = np.clip(o,-1,1)
                ob, reward, done, _ = env.step(action)
                if done:
                    break


        # Dump result info to disk
        env.monitor.close()