# ok also, just saw this: https://gym.openai.com/evaluations/eval_a0YXWDc4SKeJjyTH7IrHBg/
# it doesn’t work apparently, but could be salvaged into something,
# possibly written by this guy: https://blog.otoro.net/
# https://attentionagent.github.io/ “there is no conscious perception of the visual world without attention to it”
# Using ES-HyperNEAT to try to solve the Bipedal walker.
# This attempt was not successful. Adjustment of hyperparameters is likely needed.
# A neural network is trained using NeuroEvolution of Augmenting Topologies
# The idea is from the paper: "Evolving Neural Networks through Augmenting Topologies"
# This gist is using MultiNEAT (http://multineat.com/)
import logging
import numpy as np
import pickle
import gym
import MultiNEAT as NEAT
# NEAT setup
# All evolution settings live on a single NEAT.Parameters instance that is
# later handed to the genome, the population and the phenotype builder.
# Attribute names (including the "Treshold" spelling) are kept exactly as the
# script has always set them.
params = NEAT.Parameters()

# Population size and speciation
params.PopulationSize = 200
params.DynamicCompatibility = True
params.CompatTreshold = 2.0
params.YoungAgeTreshold = 15
params.SpeciesMaxStagnation = 100
params.OldAgeTreshold = 35
params.MinSpecies = 5
params.MaxSpecies = 10
params.RouletteWheelSelection = False

# Structural and weight mutation
params.MutateRemLinkProb = 0.02
params.RecurrentProb = 0
params.OverallMutationRate = 0.15
params.MutateAddLinkProb = 0.08
params.MutateAddNeuronProb = 0.01
params.MutateWeightsProb = 0.90
params.MaxWeight = 8.0
params.WeightMutationMaxPower = 0.2
params.WeightReplacementMaxPower = 1.0

# Activation-parameter mutation (MutateActivationAProb is 0.0 here)
params.MutateActivationAProb = 0.0
params.ActivationAMutationMaxPower = 0.5
params.MinActivationA = 0.05
params.MaxActivationA = 6.0

# Activation-function selection: Tanh, SignedStep, SignedGauss, SignedSine
# and Linear are set to 1.0; every other function is set to 0.0.
params.MutateNeuronActivationTypeProb = 0.03
params.ActivationFunction_SignedSigmoid_Prob = 0.0
params.ActivationFunction_UnsignedSigmoid_Prob = 0.0
params.ActivationFunction_Tanh_Prob = 1.0
params.ActivationFunction_TanhCubic_Prob = 0.0
params.ActivationFunction_SignedStep_Prob = 1.0
params.ActivationFunction_UnsignedStep_Prob = 0.0
params.ActivationFunction_SignedGauss_Prob = 1.0
params.ActivationFunction_UnsignedGauss_Prob = 0.0
params.ActivationFunction_Abs_Prob = 0.0
params.ActivationFunction_SignedSine_Prob = 1.0
params.ActivationFunction_UnsignedSine_Prob = 0.0
params.ActivationFunction_Linear_Prob = 1.0

# ES-HyperNEAT settings
# (presumably read by BuildESHyperNEATPhenotype -- confirm against the
# MultiNEAT documentation)
params.DivisionThreshold = 0.5
params.VarianceThreshold = 0.03
params.BandThreshold = 0.3
params.InitialDepth = 2
params.MaxDepth = 3
params.IterationLevel = 1
params.Leo = False
params.GeometrySeed = False
params.LeoSeed = False
params.LeoThreshold = 0.3
params.CPPN_Bias = -1.0
params.Qtree_X = 0.0
params.Qtree_Y = 0.0
params.Width = 1.
params.Height = 1.

params.Elitism = 0.1

# Time-seeded random number generator.
# NOTE(review): `rng` is not referenced anywhere else in this script; the
# population is seeded through the `seed` argument of trainNetwork instead.
rng = NEAT.RNG()
rng.TimeSeed()
# Substrate geometry: 24 input nodes and 4 output nodes, matching the
# NEAT.Genome(0, 24, 0, 4, ...) seed genome built in trainNetwork.
# Inputs are laid out in two rows: 14 nodes evenly spaced over x in [-1, 1]
# at y = -1.0, followed by 10 nodes evenly spaced over x in [-1, 1] at
# y = -0.5. (Named `input_coords` rather than `list` so the builtin is not
# shadowed for the rest of the module.)
input_coords = [(-1. + (2. * i / 13.), -1., 0.) for i in range(14)]
input_coords += [(-1. + (2. * i / 9.), -0.5, 0.) for i in range(10)]

# Four output nodes along y = 1.0.
output_coords = [(-1., 1., 0.), (-0.5, 1., 0.), (0.5, 1., 0.), (1., 1., 0.)]

# No hidden nodes are supplied up front (empty list).
substrate = NEAT.Substrate(input_coords, [], output_coords)

# Link permissions. Several of these flags used to be assigned twice with
# different values; only the last assignment took effect, so each flag is now
# set once to that effective value.
substrate.m_allow_input_hidden_links = True
substrate.m_allow_input_output_links = False
substrate.m_allow_hidden_hidden_links = True
substrate.m_allow_hidden_output_links = True
substrate.m_allow_output_hidden_links = False
substrate.m_allow_output_output_links = False
substrate.m_allow_looped_hidden_links = True
substrate.m_allow_looped_output_links = False

substrate.m_hidden_nodes_activation = NEAT.ActivationFunction.SIGNED_SIGMOID
# NOTE(review): the network output is clipped to [-1, 1] before being passed
# to env.step(). If UNSIGNED_SIGMOID only yields values in [0, 1], negative
# actions can never be produced -- confirm whether SIGNED_SIGMOID was intended.
substrate.m_output_nodes_activation = NEAT.ActivationFunction.UNSIGNED_SIGMOID
substrate.m_with_distance = False
substrate.m_max_weight_and_bias = 8.0
def trainNetwork(env, seed, generation_count=50, episode_count=10,
                 max_steps=1000):
    """Evolve a controller for ``env`` and return the best genome, pickled.

    Every genome of every generation is turned into an ES-HyperNEAT phenotype
    (using the module-level ``substrate`` and ``params``) and scored by the
    reward it accumulates over ``episode_count`` episodes.

    Args:
        env: Environment exposing ``reset()`` and
            ``step(action) -> (ob, reward, done, info)``.
        seed: Seed passed to ``NEAT.Population``.
        generation_count: Number of generations to evolve.
        episode_count: Episodes run per genome evaluation.
        max_steps: Step limit per episode.

    Returns:
        ``pickle.dumps`` of the first genome whose summed reward reaches
        ``episode_count * max_steps``, otherwise of ``pop.GetBestGenome()``
        as of the last generation. ``None`` if ``generation_count`` is 0.
    """
    # Max reward for environments that reward 1 for each successful step
    # (e.g. CartPole-v0)
    max_reward = episode_count * max_steps

    def evaluate(genome):
        """Return the reward ``genome`` collects, summed over all episodes."""
        net = NEAT.NeuralNetwork()
        genome.BuildESHyperNEATPhenotype(net, substrate, params)
        cum_reward = 0
        for _episode in range(episode_count):
            ob = env.reset()
            net.Flush()
            for _step in range(max_steps):
                # get next action
                net.Input(ob)
                net.Activate()
                action = np.clip(net.Output(), -1, 1)
                ob, reward, done, _info = env.step(action)
                cum_reward += reward
                if done:
                    break
        return cum_reward

    # Create initial genome: 24 inputs, 4 outputs, matching the substrate.
    g = NEAT.Genome(0, 24, 0, 4, False,
                    NEAT.ActivationFunction.TANH,
                    NEAT.ActivationFunction.TANH, 0, params)
    pop = NEAT.Population(g, params, True, 1.0, seed)

    current_best = None
    for generation in range(generation_count):
        for genome in NEAT.GetGenomeList(pop):
            reward = evaluate(genome)
            # `>=` rather than `==`: an accumulated (possibly float) reward
            # that overshoots the threshold must still end training.
            if reward >= max_reward:
                return pickle.dumps(genome)
            genome.SetFitness(reward)
        print('Generation: {}, max fitness: {}'.format(
            generation,
            max(x.GetFitness() for x in NEAT.GetGenomeList(pop))))
        # Snapshot the best genome before Epoch() moves the population on to
        # the next generation.
        current_best = pickle.dumps(pop.GetBestGenome())
        pop.Epoch()
    return current_best
env_name = "BipedalWalker"

if __name__ == '__main__':
    # Configure the root logger once; it does not depend on the test case.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Test the algorithm multiple times
    for test_case in range(0, 1):
        # setup environment and monitor
        env = gym.make("%s-v2" % env_name)
        outdir = "/tmp/neat-%s-results-%d" % (env_name, test_case)
        # NOTE(review): env.monitor is the monitor API this script was written
        # against; confirm it exists in the installed gym version.
        env.monitor.start(outdir, force=True)
        try:
            # Train network (the test case index doubles as the seed)
            learned = trainNetwork(env, test_case)

            # Rebuild the phenotype from the pickled genome. The pickle was
            # produced by trainNetwork in this same process, so loading it
            # here does not touch external data.
            learned_genome = pickle.loads(learned)
            net = NEAT.NeuralNetwork()
            learned_genome.BuildESHyperNEATPhenotype(net, substrate, params)

            # Test trained network on 1000 episodes
            episode_count = 1000
            max_steps = 1000
            for _episode in range(episode_count):
                ob = env.reset()
                net.Flush()
                for _step in range(max_steps):
                    # get next action
                    net.Input(ob)
                    net.Activate()
                    action = np.clip(net.Output(), -1, 1)
                    ob, reward, done, _info = env.step(action)
                    if done:
                        break
        finally:
            # Dump result info to disk, even if training or evaluation raised.
            env.monitor.close()