BEST: 'peek' with fitness 41
BEST: 'peek' with fitness 41
BEST: 'peek' with fitness 41
BEST: 'peek' with fitness 41
BEST: 'peek' with fitness 41
BEST: '{const 9}' with fitness 37
BEST: '{const 9}' with fitness 37
BEST: '{const 9}' with fitness 37
BEST: '{const 9}' with fitness 37
BEST: 'mod dup *' with fitness 3
BEST: 'mod dup *' with fitness 3
BEST: 'mod dup *' with fitness 3
BEST: 'dup *' with fitness 2This is the code: # Genetic programming in Tcl, using a stack based approach.
# Copyright (C) 2004 Salvatore Sanfilippo <antirez@invece.org>
# This code is released under the BSD license.
################################################################################
# Virtual Machine
################################################################################
# Define a simple stack virtual machine.
# It's as simple as possible, and there is no program that will
# cause an error. Every program composed of valid words is valid.
# The following is a list populated by the [instr] procedure.
set ::instructions {}
# Define a new instruction for the VM
proc instr {name arglist body} {
lappend ::instructions $name
proc $name $arglist $body
}
# Prepare the VM state for the execution of a new program
proc init stackval {
set ::stack $stackval ;# The VM stack
set ::ip -1 ;# The VM istruction pointer
}
# Push and pop from the VM stack.
proc push element {
lappend ::stack $element
}
proc pop {} {
set element [lindex $::stack end]
set ::stack [lrange [lindex [list $::stack [unset ::stack]] 0] 0 end-1]
return $element
}
# Check if the stack length is at least 'n', otherwise
# force the caller procedure to return.
proc needlen n {
if {[llength $::stack] < $n} {
return -code return
}
}
# VM instructions
instr + {} {
needlen 2
push [expr {[pop]+[pop]}]
}
instr - {} {
needlen 2
push [expr {[pop]-[pop]}]
}
instr * {} {
needlen 2
push [expr {[pop]*[pop]}]
}
proc divmod op {
needlen 2
set a [pop]
set b [pop]
if {!$b} {
push $b
push $a
return
}
push [expr "$a $op $b"]
}
instr / {} {divmod /}
instr mod {} {divmod %}
instr dup {} {
needlen 1
set a [pop]
push $a
push $a
}
instr dup2 {} {
needlen 2
set a [pop]
set b [pop]
push $b
push $a
push $b
push $a
}
instr swap {} {
needlen 2
set a [pop]
set b [pop]
push $a
push $b
}
instr drop {} {
needlen 1
pop
}
instr rot {} {
needlen 3
set c [pop]
set b [pop]
set a [pop]
push $c
push $a
push $b
}
instr peek {} {
needlen 2
push [lindex $::stack end-1]
}
instr > {} {
needlen 2
push [expr {[pop]>[pop]}]
}
instr < {} {
needlen 2
push [expr {[pop]<[pop]}]
}
instr == {} {
needlen 2
push [expr {[pop]==[pop]}]
}
instr jz {{n {-10 10}}} {
needlen 1
if {[pop] == 0} {
incr ::ip $n
if {$::ip < -1} {
set ::ip -1
}
}
}
instr jnz {{n {-10 10}}} {
needlen 1
if {[pop] != 0} {
incr ::ip $n
if {$::ip < -1} {
set ::ip -1
}
}
}
# Nop istruction is important to kill some istruction by mutation.
instr nop {} {
}
# Return if zero
instr retz {} {
needlen 1
if {[pop] == 0} {
set ::ip 100000
}
}
# Return if nonzero
instr retnz {} {
needlen 1
if {[pop] != 0} {
set ::ip 100000
}
}
# Reiterate the program if zero
instr againifz {} {
needlen 1
if {[pop] == 0} {
set ::ip -1
}
}
# non-zero version
instr againifnz {} {
needlen 1
if {[pop] != 0} {
set ::ip -1
}
}
# Not
instr not {} {
needlen 1
push [expr {![pop]}]
}
instr const {{n {-10 10}}} {
push $n
}
# Run the program
proc run {prg stack {maxinstr 100}} {
init $stack
set instrcount 0 ; # Counter used to limit the total program run time.
while 1 {
incr ::ip
set instr [lindex $prg $::ip]
if {$instr eq {}} break
if {[llength $instr] == 1} {
$instr
} else {
[lindex $instr 0] [lindex $instr 1]
}
incr instrcount
if {$instrcount > $maxinstr} break
}
return $::stack
}
################################################################################
# The evolution engine
################################################################################
# Generate a random integer in the min-max (both included) interval.
proc rand {min max} {
expr {$min+int(rand()*(($max-$min)+1))}
}
# Returns a random element from the list
proc lrand list {
lindex $list [expr {int(rand()*[llength $list])}]
}
# Returns a random instruction
proc randinstr {} {
set instr [lrand $::instructions]
set arglist [info args $instr]
if {$arglist eq {}} {
return $instr
} else {
info default $instr [lindex $arglist 0] l
foreach {min max} $l break
return [list $instr [rand $min $max]]
}
}
# Create a random program of length 'n'
proc randprog n {
while {[incr n -1] >= 0} {
lappend prg [randinstr]
}
return $prg
}
# Create an initial population of programs
# of the specified number of individuals with length in
# the specified range.
proc randpopulation {n minlen maxlen} {
while {[incr n -1] >= 0} {
lappend result [randprog [rand $minlen $maxlen]]
}
return $result
}
# Two points crossover. This is used to create two offsprings
# from two programs. An example about how it works:
#
# Given two programs: {A B C D E F} and {1 2 3 4 5 6 7 8 9}
# We identify two random points in both the programs:
#
# {A B C D E F}
# ^ ^
#
# {1 2 3 4 5 6 7 8 9}
# ^ ^
#
# The two offsprings are created using the external part of
# the first and the internal part of the second, and vice versa,
# So the first offspring will be:
#
# {A 1 2 3 4 5 6 E F}
#
# And the second
#
# {B C D 7 8 9}
#
# The input programs are 'a' and 'b', the two crossovers are
# returnes as a two elements list.
proc crossover {a b} {
# Get the four crossover points
set a0 [rand 0 [expr {[llength $a]-1}]]
set a1 [rand 0 [expr {[llength $a]-1}]]
set b0 [rand 0 [expr {[llength $b]-1}]]
set b1 [rand 0 [expr {[llength $b]-1}]]
# Swap the two points if needed to be sure a0>=a1 and b0>=b1
if {$a0 > $a1} {set t $a0; set a0 $a1; set a1 $t}
if {$b0 > $b1} {set t $b0; set b0 $b1; set b1 $t}
# Get the left/center/right part of every program
set aleft [lrange $a 0 [expr {$a0-1}]]
set acenter [lrange $a $a0 $a1]
set aright [lrange $a [expr {$a1+1}] end]
set bleft [lrange $b 0 [expr {$b0-1}]]
set bcenter [lrange $b $b0 $b1]
set bright [lrange $b [expr {$b1+1}] end]
# Now create the crossovers by mean of list contatenation
set x0 [concat $aleft $bcenter $aright]
set x1 [concat $bleft $acenter $bright]
list $x0 $x1
}
# Given a program returns a mutated one where every istruction
# of the original program will be substituted (or a new one
# inserted just after it) with the specified probability '$p'.
proc mutate {program prob} {
for {set i 0} {$i < [llength $program]} {incr i} {
if {[expr {rand()}] <= $prob} {
lset program $i [randinstr]
}
if {[expr {rand()}] <= $prob} {
set program [linsert $program $i [randinstr]]
}
}
return $program
}
# The core of the GP engine.
# Creates a random population of the specified size, and starts
# the evolution process rating every program using the 'fitness function'
# specified. At every itearation 1/3 of the population having the
# best fitness is used to create another 1/3 of the population
# by offsprings, and another 1/3 by simple mutation of the original
# programs. The evolution is then reiterated.
#
# The fitness function should return an integer representing the
# amount of error in the computation done by the program.
# It takes as input an individual. Usually the fitness function
# will use the "run" procedure to execute the script with a
# significant input stack and will rate the program observing the
# output.
#
# At every iteration, the individual with best fitness is printed
# on the screen.
#
# Parameters specification:
#
# individuals: The number of individuals in the whole population.
# This number is approximated to a multiple of 3.
# len: Max length of individuals of the initial population.
# fitnessfunc: The fitness function (a Tcl procedure name).
# mutprob: Mutation probability used for the 'mutate' procedure.
proc evolve {individuals len fitnessfunc mutprob} {
set population [randpopulation $individuals 1 $len]
while 1 {
# Run every program, and populate a list with it and its fitness.
set res {}
foreach prg $population {
set fitness [$fitnessfunc $prg]
lappend res [list $fitness $prg]
}
# Sort the individuals by fitness (low fitness == better)
set sorted [lsort -integer -index 0 $res]
# Get the lead population
set l [expr {[llength $sorted]/3}]
set leaders [lrange $sorted 0 [expr {$l-1}]]
# Generate another 1/3 of population by offsprings of random leaders.
set offsprings {}
while 1 {
set x [rand 0 [expr {[llength $leaders]-1}]]
set parent0 [lindex $leaders $x 1]
set x [rand 0 [expr {[llength $leaders]-1}]]
set parent1 [lindex $leaders $x 1]
foreach {offspring0 offspring1} [crossover $parent0 $parent1] break
lappend offsprings $offspring0
if {[llength $offsprings] == $l} break
lappend offsprings $offspring1
if {[llength $offsprings] == $l} break
}
set mutated {}
# Generate the last 1/3 of population mutating the leaders.
foreach leader $leaders {
lappend mutated [mutate [lindex $leader 1] $mutprob]
}
# Glue the three populations (leaders, offsprings, mutated) to
# create the population for the next iteration.
set new {}
foreach leader $leaders {
lappend new [lindex $leader 1]
}
set population [concat $new $offsprings $mutated]
# Print the best individual in this iteration:
puts "BEST: '[lindex $leaders 0 1]' with fitness [lindex $leaders 0 0]"
# Print the population
#puts $population
if 0 {
foreach p $population {
puts $p
}
}
}
}
################################################################################
# Usage example. Evolve a program that computes the square of a number
# The best program is "DUP *".
################################################################################
# Calculate the fitness.
# Best fitness is 2 (no errors with all the inputs, program len == 2).
proc squareFitness prg {
set fitness 0
foreach i {1 2 3 4 5} o {1 4 9 16 25} {
set stack [run $prg $i]
set result [lindex $stack end]
if {$result eq {}} {
incr fitness 50
} else {
set delta [expr {abs($o-$result)}]
if {$delta > 1000} {
# anti overflow
set delta 1000
}
incr fitness $delta
}
}
return [expr {$fitness+[llength $prg]}]
}
# Uncomment the following to run this example
evolve 90 5 squareFitness 0.1See also Brute force meets Goedel
Harm Olthof: This is not what is generally considered with the phrase Genetic Programming (GP) In GP you have a tree: each node is a function or a terminal (constant) The parameters of the function are the branches of the subtrees, which is a function or a terminal. You start with the root and work your way through the branches, resulting in one nested call to the main function in the root. Take a population of trees and in the evolutionary cycle exchange subbranches between the trees. Trees with higher fitness have higher change to exchange their subbranches with other trees. see [1]. This is not exacty the same as Genetic Algorithms: see eg. Una-May O'Reilly thesis. ([2])
Note that the tree is just an implementation detail. To use the tree or the stack is the same, and actually there is quite a lot of literature about stack-based GP. See for example [3]. Both with the tree or the stack-based approach programs are mixed together, and resulting programs are always valid. With the stack-based approach GP is more directly related to GA because the program looks a lot like random "data".
AM (6 april 2005) Some musings:
- How to randomly select floating-point numbers - the problem is that very small and very large values may be necessary - you need to come up with a decent distribution...
- Converting a stack-based program into a nice little mathematical formula may not be trivial
- A nice little exercise: let the script come up with a solution for squaring a number without *
- I wonder what happens if the cost (fitness is not very intuitive a word here) does not depend on the size ...

