#!/usr/bin/perl -w #-d:DProf use strict; use constant STR_LENGTH => 20; use constant GENERATIONS => 50; use constant MAX_TOKENS => STR_LENGTH-5; use constant MAX_SCORE => STR_LENGTH+MAX_TOKENS; use PPI::Tokenizer; use AI::Genetic; use POSIX 'ceil'; use Safe; { my $cpt = new Safe; sub fitness { local $_; # PPI::Tokenizer clobbers $_, which breaks AI::Genetic my ($chars) = shift; shift; my $s = join '', @$chars; $cpt->reval('return;' . $s); ## low random score (0-4) and return if it fails to compile if ($@ ne "") { return ceil(rand()*10) % 5; } my $ret=STR_LENGTH; ## otherwise, tokenize it and return the number of tokens, which, ## with any luck, has /something/ to do with the as-yet undefined ## ``awesomeness'' of the code. my $Tokenizer = PPI::Tokenizer->new(\$s); $ret += scalar @{ $Tokenizer->all_tokens }; ## Without this block, the function selects for strings where /every/ ## char is a token. We want to include multi-character tokens as well. ## This gives a string with STR_LENGTH tokens the same score as one ## with MAX_TOKENS tokens. if ($ret > MAX_TOKENS + STR_LENGTH) { $ret = MAX_TOKENS + STR_LENGTH; } ## Also do this. $ret-=(STR_LENGTH - 5) if (substr($s, 0, 1) eq '='); # POD is cheating! $ret-=8 if ($s =~ /<[^>]{8,}>/); # significant portion of code is a file(handle|glob) $ret-=8 if ($s =~ /\?[^?]{8,}\?/); # significant portion of code is a ?..? regex return $ret; } } my $ga = new AI::Genetic( -fitness => \&fitness, -type => 'listvector', ); my @chars = ( 32..34,36..64,91..126 ); $_ = chr($_) for @chars; $ga->init([ map { [@chars] } 1..STR_LENGTH ]); $ga->evolve('randomTwoPoint', GENERATIONS); my $chars = $ga->getFittest->genes();; print "Best score = ", $ga->getFittest->score() . "/" . MAX_SCORE . "\n"; my $s = join '',@$chars; print "$s\n\n"; print "Deparsed:\n------------\n"; system('perl', '-MO=Deparse', '-e', $s); print "\n\nTokenized:\n------------\n"; my $Tokenizer = PPI::Tokenizer->new(\$s); my $i=0; while ($Tokenizer->get_token()) { print "$i: $_\n"; $i++; }