| Next | Higher-Order Parsing | 38 |
# Identifiers listed here are reported as FUNCTION tokens; every other
# identifier is reported as a VAR token.
my %builtin = (sin => 1, cos => 1, sqrt => 1);

# make_tokens($s)
#
# Split the expression string $s into a flat list of tokens and return that
# list. Each token is an array ref whose first element is the token type:
#
#   ["NUMBER",   $digits]   a run of ASCII digits
#   ["FUNCTION", $name]     an identifier that is a key of %builtin
#   ["VAR",      $name]     any other identifier
#   ["^"]                   power operator, written either ^ or **
#   ["+"] ["*"] ["("] [")"] the corresponding single character
#
# Whitespace separates tokens and is otherwise discarded. Dies with
# "Unknown character '<c>' at offset <n>" (n is the 0-based offset into $s)
# on any character that starts no token.
sub make_tokens {
    my $s = shift;
    my @tokens;

    # Returns the next token, or nothing once the input is exhausted.
    # The scan position is kept in pos($s): \G anchors each pattern there,
    # /g advances it on a successful match, and /c leaves it untouched when
    # a match fails so the next rule is tried from the same place.
    my $lexer = sub {
        TOP: {
            return if $s =~ m/\G\z/gxc;

            # [0-9] rather than \d: \d also matches non-ASCII Unicode
            # digits, which Perl cannot use as numbers.
            return ["NUMBER", $1] if $s =~ m/\G ([0-9]+) /gxc;

            return $builtin{$1} ? ["FUNCTION", $1]
                                : ["VAR", $1]
                if $s =~ m/\G ([A-Za-z]\w*) /gxc;

            # This rule must come before the single "*" rule below,
            # otherwise "**" would be lexed as two multiplication signs.
            return ["^"] if $s =~ m/\G (?: \^ | \*\* ) /gxc;

            return ["+"] if $s =~ m/\G \+ /gxc;
            return ["*"] if $s =~ m/\G \* /gxc;
            return ["("] if $s =~ m/\G \( /gxc;
            return [")"] if $s =~ m/\G \) /gxc;

            # Skip whitespace and try every rule again from the new position.
            redo TOP if $s =~ m/\G \s+ /gxc;

            # The match has already advanced pos($s) past the offending
            # character, hence the "- 1".
            die "Unknown character '$1' at offset " . (pos($s) - 1)
                if $s =~ m/\G (.) /gxc;
        }
    };

    # Tokens are array refs and therefore always true, so the loop stops
    # only when the lexer reports end of input.
    while (my $token = $lexer->()) {
        push @tokens, $token;
    }
    return @tokens;
}
Notice how the lexer recognizes both ^ and ** and eliminates the distinction: either spelling produces the same "^" token.
Also notice how ** is lexed as a single power operator, not as two multiplication signs, because the rule for ** is tried before the rule for *.
| Next | ![]() |
![]() |
Copyright © 2007 M. J. Dominus |