Next | Higher-Order Parsing | 38 |
# Identifiers listed here are lexed as FUNCTION tokens; every other
# identifier is lexed as a VAR token.
my %builtin = (sin => 1, cos => 1, sqrt => 1);

# make_tokens($s)
#
# Breaks the expression string $s into tokens and returns them as a list.
# Each token is an array reference whose first element is the token type:
#
#   ["NUMBER", $digits]    a run of digits
#   ["FUNCTION", $name]    an identifier found in %builtin
#   ["VAR", $name]         any other identifier
#   ["^"]                  the power operator, written either ^ or **
#   ["+"] ["*"] ["("] [")"]
#
# Whitespace separates tokens and is otherwise discarded.  Dies with
# "Unknown character '<c>' at offset <n> in input" (n is 0-based) when it
# meets a character that starts no token.
sub make_tokens {
    my $s = shift;
    my @tokens;

    # Each call returns the next token, or undef at end of input.  Every
    # match is anchored with \G at the current pos($s); /c keeps pos($s)
    # in place when a pattern fails so the next pattern starts at the
    # same spot.
    my $lexer = sub {
        TOP: {
            return if $s =~ m/\G\z/gxc;

            return ["NUMBER", $1] if $s =~ m/\G (\d+) /gxc;

            return $builtin{$1} ? ["FUNCTION", $1] : ["VAR", $1]
                if $s =~ m/\G ([A-Za-z]\w*) /gxc;

            # This rule must come before the "*" rule so that "**" is
            # taken as one power operator and not as two multiplications.
            return ["^"] if $s =~ m/\G ( \^ | \*\* ) /gxc;

            return ["+"] if $s =~ m/\G \+ /gxc;
            return ["*"] if $s =~ m/\G \* /gxc;
            return ["("] if $s =~ m/\G \( /gxc;
            return [")"] if $s =~ m/\G \) /gxc;

            # Skip whitespace and try again from the top.
            redo TOP if $s =~ m/\G \s+ /gxc;

            if ($s =~ m/\G (.) /gxc) {
                # pos($s) now points just past the offending character.
                my $offset = pos($s) - 1;
                die "Unknown character '$1' at offset $offset in input";
            }
        }
    };

    # Tokens are array references, hence always true; the loop ends on
    # the undef that marks end of input.
    while (my $token = $lexer->()) {
        push @tokens, $token;
    }

    return @tokens;
}
Notice how the lexer recognizes both ^ and ** and emits the same "^" token for each, eliminating the distinction for the parser.
Also notice how ** is lexed as a single power operator, not as two multiplication signs, because the ** rule is tried before the * rule.
Next | Copyright © 2007 M. J. Dominus |