# $DEBUG = 1; require 5.00557; use Tk; use re 'eval'; # print and exit; print "Enter the regex that you would like to debug:\n\t"; chomp($regex = ); print "Enter the target string: "; chomp($target = ); $regex =~ s/\\n/\n/g; $target =~ s/\\n/\n/g; unless ($regex =~ /\(\?\{\&(pause|match)\}\)/) { $orig_regex = $regex; $regex =~ s/((?); if ($yn !~ /^y/) { print "OK, I'll just use $orig_regex then.\n"; $regex = $orig_regex; } } my ($pre, $in, $post) = ('$`', '$&', '$\''); my @nums = map "\$$_", (1..9); my $count; pipe FROMCHILD, TOPARENT or die "pipe up: $!; aborting"; pipe FROMPARENT, TOCHILD or die "pipe down: $!; aborting"; for (\*FROMCHILD, \*TOCHILD, \*FROMPARENT, \*TOPARENT) { bless $_ => FileHandle; select $_; $|++; } select STDOUT; my $pid = fork; defined $pid or die "fork: $!; aborting"; if ($pid) { # parent close TOPARENT; close FROMPARENT; } else { close TOCHILD; close FROMCHILD; mainlooper(); exit; } while ($target =~ /$regex(?{&match})(?!)/og) { # print "$1 $2\n"; 1; } $DEBUG && print "PARENT: Exiting\n"; exit; sub pause { $DEBUG && print "PARENT: Pausing $` $& $'\n"; writestrs(\*TOCHILD, $`, $&, $', $1, $2, $3, $4, $5, $6, $7, $8, $9, $matching, ); $matching = 0; pipewait(\*FROMCHILD); } sub match { $matching = 1; $DEBUG && print "PARENT: Matched: $&\n"; &pause; } sub mainlooper { my $mw = new Tk::MainWindow; my @po = qw(-fill both -expand yes); my $div = $mw->Frame(-height => 70, -width => 300)->pack(-expand => yes); $div->Label(-textvar => \$pre, -fg => 'blue')->pack(-side => left, -padx => 4); # ` $inlabel = $div->Label(-textvar => \$in, -fg => 'red')->pack(-side => left); $div->Label(-textvar => \$post, -fg => 'blue')->pack(-side => left, -padx => 2); # ' $div->pack(-side => top); my $n; for $n (0 .. 8) { $mw->Label(-textvar => \$nums[$n])->pack(-side => top); } $mw->Label(-textvar => \$count); $button = $mw->Button(-text => 'Click to Begin', -command => \&button)->pack(@po); MainLoop; } sub button { my ($br) = @_; $button->configure(-text => 'Next'); ++$count; $DEBUG && print "CHILD: In button callback\n"; my @rest; ($pre, $in, $post, @rest) = readstrs(\*FROMPARENT); { my $succeeded = pop @rest; my $color = $succeeded ? 'green' : 'red'; $inlabel->configure(-fg => $color); } for ($i= 0; $i < @rest; $i++) { $nums[$i] = $rest[$i]; } pipekick(\*TOPARENT); $DEBUG && print "CHILD: Returning to event loop.\n"; } sub writestrs { my $fh = shift; my $n = @_; $DEBUG && do { local $" = ')('; print "PARENT: Writing (@_)\n" }; print $fh $n, ':'; local $_; for (@_) { print $fh length($_), ':', $_; # print STDOUT length($_), ':', $_; } # $fh->flush; } sub readstrs { my $fh = shift; my $n = readnum($fh); $DEBUG && print "CHILD: Reading $n strings from pipe:\n"; my @strs; while ($n--) { my $len = readnum($fh); my $s; read $fh, $s, $len; $DEBUG && print "CHILD: Read $s\n"; push @strs, $s; } @strs; } sub readnum { my $n = ''; my $fh = shift; my $c; $DEBUG && print "CHILD: Reading number from pipe:\n"; while (($c = getc($fh)) ne ':' && $c ne '') { $n .= $c; } if ($c eq '') { $DEBUG && print "CHILD: Premature end of file on pipe; aborting\n"; exit; } $DEBUG && print "CHILD: Read number $n.\n"; $n; } sub pipewait { my $pipe = shift; my $s; $DEBUG && print "PARENT: Waiting for kick.\n"; sysread $pipe, $s, 1; $DEBUG && print "PARENT: Got the kick.\n"; } sub pipekick { my $pipe = shift; $DEBUG && print "CHILD: Kicking pipe.\n"; print $pipe "x"; $pipe->flush; } sub FileHandle::flush { my ($self) = @_; my $ofh = select $self; my $ob = $|++; print $self ''; $| = $ob; select $ofh; } __DATA__ This is an *experimental* regex debugger. I wrote it today, so it isn't very good yet. The finished version will have a picture of the regex node diagram, will show all the saved state of the regex, and will have forward and backward buttons. It will have a good interface for user-settable break points. It will also have a better display. The debugger displays the part of the string that is currently being considered for a match in red. If the entire regex matches, the matched part turns green. The parts of the target string before and after the current partial match are displayed in blue. For example, suppose you are matching the string `crate' against the regex /rat/. Perl will try to match the r first, and you'll see c r ate with the `c' and the `ate' in blue, and the r in red. Then Perl will include the `a' in the matching string, and you'll see: c ra te with ra in red and the rest in blue. Then Perl will match the `t', and you'll see c rat e with `rat' in red and the c and e in blue. Finally, the `rat' will turn green to indicate that the regex matched successfully. the debugger tries to `instrument' your regex in an interesting way. Instumentation means that it adds breakpoints to your regex to make it stop at the places it thinks will be informative. Instrumentation also copies `interesting' substrings of the target string into the backreference variables and displays them in the debugger window as they change. The instrumented form of /rat/ is (r)(?{&pause})(a)(?{&pause})(t)(?{&pause}) The (r), (a), and (t) mean that those parts of the target that match the (r), (a), and (t) will be dislpayed in the debugger window. The (?{&pause})'s mean that the debugger will pause at those points to refresh the window and to wait for you to press the button before it will go on. If you don't like the way the regex is instrumented, you can tell the debugger to use the uninstrumented version. In this case you won't get any extra (...)'s and there won't be any pause points unless you put them in yourself. If your regex already contains (?{&pause}), the debugger will assume that you instrumented it yourself and will not try to instrument it further. Some examples to try: rat with target string crate e*nt with target string seventeenth dog|cat with target string domesticate a+b+ with target string aaabbbaabb The default instrumentation should work well for these examples. The code that prints out these instructions is right at the top of the program. If you comment it out, the program will function normally.