use Fcntl ':seek'; use Test::More qw(no_plan); # # Arguments: # $fh: A filehandle opened to a sorted, seekable source of data such as a file # $key: A target key # Returns: # The first record $R from the file such that $R ge $key # Or undef if there is no such record # Side effects: # $fh is left positioned at the beginning of $R sub search { my ($fh, $key) = @_; # Invariant condition: Suppose the correct final position for $fh is $N. # Then $lo <= $N < $hi at all times. my ($lo, $hi) = (0, -s $fh); while (1) { my $mid = int(($lo + $hi)/2); if ($mid) { # If $mid points into the middle of a record, # skip the rest of that record and start at the next complete record seek $fh, $mid-1, SEEK_SET; my $junk = <$fh>; } else { # Special case for $mid at the very beginning of the file seek $fh, 0, SEEK_SET; } # The curent record starts at $start my $start = tell $fh; my $rec = <$fh>; return unless defined $rec; # End of the file chomp $rec; if ($hi == $lo) { # We've come as close as possible seek $fh, $start, SEEK_SET; return $rec } if ($rec lt $key) { $lo = $mid+1 } else { $hi = $mid } } } my $dict = "./words"; open FH, "<", $dict or die $!; if ($TEST) { # Exact match is(search(\*FH, 'snob'), 'snob'); # Mismatch is(search(\*FH, 'snonk'), 'snook'); # Mismatches is(search(\*FH, 'snoba'), 'snobbery'); is(search(\*FH, 'snobb'), 'snobbery'); is(search(\*FH, 'snobbe'), 'snobbery'); is(search(\*FH, 'snobber'), 'snobbery'); # Exact match is(search(\*FH, 'snobbery'), 'snobbery'); # Mismatch is(search(\*FH, 'snobberys'), 'snobbish'); # First word in the file is(search(\*FH, ''), '10th'); is(search(\*FH, '1'), '10th'); # Second word in the file is(search(\*FH, '11'), '1st'); # Last word in the file is(search(\*FH, 'zy'), 'zygote'); is(search(\*FH, 'zygote'), 'zygote'); is(search(\*FH, 'zv'), 'zygote'); # Next-to-last word in the file is(search(\*FH, 'zu'), 'zucchini'); # Failure calls is(search(\*FH, 'zygotes'), undef); is(search(\*FH, 'zz'), undef); if ($TEST > 1) { # Now try everything open D, "<", $dict or die $!; while () { chomp; is(search(\*FH, $_), $_, $_); } } } my $dict = "/usr/dict/Web2"; open FH, "<", $dict or die $!; chomp(my @words = ); my ($su, $ss) = times; for my $key (@words) { while () { chomp; last if $_ ge $key; } seek FH, 0, SEEK_SET; print STDERR "# $key\n"; } my ($eu, $es) = times; my ($tu, $ts) = ($eu-$su, $es-$ss); my $time = $tu+$ts; printf "%12s: %5.2f %5.2f %6.2f\n", "Linear", $tu, $ts, $time; my ($su, $ss) = times; for my $key (@words) { search(\*FH, $key); } my ($eu, $es) = times; my ($tu, $ts) = ($eu-$su, $es-$ss); my $time = $tu+$ts; printf "%12s: %5.2f %5.2f %6.2f\n", "Binary", $tu, $ts, $time; __DATA__ ambulatory appetite armoire fan lien matriarch scarves suspense tinkle tuff