#!/usr/bin/perl -w

# Listing 10: best-search.pl.
#
# This program performs a Web search through HTML and text files while
# ignoring directories containing .nosearch files, allowing for three
# types of searches ("and", "or", and phrase), and ignoring the contents
# of HTML tags.
#
# CGI parameters read by this script:
#   pattern - the text to search for (required)
#   type    - the kind of search to run

use strict;
use diagnostics;

use File::Find;
use CGI;
use CGI::Carp qw|fatalsToBrowser|;

# Which directory should start the search?
my $search_root = "/usr/local/apache/htdocs";

# What is the beginning of every URL?
my $url_origin = "http://localhost";

# Slurp up files in one fell swoop.  This is set globally rather than
# with "local", so it stays in effect for every later read in the
# program, including reads performed inside find_matches.
undef $/;

# What directories should we avoid?
my %ignore_directory = ();

# Create an instance of CGI
my $query = CGI->new;

# Send a MIME header
print $query->header("text/html");

# Get the text pattern for which to search
my $pattern = $query->param("pattern");

# What kind of search will we run?
my $search_type = $query->param("type");

# How many matches did we find?
# NOTE(review): presumably incremented by find_matches -- confirm against
# the rest of that subroutine.
my $total_matches = 0;

# Make sure that $pattern is defined and non-empty.  Testing length rather
# than truth lets a user search for the string "0".
unless (defined $pattern and length $pattern)
{
    print $query->start_html(-title => "No pattern named");
    print "<p>You must enter a pattern!</p>\n";
    print $query->end_html;
    exit;
}

# Both parameters come straight from the browser, so escape them before
# echoing them back into the page.
my $shown_pattern = $query->escapeHTML($pattern);
my $shown_type    = $query->escapeHTML(defined $search_type ? $search_type : "");

# Start the HTML output
print $query->start_html(-title => "Search results");
print qq{<p>Results of a $shown_type search for "$shown_pattern":</p>\n};

# Start an unordered list
print "<ul>\n";

# Walk the tree under $search_root, calling find_matches on every entry
find(\&find_matches, $search_root);

# End the unordered list
print "</ul>\n";

# Say "match" or "matches", as appropriate
my $match_or_matches = ($total_matches == 1) ? "match" : "matches";

print "<p>$total_matches $match_or_matches found.</p>\n";
print $query->end_html;

# ------------------------------------------------------------
# Subroutine that searches through files for matches
sub find_matches
{
    # Return if we have already marked this directory as ignorable
    return if ($ignore_directory{$File::Find::dir});

    # Mark the directory as ignorable ...
    $ignore_directory{$File::Find::dir} = 1
        if (($_ eq ".nosearch") ||    # ... if this file is .nosearch
            (-e ".nosearch") ||       # ... if there is .nosearch in this dir
            (-e "../.nosearch"));     # ... if there is .nosearch up one dir

    # Make sure that this is an HTML or text
    return unless (m/\.html?$/i or m/\.te?xt$/i);

    # Get the filename
    my $filename = $_;

    # Open the file, and search through its contents
    if (open FILE, $filename)
    {
        # Turn the filename into a viable URL
        my $url = "$File::Find::dir/$filename";
        $url =~ s/$search_root/$url_origin/;

        # Get the file
        my $contents = (