# Perl Output Filter for Apache 2.0: # Automatically conceals email addresses to # prevent harvesters from fetching them. package MyApache::ObMail; use strict; use warnings; use Apache::Filter (); use Apache::RequestRec (); use APR::Table (); use Apache::Const -compile => qw(OK DECLINED); use constant BUFF_LEN => 10240; # Store apache output data sub obfuscate { # Invocation: obfuscate(data) # Conceal all mail addresses my $line = shift; my $mail_regexp = '[A-Za-z_0-9.-]+@'. '([A-Za-z_0-9-]+\'. '.)+[A-Za-z]{2,6}'; my $adr = undef; while ($line =~ /($mail_regexp)/g) { # Split address into single characters # and reassemble with spaces in between $mail = $1; $obfus = join(' ',split(//,$mail)); # Replace all occurences $line =~ s/$mail/$obfus/gi; } return $line; } sub handler { # Called by Apache. Works through the blocks # of data delivered by the httpd. my $f = shift; unless ($f->ctx) { # Test content-type on first invocation unless ($f->r->content_type =~ m!text/(html|plain)!i ) { # Only modify text/html and text/plain return Apache::DECLINED; } # Reset Content-Length calculated by the # server. We'll change the amount of data $f->r->headers_out->unset('Content-Length'); } my $leftover = $f->ctx; while ($f->read(my $buffer, BUFF_LEN)) { $buffer = $leftover.$buffer if defined $leftover; if (length($buffer) > (2*BUFF_LEN)) { # Don't wait forever for whitespace $f->print(obfuscate($buffer)); $buffer = $leftover = ""; } else { # Keep the last beginning of a word # in leftover to work only on full # addresses and not on fragments. $buffer =~ /(.*)(\s\S*)\z/gs; $leftover = $2; $f->print(obfuscate($1)); } } if ($f->seen_eos) { # End of data-stream in sight. if (defined $leftover) { $leftover=obfuscate($leftover); $f->print(scalar $leftover); } } else { # Pass remaining data to next invocation $f->ctx($leftover) if defined $leftover; } return Apache::OK; } 1;