Changeset 67

Show
Ignore:
Timestamp:
06/06/01 06:00:56
Author:
schwern
Message:

Version 0.04

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Email-Find/trunk/Changes

    r25 r67  
    11Revision history for Perl extension Email::Find. 
     2 
     30.04  Thu May 10 00:38:08 BST 2001 
     4    * Using Tatsuhiko Miyagawa's much faster regex 
     5    - Now eating my own dog food (using Test::More for testing) 
     6    - Added test against RFC822 
    27 
    380.03  Thu Dec  7 15:17:53 EST 2000 
  • Email-Find/trunk/MANIFEST

    r6 r67  
    33Makefile.PL 
    44t/Find.t 
     5t/RFC822.t 
     6t/lib/Test/More.pm 
     7t/lib/Test/Simple.pm 
     8t/rfc822.txt 
    59lib/Email/Find.pm 
  • Email-Find/trunk/lib/Email/Find.pm

    r66 r67  
    33use strict; 
    44use vars qw($VERSION @EXPORT); 
    5 $VERSION = '0.03'; 
     5$VERSION = '0.04'; 
    66 
    77# Need qr//. 
     
    1414require Mail::Address; 
    1515 
    16 # XXX Boy, does this need to be cleaned up! 
    17  
    18 # XXX I can probably get these from a module. 
    19 # Build up basic RFC 822 BNF definitions. 
    20 use vars qw($Specials $Space $Char $Ctl $Atom_re $Specials_cheat 
    21             $Atom_cheat_re 
    22            ); 
    23 $Specials = quotemeta '()<>@,;:\\".[]'; 
    24 $Space    = '\040'; 
    25 $Char     = '\000-\177'; 
    26 $Ctl      = '\000-\037\177'; 
    27 $Atom_re  = qq/[^$Ctl$Space$Specials]+/; 
    28 $Specials_cheat = $Specials; 
    29 $Specials_cheat =~ s/\\\.//; 
    30 $Atom_cheat_re = qq/[^$Ctl$Space$Specials_cheat]+/; 
    31  
    32 # Build quoted string regex 
    33 use vars qw($Qtext_re $Qpair_re $Quoted_string_re); 
    34 $Qtext_re = '[^"\\\r]+';      # " # 
    35 $Qpair_re = qq/\\\\[$Char]/; 
    36 $Quoted_string_re = qq/"(?:$Qtext_re|$Qpair_re)*"/; 
    37  
    38 # Build domain regex. 
    39 use vars qw($Domain_ref_re $Dtext_re $Domain_literal_re $Sub_domain_re 
    40             $Domain_ref_cheat_re $Sub_domain_literal_cheat_re 
    41             $Domain_literal_cheat_re 
    42            ); 
    43 $Domain_ref_re = $Atom_re; 
    44 $Dtext_re = q/[^\[\]\\\\\r]/; 
    45 $Domain_literal_re = q/\[(?:$Dtext_re|$Qpair_re)*\]/; 
    46 $Sub_domain_re = "(?:$Domain_ref_re|$Domain_literal_re)"; 
    47 $Domain_ref_cheat_re = $Atom_cheat_re; 
    48  
    49 $Sub_domain_literal_cheat_re = "(?:$Dtext_re|$Qpair_re)*"; 
    50 $Domain_literal_cheat_re = qq/\\[$Sub_domain_literal_cheat_re\\]/; 
    51  
    52 # Build local part regex. 
    53 use vars qw($Word_re $Local_part_re $Local_part_cheat_re); 
    54 $Word_re = "(?:$Atom_re|$Quoted_string_re)+"; 
    55 $Local_part_re = qq/$Word_re(?:\\.$Word_re)*/; 
    56 $Local_part_cheat_re = qq/(?:$Atom_cheat_re|$Quoted_string_re)+/; 
     16 
     17my $esc         = '\\\\';               my $period      = '\.'; 
     18my $space       = '\040'; 
     19my $open_br     = '\[';                 my $close_br    = '\]'; 
     20my $nonASCII    = '\x80-\xff';          my $ctrl        = '\000-\037'; 
     21my $cr_list     = '\n\015'; 
     22my $qtext       = qq/[^$esc$nonASCII$cr_list\"]/; 
     23my $dtext       = qq/[^$esc$nonASCII$cr_list$open_br$close_br]/; 
     24my $quoted_pair = qq<$esc>.qq<[^$nonASCII]>; 
     25my $atom_char   = qq/[^($space)<>\@,;:\".$esc$open_br$close_br$ctrl$nonASCII]/; 
     26my $atom        = qq<$atom_char+(?!$atom_char)>; 
     27my $quoted_str  = qq<\"$qtext*(?:$quoted_pair$qtext*)*\">; 
     28my $word        = qq<(?:$atom|$quoted_str)>; 
     29my $domain_ref  = $atom; 
     30my $domain_lit  = qq<$open_br(?:$dtext|$quoted_pair)*$close_br>; 
     31my $sub_domain  = qq<(?:$domain_ref|$domain_lit)>; 
     32my $domain      = qq<$sub_domain(?:$period$sub_domain)*>; 
     33my $local_part  = qq<$word(?:$period$word)*>; 
     34 
    5735 
    5836# Finally, the address-spec regex (more or less) 
    5937use vars qw($Addr_spec_re); 
    60  $Addr_spec_re = qr/$Local_part_cheat_re\ ?\@\ ? 
    61                         (?:$Domain_ref_cheat_re| 
    62                            $Domain_literal_cheat_re) 
    63                    /x; 
     38$Addr_spec_re   = qr<$local_part\s*\@\s*$domain>; 
    6439 
    6540 
     
    224199Thanks to Jeremy Howard for his patch to make it work under 5.005. 
    225200 
     201Many thanks to Tatsuhiko Miyagawa for the much, much faster and 
     202simpler regex! 
    226203 
    227204=head1 LICENSE 
  • Email-Find/trunk/t/Find.t

    r6 r67  
    1 # Before `make install' is performed this script should be runnable with 
    2 # `make test'. After `make install' it should work as `perl test.pl' 
    3  
    4 ######################### We start with some black magic to print on failure. 
    5  
    6 # Change 1..1 below to 1..last_test_to_print . 
    7 # (It may become useful if the test is moved to ./t subdirectory.) 
    8 use strict; 
    9  
    10 use vars qw($Total_tests); 
    11  
    12 my $loaded; 
    13 my $test_num = 1; 
    14 BEGIN { $| = 1; $^W = 1; } 
    15 END {print "not ok $test_num\n" unless $loaded;} 
    16 print "1..$Total_tests\n"; 
    17 use Email::Find; 
    18 $loaded = 1; 
    19 ok(1, 'compile'); 
    20 ######################### End of black magic. 
    21  
    22 # Insert your test code below (better if it prints "ok 13" 
    23 # (correspondingly "not ok 13") depending on the success of chunk 13 
    24 # of the test code): 
    25 sub ok { 
    26     my($test, $name) = @_; 
    27     print "not " unless $test; 
    28     print "ok $test_num"; 
    29     print " - $name" if defined $name; 
    30     print "\n"; 
    31     $test_num++; 
    32 
    33  
    34 sub eqarray  { 
    35     my($a1, $a2) = @_; 
    36     return 0 unless @$a1 == @$a2; 
    37     my $ok = 1; 
    38     for (0..$#{$a1}) {  
    39         unless($a1->[$_] eq $a2->[$_]) { 
    40         $ok = 0; 
    41         last; 
    42         } 
    43     } 
    44     return $ok; 
    45 
    46  
    47 # Change this to your # of ok() calls + 1 
    48 BEGIN { $Total_tests = 1 } 
     1use lib qw(t/lib); 
     2use Test::More tests => 14; 
     3BEGIN { use_ok('Email::Find') } 
    494 
    505my %Tests; 
     
    538                  => '"@".+*@[132.205.7.51]', 
    549              'What about "@"@foo.com?' => '"@"@foo.com', 
    55               'Eli the Beared <*@qz.to>' => '*@qz.to' 
     10              'Eli the Beared <*@qz.to>' => '*@qz.to', 
     11              '"@"+*@[132.205.7.51]'    => '+*@[132.205.7.51]', 
    5612             ); 
    57  
    58     $Total_tests += (3 * keys %Tests); 
    5913} 
    6014 
    6115while( my($text, $expect) = each %Tests ) { 
    6216    my($orig_text) = $text; 
    63     ok( find_emails($text, sub { ok( $_[0]->address eq $expect );   
     17    ok( find_emails($text, sub { ok( $_[0]->address eq $expect,  
     18                                     "Found $_[1]" ); 
    6419                                 return $_[1]  
    6520                             }  
    66                    ) == 1  
     21                   ) == 1, 
     22        "  just one" 
    6723      ); 
    68     ok( $text eq $orig_text ); 
     24    ok( $text eq $orig_text,    "  and replaced" ); 
    6925} 
    7026 
    71 BEGIN { $Total_tests++ } 
    7227 
    7328# Do all the tests again as one big block of text. 
    7429my $mess_text = join "\n", keys %Tests; 
    75 ok( find_emails($mess_text, sub { return $_[1] }) == keys %Tests ); 
     30ok( find_emails($mess_text, sub { return $_[1] }) == keys %Tests, 
     31    'One big block' ); 
    7632 
    7733 
     
    7935my @FalseTests; 
    8036BEGIN { 
     37    # No tests at the moment. 
    8138    @FalseTests = ( 
    82                    '"@"+*@[132.205.7.51]' 
    8339                  ); 
    84  
    85     $Total_tests += @FalseTests * 2; 
    8640} 
    8741 
    8842foreach my $f_text (@FalseTests) { 
    8943    my $orig_text = $f_text; 
    90     ok( find_emails($f_text, sub {1}) == 0 ); 
    91     ok( $orig_text eq $f_text ); 
     44    ok( find_emails($f_text, sub {1}) == 0, "False positive: $f_text" ); 
     45    ok( $orig_text eq $f_text,              "  replaced" ); 
    9246}