Changeset 2239

Show
Ignore:
Timestamp:
05/09/07 16:54:30
Author:
miyagawa
Message:

API CHANGE: scraper {} now returns a Web::Scraper object, not a closure.
You should call the ->scrape() method on it to get the data back.

I loved the fact that it returned a closure, but technically it doesn't
need to, and returning an object is actually more compatible with
scrapi.rb and less surprising to people.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Web-Scraper/trunk/eg/ebay-auction.pl

    r2231 r2239  
    2121}; 
    2222 
    23 my $auctions = $ebay->( URI->new("http://search.ebay.com/apple-ipod-nano_W0QQssPageNameZWLRS") ); 
     23my $auctions = $ebay->scrape( URI->new("http://search.ebay.com/apple-ipod-nano_W0QQssPageNameZWLRS") ); 
    2424 
    2525use YAML; 
  • Web-Scraper/trunk/eg/extract-links.pl

    r2225 r2239  
    1313}; 
    1414 
    15 my $links = $scraper->(URI->new($uri)); 
     15my $links = $scraper->scrape(URI->new($uri)); 
    1616use YAML; 
    1717warn Dump $links; 
  • Web-Scraper/trunk/eg/hatena-keyword.pl

    r2231 r2239  
    1414}; 
    1515 
    16 my $res = $keyword->(URI->new("http://d.hatena.ne.jp/keyword/%BA%B0%CC%EE%A4%A2%A4%B5%C8%FE")); 
     16my $res = $keyword->scrape(URI->new("http://d.hatena.ne.jp/keyword/%BA%B0%CC%EE%A4%A2%A4%B5%C8%FE")); 
    1717 
    1818use YAML; 
  • Web-Scraper/trunk/eg/twitter-friends.pl

    r2225 r2239  
    1818}; 
    1919 
    20 my $friends = $twitter->($uri); 
     20my $friends = $twitter->scrape($uri); 
    2121 
    2222use YAML; 
  • Web-Scraper/trunk/lib/Web/Scraper.pm

    r2235 r2239  
    3030sub scraper(&) { 
    3131    my($coderef) = @_; 
     32    bless { code => $coderef }, __PACKAGE__; 
     33} 
    3234 
    33     sub { 
    34         my $stuff = shift; 
    35         my($html, $tree); 
     35sub scrape { 
     36    my $self = shift; 
     37    my($stuff) = @_; 
    3638 
    37         if (blessed($stuff) && $stuff->isa('URI')) { 
    38             require HTTP::Response::Encoding; 
    39             my $ua  = __ua; 
    40             my $res = $ua->get($stuff); 
    41             if ($res->is_success) { 
    42                 $html = $res->decoded_content; 
    43             } else { 
    44                 croak "GET $stuff failed: ", $res->status_line; 
    45             } 
    46         } elsif (blessed($stuff) && $stuff->isa('HTML::Element')) { 
    47             $tree = $stuff->clone; 
    48         } elsif (ref($stuff) && ref($stuff) eq 'SCALAR') { 
    49             $html = $$stuff; 
     39    my($html, $tree); 
     40 
     41    if (blessed($stuff) && $stuff->isa('URI')) { 
     42        require HTTP::Response::Encoding; 
     43        my $ua  = __ua; 
     44        my $res = $ua->get($stuff); 
     45        if ($res->is_success) { 
     46            $html = $res->decoded_content; 
    5047        } else { 
    51             $html = $stuff; 
     48            croak "GET $stuff failed: ", $res->status_line; 
    5249        } 
     50    } elsif (blessed($stuff) && $stuff->isa('HTML::Element')) { 
     51        $tree = $stuff->clone; 
     52    } elsif (ref($stuff) && ref($stuff) eq 'SCALAR') { 
     53        $html = $$stuff; 
     54    } else { 
     55        $html = $stuff; 
     56    } 
    5357 
    54         $tree ||= do { 
    55             my $t = HTML::TreeBuilder::XPath->new; 
    56             $t->parse($html); 
    57             $t; 
    58         }; 
     58    $tree ||= do { 
     59        my $t = HTML::TreeBuilder::XPath->new; 
     60        $t->parse($html); 
     61        $t; 
     62    }; 
    5963 
    60         my $stash = {}; 
    61         no warnings 'redefine'; 
    62         local *process       = create_process(0, $tree, $stash); 
    63         local *process_first = create_process(1, $tree, $stash); 
     64    my $stash = {}; 
     65    no warnings 'redefine'; 
     66    local *process       = create_process(0, $tree, $stash); 
     67    local *process_first = create_process(1, $tree, $stash); 
    6468 
    65         local *result = sub { 
    66             my @keys = @_; 
     69    local *result = sub { 
     70        my @keys = @_; 
    6771 
    68             if (@keys == 1) { 
    69                 return $stash->{$keys[0]}; 
    70             } else { 
    71                 my %res; 
    72                 @res{@keys} = @{$stash}{@keys}; 
    73                 return \%res; 
    74             } 
    75         }; 
     72        if (@keys == 1) { 
     73            return $stash->{$keys[0]}; 
     74        } else { 
     75            my %res; 
     76            @res{@keys} = @{$stash}{@keys}; 
     77            return \%res; 
     78        } 
     79    }; 
    7680 
    77         my $ret = $coderef->($tree); 
     81    my $ret = $self->{code}->($tree); 
    7882 
    79         # check user specified return value 
    80         return $ret if $ret; 
     83    # check user specified return value 
     84    return $ret if $ret; 
    8185 
    82         return $stash; 
    83     }; 
     86    return $stash; 
    8487} 
    8588 
     
    100103                } 
    101104            } elsif ($key =~ s!\[\]$!!) { 
    102                 $stash->{$key} = [ map get_value($_, $val), @nodes ]; 
     105                $stash->{$key} = [ map __get_value($_, $val), @nodes ]; 
    103106            } else { 
    104                 $stash->{$key} = get_value($nodes[0], $val); 
     107                $stash->{$key} = __get_value($nodes[0], $val); 
    105108            } 
    106109        } 
     
    110113} 
    111114 
    112 sub get_value { 
     115sub __get_value { 
    113116    my($node, $val) = @_; 
    114117 
    115118    if (ref($val) && ref($val) eq 'CODE') { 
    116119        return $val->($node); 
     120    } elsif (blessed($val) && $val->isa('Web::Scraper')) { 
     121        return $val->scrape($node); 
    117122    } elsif ($val =~ s!^@!!) { 
    118123        return $node->attr($val); 
     
    120125        return $node->as_text; 
    121126    } else { 
    122         Carp::cluck "WTF"; 
     127        Carp::croak "Unknown value type $val"; 
    123128    } 
    124129} 
    125130 
    126 sub stub { 
     131sub __stub { 
    127132    my $func = shift; 
    128133    return sub { 
     
    131136} 
    132137 
    133 *process       = stub 'process'; 
    134 *process_first = stub 'process_first'; 
    135 *result        = stub 'result'; 
     138*process       = __stub 'process'; 
     139*process_first = __stub 'process_first'; 
     140*result        = __stub 'result'; 
    136141 
    1371421; 
     
    165170  }; 
    166171 
    167   $ebay->( URI->new("http://search.ebay.com/apple-ipod-nano_W0QQssPageNameZWLRS") ); 
     172  $ebay->scrape( URI->new("http://search.ebay.com/apple-ipod-nano_W0QQssPageNameZWLRS") ); 
    168173 
    169174=head1 DESCRIPTION 
  • Web-Scraper/trunk/t/01_process.t

    r2232 r2239  
    1717        result 'text'; 
    1818    }; 
    19     my $text = $s->($block->html); 
     19    my $text = $s->scrape($block->html); 
    2020    is $text, $block->expected, $block->name; 
    2121}; 
  • Web-Scraper/trunk/t/02_process_multi.t

    r2232 r2239  
    1717        result 'text'; 
    1818    }; 
    19     my $texts = $s->($block->html); 
     19    my $texts = $s->scrape($block->html); 
    2020    is_deeply $texts, $block->expected, $block->name; 
    2121}; 
  • Web-Scraper/trunk/t/03_scraper_nest.t

    r2233 r2239  
    2020    }; 
    2121 
    22     my $res = $s->($block->html); 
     22    my $res = $s->scrape($block->html); 
    2323    is_deeply $res, $block->expected, $block->name; 
    2424}; 
  • Web-Scraper/trunk/t/04_callback.t

    r2234 r2239  
    2020        }; 
    2121    }; 
    22     $s->($block->html); 
     22    $s->scrape($block->html); 
    2323    is_deeply \@value, $block->expected, $block->name; 
    2424}; 
  • Web-Scraper/trunk/t/05_process_first.t

    r2234 r2239  
    2020        }; 
    2121    }; 
    22     $s->($block->html); 
     22    $s->scrape($block->html); 
    2323    is_deeply \@value, $block->expected, $block->name; 
    2424};