Changeset 2324

Show
Ignore:
Timestamp:
08/30/07 18:42:00
Author:
miyagawa
Message:

Checking in changes prior to tagging of version 0.12. Changelog diff is:

=== Changes
==================================================================
--- Changes (revision 6880)
+++ Changes (local)
@@ -1,5 +1,11 @@

Revision history for Perl extension Web::Scraper

+0.12 Thu Aug 30 02:39:44 PDT 2007
+ - Added 's' command to scraper to get the HTML source
+ - You can use $tree variable to deal with the HTML::Element object in scraper shell
+ - Give a graceful error message if the given Selector/XPath doesn't compile
+ - Give a better error when number of args in process() seems wrong
+

0.11 Tue Aug 28 02:50:01 PDT 2007

- Supported hash-reference in process values, like

process "a", "people[]", { link => '@href', name => 'TEXT' };

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Web-Scraper/trunk/Changes

    r2318 r2324  
    11Revision history for Perl extension Web::Scraper 
     2 
     30.12  Thu Aug 30 02:39:44 PDT 2007 
     4        - Added 's' command to scraper to get the HTML source 
     5        - You can use $tree variable to deal with the HTML::Element object in scraper shell 
     6        - Give a graceful error message if the given Selector/XPath doesn't compile 
     7        - Give a better error when number of args in process() seems wrong 
    28 
    390.11  Tue Aug 28 02:50:01 PDT 2007 
  • Web-Scraper/trunk/MANIFEST

    r2318 r2324  
    3838t/08-leak.t 
    3939t/09-process_hash.t 
     40t/10_invalid_xpath.t 
    4041t/perlcriticrc 
  • Web-Scraper/trunk/bin/scraper

    r2315 r2324  
    1313 
    1414my $term    = Term::ReadLine->new(); 
    15 my $scraper = scraper { run_loop($term) }; 
     15my $scraper = scraper { run_loop($_[0], $term) }; 
    1616my $result  = $scraper->scrape($stuff); 
    1717 
     
    3232 
    3333sub run_loop { 
    34     my $term = shift
     34    my($tree, $term) = @_
    3535    while (defined(my $in = $term->readline("scraper> "))) { 
    3636        if ($in eq 'd') { 
     
    3939        } elsif ($in eq 'y') { 
    4040            warn Dump result; 
     41        } elsif ($in eq 's') { 
     42            warn $tree->as_HTML(undef, "  "); 
    4143        } elsif ($in eq 'q') { 
    4244            return; 
  • Web-Scraper/trunk/lib/Web/Scraper.pm

    r2318 r2324  
    77use HTML::Selector::XPath; 
    88 
    9 our $VERSION = '0.11'; 
     9our $VERSION = '0.12'; 
    1010 
    1111sub import { 
     
    104104 
    105105        my $xpath = $exp =~ m!^/! ? $exp : HTML::Selector::XPath::selector_to_xpath($exp); 
    106         my @nodes = $tree->findnodes($xpath) or return; 
     106        my @nodes = eval { 
     107            local $SIG{__WARN__} = sub { }; 
     108            $tree->findnodes($xpath); 
     109        }; 
     110 
     111        if ($@) { 
     112            die "'$xpath' doesn't look like a valid XPath expression: $@"; 
     113        } 
     114 
     115        @nodes or return; 
    107116        @nodes = ($nodes[0]) if $first; 
    108117 
    109118        while (my($key, $val) = splice(@attr, 0, 2)) { 
    110             if (ref($key) && ref($key) eq 'CODE' && !defined $val) { 
    111                 for my $node (@nodes) { 
    112                     local $_ = $node; 
    113                     $key->($node); 
     119            if (!defined $val) { 
     120                if (ref($key) && ref($key) eq 'CODE') { 
     121                    for my $node (@nodes) { 
     122                        local $_ = $node; 
     123                        $key->($node); 
     124                    } 
     125                } else { 
     126                    die "Don't know what to do with $key => undef"; 
    114127                } 
    115128            } elsif ($key =~ s!\[\]$!!) {