Changeset 2231

Show
Ignore:
Timestamp:
05/09/07 13:14:54
Author:
miyagawa
Message:

Use 'TEXT' instead of 'content' as the text-extraction keyword, to be more compatible with scrAPI ('content' is still accepted).

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Web-Scraper/trunk/eg/ebay-auction.pl

    r2225 r2231  
    88my $ebay_auction = scraper { 
    99    process "h3.ens>a", 
    10         description => 'content', 
     10        description => 'TEXT', 
    1111        url => '@href'; 
    12     process "td.ebcPr>span", price => "content"; 
     12    process "td.ebcPr>span", price => "TEXT"; 
    1313    process "div.ebPicture >a>img", image => '@src'; 
    1414    result 'description', 'url', 'price', 'image'; 
  • Web-Scraper/trunk/eg/hatena-keyword.pl

    r2225 r2231  
    22use strict; 
    33use warnings; 
     4use lib "lib"; 
     5use URI; 
     6use Web::Scraper; 
    47 
    58# same as http://d.hatena.ne.jp/secondlife/20060922/1158923779 
    69 
    710my $keyword = scraper { 
    8     process 'span.title > a:first-child', title => 'content', url => '@href'; 
    9     process 'span.furigana', furigana => 'content'; 
    10     process 'ul.list-circle > li:first-child > a', category => 'content'; 
     11    process 'span.title > a:first-child', title => 'TEXT', url => '@href'; 
     12    process 'span.furigana', furigana => 'TEXT'; 
     13    process 'ul.list-circle > li:first-child > a', category => 'TEXT'; 
    1114}; 
    1215 
  • Web-Scraper/trunk/lib/Web/Scraper.pm

    r2229 r2231  
    5656            } elsif ($val =~ s!^@!!) { 
    5757                return $node->attr($val); 
    58             } elsif ($val eq 'content') { 
     58            } elsif (lc($val) eq 'content' || lc($val) eq 'text') { 
    5959                return $node->as_text; 
    6060            } else { 
     
    126126  my $ebay_auction = scraper { 
    127127      process "h3.ens>a", 
    128           description => 'content', 
     128          description => 'TEXT', 
    129129          url => '@href'; 
    130       process "td.ebcPr>span", price => "content"; 
     130      process "td.ebcPr>span", price => "TEXT"; 
    131131      process "div.ebPicture >a>img", image => '@src'; 
    132132