Changeset 2358

Show
Ignore:
Timestamp:
09/21/07 14:40:26
Author:
miyagawa
Message:

try to get encoding from META tags as well

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Web-Scraper/trunk/lib/Web/Scraper.pm

    r2357  r2358
        3      3  use warnings;
        4      4  use Carp;
        5         use Scalar::Util 'blessed';
               5  use Scalar::Util qw(blessed);
               6  use List::Util qw(first);
        6      7  use HTML::Entities;
        7      8  use HTML::Tagset;
     
       58     59          my $res = $ua->get($stuff);
       59     60          if ($res->is_success) {
       60                     my $encoding = $res->encoding || "latin-1";
              61              my @encoding = (
              62                  $res->encoding,
              63                  # could be multiple because HTTP response and META might be different
              64                  ($res->header('Content-Type') =~ /charset=([\w\-]+)/g),
              65                  "latin-1",
              66              );
              67              my $encoding = first { defined $_ && Encode::find_encoding($_) } @encoding;
       61     68              $html = Encode::decode($encoding, $res->content);
       62     69          } else {