root/Web-Scraper/trunk/bin/scraper

Revision 2328 (checked in by miyagawa, 13 years ago)

added WARN handy sub to scraper

  • Property svn:executable set to *
Line 
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 use Config;
6 use Term::ReadLine;
7 use Data::Dumper;
8 use URI;
9 use Web::Scraper;
10 use YAML;
11
# WARN - handy helper for the interactive prompt.
#
# Takes no arguments (the empty prototype lets it be written as a bare
# term) and returns a fresh code reference.  When that code reference is
# invoked it calls as_HTML on whatever object is currently in $_ and sends
# the result to STDERR via warn.
# Presumably meant to be handed to Web::Scraper's process() as a callback,
# e.g.  process "a", WARN;  -- verify against Web::Scraper's callback API.
sub WARN() {
    my $html_warner = sub { warn $_->as_HTML };
    return $html_warner;
}
13
# Session state shared by the subs in this file:
#   @stack  - statements typed at the prompt that evaluated without error
#   $source - array ref recording where the document came from
#             ([ 'stdin' ], [ 'URI', $uri ] or [ 'file', $path ])
my(@stack, $source);

# Resolve the document to scrape; a false return means there is nothing
# usable on STDIN or the command line.
my $stuff   = process_args($ARGV[0])
    or die "Usage: scraper [URI-or-filename]\n";

# Term::ReadLine documents the application name as the argument to new();
# its bundled stub backend refuses to construct a terminal without it.
my $term    = Term::ReadLine->new("scraper");

# The prompt runs inside the scraper block so that statements typed there
# are evaluated while the scrape is in progress.
my $scraper = scraper { run_loop($_[0], $term) };
my $result  = $scraper->scrape($stuff);
22
# process_args($uri_or_filename)
#
# Decides what document to scrape.  Checked in this order:
#   1. STDIN is not a terminal and yields non-empty content
#        -> reference to that content
#   2. the argument starts with http:// or https://
#        -> URI object for it
#   3. the argument names an existing file
#        -> reference to the file's content
#
# As a side effect the file-level $source is set to [ 'stdin' ],
# [ 'URI', $uri ] or [ 'file', $path ] so that generate_code can later
# reproduce the same input.
#
# Returns nothing (false) when none of the cases apply, which the caller
# turns into the usage message.  Dies if an existing file cannot be opened.
sub process_args {
    my $uri = shift;

    if (!-t STDIN and my $content = join "", <STDIN>) {
        $source = [ 'stdin' ];
        return \$content;
    }

    # Test definedness/length rather than truth so an argument of "0" is
    # still considered.
    return unless defined $uri && length $uri;

    if ($uri =~ m!^https?://!) {
        $source = [ "URI", $uri ];
        return URI->new($uri);
    }

    if (-e $uri) {
        $source = [ 'file', $uri ];
        open my $fh, "<", $uri or die "$uri: $!";
        my $content = join "", <$fh>;
        close $fh;

        # Hand back a reference, exactly like the STDIN branch: a reference
        # is always true, so an empty file (or one containing just "0") is
        # no longer mistaken for "no input" by the caller's usage check.
        return \$content;
    }

    return;
}
40
# run_loop($tree, $term)
#
# Interactive prompt.  Reads lines from the Term::ReadLine object $term
# until end of input or the 'q' command.  Commands:
#   d      - warn the current result, formatted by Data::Dumper
#   y      - warn the current result, formatted by Dump (from YAML)
#   s      - warn the tree as indented HTML
#   q      - leave the loop
#   c      - print a standalone script containing the last good statement
#   c all  - print a standalone script containing every good statement
# Anything else is evaluated as Perl; statements that evaluate without
# error are appended to the file-level @stack.
#
# The lexical names $tree, $term and $in are visible to the evaluated
# text, so they are deliberately left unchanged.
sub run_loop {
    my($tree, $term) = @_;
    while (defined(my $in = $term->readline("scraper> "))) {
        # A blank line would eval cleanly and be recorded as a statement,
        # polluting the generated code with empty entries.
        next unless $in =~ /\S/;

        if ($in eq 'd') {
            # Localised so the setting does not leak into Dumper calls made
            # by statements typed at the prompt.
            local $Data::Dumper::Indent = 1;
            warn Dumper result;
        } elsif ($in eq 'y') {
            warn Dump result;
        } elsif ($in eq 's') {
            warn $tree->as_HTML(undef, "  ");
        } elsif ($in eq 'q') {
            return;
        } elsif ($in eq 'c') {
            # With nothing recorded yet, $stack[-1] would be undef; pass no
            # statement at all instead.
            print generate_code($source, @stack ? $stack[-1] : ());
        } elsif ($in =~ /^c\s+all\s*$/) {
            print generate_code($source, @stack);
        } else {
            # SECURITY: this evaluates whatever was typed, by design -- the
            # tool is an interactive Perl prompt.  Scalar context is kept as
            # in the original; the value itself is not used.
            my $res = eval $in;

            # Copy $@ straight away: it is a global and a warn handler
            # could overwrite it before the second check.
            my $err = $@;
            if ($err) {
                warn $err;
            } else {
                push @stack, $in;
            }
        }
    }
}
64
# generate_code($source, @stack)
#
# Builds the text of a standalone Perl script that repeats the session.
#   $source - array ref: [ 'stdin' ], [ 'URI', $uri ] or [ 'file', $path ];
#             any other kind yields the placeholder '...'
#   @stack  - statements to place inside the scraper block; undefined or
#             blank entries are skipped, and a ';' is appended to any
#             statement that does not already end with one
# Returns the script as a single string; nothing is printed here.
sub generate_code {
    my($source, @stack) = @_;

    # Render a value as a single-quoted Perl literal.  The URI and file
    # name come from the command line; inside double quotes characters such
    # as @ or $ would be interpolated by the generated script (and break it
    # under strict), and a double quote would end the string early.
    my $quote = sub {
        my $text = shift;
        $text =~ s/([\\'])/\\$1/g;
        return "'$text'";
    };

    my $code_stack = join "\n",
        map  { "    $_" . (/;\s*$/ ? "" : ";") }
        grep { defined && /\S/ } @stack;

    my $kind  = $source->[0];
    my $stuff =
        $kind eq 'stdin' ? '\join "", <STDIN>' :
        $kind eq 'URI'   ? 'URI->new(' . $quote->($source->[1]) . ')' :
        # The emitted script uses three-argument open so the file name can
        # never be read as an open mode.
        $kind eq 'file'  ? '\\do { my $file = ' . $quote->($source->[1])
                         . '; open my $fh, "<", $file or die "$file: $!"; join \'\', <$fh> }' :
                           '...';

    return <<CODE;
#!$Config{perlpath}
use strict;
use Web::Scraper;
use URI;

my \$stuff   = $stuff;
my \$scraper = scraper {
$code_stack
};
my \$result = \$scraper->scrape(\$stuff);
CODE

}
Note: See TracBrowser for help on using the browser.