root/HTML-ResolveLink/trunk/lib/HTML/ResolveLink.pm

Revision 1785 (checked in by miyagawa, 14 years ago)

import HTML::ResolveLink?

  • Property svn:keywords set to Id Revision
Line 
1 package HTML::ResolveLink;
2
3 use strict;
4 our $VERSION = '0.01';
5 use base qw(HTML::Parser);
6
7 use Carp;
8 use HTML::Tagset ();
9 use URI;
10
11 sub new {
12     my($class, %p) = @_;
13     my $self = $class->SUPER::new(
14         start_h => [ \&_start_tag, "self,tagname,attr,attrseq,text" ],
15         default_h => [ \&_default, "self,tagname,attr,text" ],
16     );
17
18     unless ($p{base}) {
19         Carp::croak("HTML::ResolveLink->new: base is a required parameter");
20     }
21
22     $p{base} = URI->new($p{base}) unless ref $p{base};
23     $self->{resolvelink_base} = $p{base};
24
25     $self;
26 }
27
28 sub _start_tag {
29     my($self, $tagname, $attr, $attrseq, $text) = @_;
30
31     my $base = $self->{resolvelink_base};
32
33     my $links = $HTML::Tagset::linkElements{$tagname} || [];
34     $links = [$links] unless ref $links;
35
36     for my $a (@$links) {
37         next unless exists $attr->{$a};
38
39         my $link = $attr->{$a};
40         my $uri  = URI->new($link);
41
42         # relative link:
43         unless (defined $uri->scheme) {
44             $uri = $uri->abs($base);
45             $attr->{$a} = $uri->as_string;
46         }
47     }
48
49     $self->{resolvelink_html} .= "<$tagname";
50     for my $a (@$attrseq) {
51         next if $a eq '/';
52         $self->{resolvelink_html} .= sprintf qq( %s="%s"), $a, _escape($attr->{$a});
53     }
54     $self->{resolvelink_html} .= ' /' if $attr->{'/'};
55     $self->{resolvelink_html} .= '>';
56 }
57
58 sub _default {
59     my($self, $tagname, $attr, $text) = @_;
60     $self->{resolvelink_html} .= $text;
61 }
62
63 my %escape = (
64     '<' => '&lt;',
65     '>' => '&gt;',
66     '"' => '&qout;',
67     '&' => '&amp;',
68 );
69 my $esc_re = join '|', keys %escape;
70
71 sub _escape {
72     my $str = shift;
73     $str =~ s/($esc_re)/$escape{$1}/g;
74     $str;
75 }
76
77 sub resolve {
78     my($self, $html) = @_;
79
80     $self->{resolvelink_html} = ''; # init
81     $self->parse($html);
82     $self->eof;
83
84     $self->{resolvelink_html};
85 }
86
87 1;
88 __END__
89
90 =head1 NAME
91
92 HTML::ResolveLink - Resolve relative links in (X)HTML into absolute URI
93
94 =head1 SYNOPSIS
95
96   use HTML::ResolveLink;
97
98   my $resolver = HTML::ResolveLink->new(
99       base => 'http://www.example.com/',
100   );
101   $html = $resolver->resolve($html);
102
103 =head1 DESCRIPTION
104
105 HTML::ResolveLink is a module to rewrite relative links in XHTML or
106 HTML into absolute URI.
107
108 =head1 METHODS
109
110 =over 4
111
112 =item new
113
114   my $resolver = HTML::ResolveLink->new(
115       base => 'http://www.example.com/',
116   );
117
118 C<base> is a required parameter, which is used to resolve the relative
119 URI found in the document.
120
121 =item resolve
122
123   $html = $resolver->resolve($html);
124
125 Resolves relative URI found in C<$html> into absolute and returns a
126 string containing rewritten one.
127
128 =head1 AUTHOR
129
130 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
131
132 This library is free software; you can redistribute it and/or modify
133 it under the same terms as Perl itself.
134
135 =head1 SEE ALSO
136
137 L<HTML::Parser>, L<HTML::LinkExtor>
138
139 =cut
Note: See TracBrowser for help on using the browser.