root/Encode-First/trunk/lib/Encode/First.pm

Revision 2184 (checked in by miyagawa, 14 years ago)

import Encode::First

Line 
package Encode::First;

use strict;
use warnings;
our $VERSION = '0.01';

use Carp ();
use Encode ();

# Borrow Exporter's import() directly instead of inheriting from Exporter,
# so that encode_first is exported to callers by default.
require Exporter;
*import = \&Exporter::import;
our @EXPORT = qw( encode_first );
12
# encode_first($encodings, $str)
#
# Tries each candidate encoding in order and stops at the first one that
# Encode::encode accepts under FB_CROAK.
#
#   $encodings - candidate encoding names in order of preference; normalized
#                to an array reference by _encodings()
#   $str       - the string to encode
#
# Returns ($enc, $bytes) in list context and just $enc in scalar context.
# Croaks if every candidate fails.
sub encode_first {
    my ($encodings, $str) = @_;

    $encodings = _encodings($encodings);

    for my $enc (@$encodings) {
        # Encode::encode may modify its input in place when a CHECK value is
        # given, so each attempt works on a fresh copy and $str stays intact
        # for the next candidate.
        my $copy = $str;
        my $bytes;

        # The trailing 1 makes success detectable from eval's return value
        # instead of relying on $@ being left empty.
        my $ok = eval {
            $bytes = Encode::encode($enc, $copy, Encode::FB_CROAK);
            1;
        };
        next unless $ok;

        return wantarray ? ($enc, $bytes) : $enc;
    }

    Carp::croak("No encoding can encode the given string.");
}
31
# _encodings($encodings)
#
# Normalizes an encoding list into an array reference.
#
#   $encodings - either an array reference of encoding names, or a string of
#                names separated by "," or ":"
#
# Returns the array reference itself when one is given, otherwise a new
# array reference holding the split names.
# Croaks with "Unknown reference type ..." for any other kind of reference.
sub _encodings {
    my $encodings = shift;

    if (my $type = ref $encodings) {
        # An array reference is already in the desired form; return it as-is.
        return $encodings if $type eq 'ARRAY';

        Carp::croak "Unknown reference type ", $type;
    }

    return [ split /[:,]/, $encodings ];
}
41
42 1;
43 __END__
44
45 =for stopwords Juerd Waalboer encodable iso-2022-jp utf-8
46
47 =head1 NAME
48
49 Encode::First - Encode strings in the first possible encoding
50
51 =head1 SYNOPSIS
52
53   use Encode::First;
54
55   my($enc, $bytes) = encode_first("ascii,latin-1,iso-2022-jp,utf-8", $string);
56
57 =head1 DESCRIPTION
58
59 Encode::First provides a function to encode strings in the first
60 possible encoding out of multiple encodings supplied as a list.
61
62 This is useful for finding the minimal encoding for, say, the content
63 of an email, so that it stays friendly to email clients that cannot
64 handle utf-8.
65
66 =head1 FUNCTIONS
67
68 =over 4
69
70 =item encode_first
71
72   ($enc, $bytes) = encode_first($encodings, $string);
73
74 Returns I<$enc> (the encoding used) and I<$bytes>, the encoded
75 characters. I<$enc> is the first encoding that I<$string> is encodable
76 into. I<$encodings> can be either a comma- or colon-separated string, or
77 an array reference.
78
79 If none of I<$encodings> can encode I<$string> safely, the
80 function throws an exception. To avoid that, you should always
81 add I<utf-8> at the end of I<$encodings>.
82
83   $enc = encode_first($encodings, $string);
84
85 In scalar context it returns just the name of the encoding.
86
87 This function is exported by default.
88
89 =back
90
91 =head1 BUGS
92
93 As of this writing, if you include I<iso-2022-jp> in the list of
94 encodings, this module will return I<iso-2022-jp> as the best encoding
95 for most Unicode strings, because of a bug in Encode::JP::JIS7. The
96 bug has been reported and is awaiting a patch.
97
98 =head1 AUTHOR
99
100 Tatsuhiko Miyagawa E<lt>miyagawa@bulknews.netE<gt>
101
102 Juerd Waalboer
103
104 =head1 LICENSE
105
106 This library is free software; you can redistribute it and/or modify
107 it under the same terms as Perl itself.
108
109 =head1 SEE ALSO
110
111 L<Encode::InCharset>, L<http://use.perl.org/~miyagawa/journal/32781>
112
113 =cut
Note: See TracBrowser for help on using the browser.