Changeset 206

Show
Ignore:
Timestamp:
07/28/01 04:21:08
Author:
miyagawa
Message:

use Unicode::String
inputs are UTF8

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • Convert-DUDE/trunk/Changes

    r187 r206  
    11Revision history for Perl extension Convert::DUDE. 
     2 
     30.02  Sat Jul 28 04:18:38 JST 2001 
     4        - Now Convert::DUDE handles Unicode strings as UTF8. 
     5          Requires Unicode::String. 
    26 
    370.01  Thu Jul 19 18:17:35 2001 
  • Convert-DUDE/trunk/Makefile.PL

    r187 r206  
    55    'NAME'      => 'Convert::DUDE', 
    66    'VERSION_FROM' => 'lib/Convert/DUDE.pm', # finds $VERSION 
     7    'PREREQ_PM' => { 
     8        'Unicode::String' => 2.06, 
     9    }, 
    710); 
  • Convert-DUDE/trunk/README

    r187 r206  
    66 
    77      # handles 'dq--' prefix 
    8       $domain  = to_dude($utf16); 
    9       $utf16   = from_dude($domain); 
     8      $domain  = to_dude($utf8); 
     9      $utf8    = from_dude($domain); 
    1010 
    1111      # don't care about 'dq--' prefix 
    1212      # not exported by default            
    13       $dudestr = dude_encode($utf16); 
    14       $utf16   = dude_decode($dudestr); 
     13      $dudestr = dude_encode($utf8); 
     14      $utf8    = dude_decode($dudestr); 
    1515 
    1616DESCRIPTION 
     
    3131 
    3232    to_dude 
    33           $domain = to_dude($utf16str); 
     33          $domain = to_dude($utf8); 
    3434 
    35         takes UTF16-encoded string, encodes it in DUDE and adds 'dq--' 
    36         prefix in front. 
     35        takes UTF8-encoded string, encodes it in DUDE and adds 'dq--' prefix 
     36        in front. 
    3737 
    3838    from_dude 
    39           $utf16str = from_dude($domain); 
     39          $utf8 = from_dude($domain); 
    4040 
    4141        takes 'dq--' prefixed DUDE encoded string and decodes it to original 
    42         UTF16 strings. 
     42        UTF8 strings. 
    4343 
    4444    Following two functions can be exported to your package when you import 
     
    4646 
    4747    dude_encode 
    48           $dude = dude_encode($utf16str); 
     48          $dude = dude_encode($utf8); 
    4949 
    50         takes UTF16-encoded string, encodes it in DUDE. Note that it doesn't 
     50        takes UTF8-encoded string, encodes it in DUDE. Note that it doesn't 
    5151        care about 'dq--' prefix. 
    5252 
    5353    dude_decode 
    54           $utf16str = dude_decode($dude); 
     54          $utf8 = dude_decode($dude); 
    5555 
    56         takes DUDE encoded string and decodes it to original UTF16 strings. 
     56        takes DUDE encoded string and decodes it to original UTF8 strings. 
    5757        Note that it doesn't care about 'dq--' prefix. 
    5858 
     
    6868 
    6969EXAMPLES 
    70     HEre's a sample code which does RACE-DUDE conversion. 
     70    Here's a sample code which does RACE-DUDE conversion. 
    7171 
    7272      use Convert::RACE; 
    7373      use Convert::DUDE; 
    74  
     74      use Unicode::String qw(utf16); 
     75                
    7576      my $race = "bq--aewrcsy"; 
    7677 
    7778      eval { 
    7879          my $utf16 = from_race($race); 
    79           my $dude = to_dude($utf16); 
     80          my $dude = to_dude(utf16($utf16)->utf8); 
    8081          print "RACE: $race => DUDE: $dude\n"; 
    8182      }; 
     
    8889    *   There's no constraints on the input. See internet draft for nameprep 
    8990        about IDN input validation. 
    90  
    91     *   to_dude() assumes inputs are UTF-16 strings. Thus, this module 
    92         hasn't been tested against unassigned code points like "u+2C7EF 
    93         u+2C7EF" in the DUDE intrernet draft. (Do I have to use 
    94         Unicode::String?) 
    9591 
    9692TODO 
  • Convert-DUDE/trunk/lib/Convert/DUDE.pm

    r187 r206  
    33use strict; 
    44use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 
    5 $VERSION = '0.01'; 
     5$VERSION = '0.02'; 
     6 
     7use Unicode::String qw(utf8); 
    68 
    79BEGIN { 
     
    575910111 z 
    586011000 2 
    59 11001 3         
     6111001 3 
    606211010 4 
    616311011 5 
     
    8890 
    8991sub dude_encode ($) { 
    90     my $input = shift; 
    91      
    92     if (length($input) % 2 != 0) { 
    93         _die "Odd length of input. to_dude() takes UTF 16 encoded strings"; 
    94     } 
    95      
     92    my $input = utf8(shift); 
     93 
    9694    my $output; 
    97      
    98     my $prev = "\x00\x60"; 
    99     while ($input =~ m/(..)/gs) { 
    100         my $n = $1; 
    101         if ($n eq "\x00\x2d") { 
     95    my $prev = 0x60; 
     96    for my $i (0 .. $input->length-1) { 
     97        my $n = $input->substr($i, 1)->ord; 
     98        if ($n == 0x2d) { 
    10299            $output .= '-'; 
    103100            next; 
     
    105102 
    106103        my $diff = $prev ^ $n; 
    107         my @quartets = unpack('B*', $diff) =~ m/(.{4})/gs; 
     104 
     105        my @quartets = unpack('B*', pack('n*', $diff)) =~ m/(.{4})/gs; 
    108106        shift @quartets while (@quartets && $quartets[0] eq '0000'); 
    109107 
     
    113111        $prev = $n; 
    114112    } 
    115  
    116113    return $output; 
    117114} 
     
    143140=end algorithm 
    144141 
    145 =cut     
     142=cut 
    146143 
    147144sub dude_decode ($) { 
    148145    my $input = lc shift; 
    149      
    150     my $prev = "\x00\x60"
     146 
     147    my $prev = 0x60
    151148    my @input = split //, $input; 
    152149 
    153     my $output
     150    my $output = Unicode::String->new
    154151    while (@input) { 
    155152        if ($input[0] eq '-') { 
    156             $output .= "\x00\x2d"
     153            $output->append(Unicode::String::uchr(0x2d))
    157154            shift @input; 
    158155            next; 
    159156        } 
    160          
     157 
    161158        my @quintets; 
    162159        CONSUME: while (1) { 
     
    172169            last CONSUME if substr($quintet, 0, 1) eq '0'; 
    173170        } 
    174         unshift @quintets, '00000' if @quintets % 2; # odd 
    175          
    176         my $diff = pack 'B*', join '', map { substr($_, 1) } @quintets; 
    177         $prev = $prev ^ ("\x00" x (length($prev) - length($diff))) . $diff; 
    178         $output .= $prev; 
     171 
     172        my $diff = 0; 
     173        my $order = 0; 
     174        for my $quintet (reverse @quintets) { 
     175            $diff += ord(pack('B*', '0000' . substr($quintet, 1))) * (16 ** $order++); 
     176        } 
     177        $prev = $prev ^ $diff; 
     178        $output->append(Unicode::String::uchr($prev)); 
    179179    } 
    180180 
    181     unless (dude_encode($output) eq $input) { 
     181    unless (dude_encode($output->utf8) eq $input) { 
    182182        _die "uniqueness check (paranoia) failed."; 
    183183    } 
    184      
    185     return $output
     184 
     185    return $output->utf8
    186186} 
    187187 
     
    192192    return dude_decode($dude); 
    193193} 
    194                   
    195                  
     194 
     195 
    1961961; 
     197 
    197198__END__ 
    198199 
     
    206207 
    207208  # handles 'dq--' prefix 
    208   $domain  = to_dude($utf16); 
    209   $utf16   = from_dude($domain); 
     209  $domain  = to_dude($utf8); 
     210  $utf8    = from_dude($domain); 
    210211 
    211212  # don't care about 'dq--' prefix 
    212213  # not exported by default            
    213   $dudestr = dude_encode($utf16); 
    214   $utf16   = dude_decode($dudestr); 
     214  $dudestr = dude_encode($utf8); 
     215  $utf8    = dude_decode($dudestr); 
    215216 
    216217=head1 DESCRIPTION 
     
    236237=item to_dude 
    237238 
    238   $domain = to_dude($utf16str); 
    239  
    240 takes UTF16-encoded string, encodes it in DUDE and adds 'dq--' prefix 
     239  $domain = to_dude($utf8); 
     240 
     241takes UTF8-encoded string, encodes it in DUDE and adds 'dq--' prefix 
    241242in front. 
    242243 
    243244=item from_dude 
    244245 
    245   $utf16str = from_dude($domain); 
     246  $utf8 = from_dude($domain); 
    246247 
    247248takes 'dq--' prefixed DUDE encoded string and decodes it to original 
    248 UTF16 strings. 
     249UTF8 strings. 
    249250 
    250251=back 
     
    257258=item dude_encode 
    258259 
    259   $dude = dude_encode($utf16str); 
    260  
    261 takes UTF16-encoded string, encodes it in DUDE. Note that it doesn't 
     260  $dude = dude_encode($utf8); 
     261 
     262takes UTF8-encoded string, encodes it in DUDE. Note that it doesn't 
    262263care about 'dq--' prefix. 
    263264 
    264265=item dude_decode 
    265266 
    266   $utf16str = dude_decode($dude); 
    267  
    268 takes DUDE encoded string and decodes it to original UTF16 
     267  $utf8 = dude_decode($dude); 
     268 
     269takes DUDE encoded string and decodes it to original UTF8 
    269270strings. Note that it doesn't care about 'dq--' prefix. 
    270271 
     
    289290=head1 EXAMPLES 
    290291 
    291 HEre's a sample code which does RACE-DUDE conversion. 
     292Here's a sample code which does RACE-DUDE conversion. 
    292293 
    293294  use Convert::RACE; 
    294295  use Convert::DUDE; 
    295  
     296  use Unicode::String qw(utf16); 
     297                
    296298  my $race = "bq--aewrcsy"; 
    297299 
    298300  eval { 
    299301      my $utf16 = from_race($race); 
    300       my $dude = to_dude($utf16); 
     302      my $dude = to_dude(utf16($utf16)->utf8); 
    301303      print "RACE: $race => DUDE: $dude\n"; 
    302304  }; 
     
    315317about IDN input validation. 
    316318 
    317 =item * 
    318  
    319 to_dude() assumes inputs are UTF-16 strings. Thus, this module hasn't 
    320 been tested against unassigned code points like "u+2C7EF u+2C7EF" in 
    321 the DUDE intrernet draft. (Do I have to use Unicode::String?) 
    322  
    323319=back 
    324320 
  • Convert-DUDE/trunk/t/00_dude.t

    r187 r206  
    44 
    55use Convert::DUDE qw(:all); 
     6use Unicode::String qw(utf16); 
    67 
    7 # taken from ietf-idn-dude-02.txt 
    8 my %todo = ( 
     8# taken from ietf-idn-dude-02.txt, embeded in UTF16 
     9my @todo = ( 
    910    "\x00\x33\x5e\x74\x00\x62\x7d\x44\x91\xd1\x51\x6b\x51\x48\x75\x1f" => 'xdx8whx8tgz7ug863f6s5kuduwxh', 
    1011    "\x5B\x89\x5B\xA4\x59\x48\x7F\x8E\x60\x75\x00\x2D\x00\x77\x00\x69\x00\x74\x00\x68\x00\x2D\x00\x73\x00\x75\x00\x70\x00\x65\x00\x72\x00\x2D\x00\x6D\x00\x6F\x00\x6E\x00\x6B\x00\x65\x00\x79\x00\x73" => 'x58jupu8nuy6gt99m-yssctqtptn-tmgftfth-trcbfqtnk', 
     
    1415); 
    1516 
    16 while (my($utf, $dude) = each %todo) { 
    17     ok(dude_encode($utf) eq $dude); 
    18     ok(dude_decode($dude) eq $utf); 
     17while (my($utf16, $dude) = splice(@todo, 0, 2)) { 
     18eval {    my $utf8 = utf16($utf16)->utf8; 
     19    ok(dude_encode($utf8) eq $dude); 
     20    ok(dude_decode($dude) eq $utf8); 
     21      }; 
     22 
    1923} 
    2024 
     25