# Функции для перекодировки в/из Punycode / RACE
# Version 2.0
# Author: Walery Studennikov <despair@cpan.org>, 2003
package punylib;
require 5.008;
require Exporter;

@ISA = qw(Exporter);
@EXPORT = qw(
    utf8_to_unicode unicode_to_utf8
    escapeuchars_to_unicode is_punychars
    unicode_to_punycode unicode_to_punycode_smart
    punycode_to_unicode punycode_to_unicode_smart
    cp1251_to_race cp1251_to_punycode
    cp1251_to_race_smart cp1251_to_punycode_smart
    punycode_to_cp1251 race_to_cp1251
    punycode_to_cp1251_smart race_to_cp1251_smart
    aced_name unace_name
);

use strict;
use Text::Iconv;
use Convert::RACE;
use Unicode::Normalize;
use IDNA::Punycode;
use Net::IDN::Nameprep;

my $cp1251toUCS2conv = Text::Iconv->new("cp1251", "ucs-2be");
my $UCS2convtocp1251 = Text::Iconv->new("ucs-2be", "cp1251");

# domain functions

# Разбить домен на имя и TLD
sub split_domname {
    return $_[0] unless ($_[0] =~ /(.+?)\.([^.]+)$/);
    return ($1, $2);
}

sub d_name {
    return (split_domname($_[0]))[0];
}

sub d_tld {
    return (split_domname($_[0]))[1];
}

sub is_punychars {
    my ($str) = @_;
    return 0 unless $str;
    return ($str =~ tr/-a-z0-9/-a-z0-9/) == length($str);
}

# **** UNICODE conversions ****

# raw byte ucs2-be unicode -> utf8-flagged string
sub ucs2unicode {
    my ($in) = @_;

    my @out;
    while ($in =~ m/(.)(.)/gs) {
	my ($u2, $u1) = ($1, $2);
	push @out, (ord($u2) << 8) + ord($u1);
    }
    my $out = join '', map chr, @out;
    return $out;
}

sub unicode2ucs {
    my ($in) = @_;

    my $out;
    foreach (split //, $in) {
	my $wchar = ord($_);
	my ($u2, $u1) = ($wchar & 255, $wchar >> 8);
	$out .= chr($u1).chr($u2);
    }
    return $out;
}

sub utf8_to_unicode {
    my ($s) = @_;
    utf8::decode($s);
    return $s;
}

sub unicode_to_utf8 {
    my ($s) = @_;
    return undef unless defined $s;
    utf8::encode($s);
    return $s;
}

# unicode -> Punycode
sub unicode_to_punycode {
    my ($unicode) = @_;
    # Nameprep, STD3 and other checking rules
    return undef if !$unicode || $unicode =~ /^-/ || $unicode =~ /-$/;
    eval {
	$unicode = nameprep($unicode);
    };
    return undef if $@;
    return undef if !$unicode || $unicode =~ /^-/ || $unicode =~ /-$/;
    return undef if $unicode =~ /xn--/;

    foreach my $ch (split //, $unicode) {
	my $c = ord($ch);
	return undef if $c >= 0xD800 && $c <= 0xFFFF || $c >= 0xE0000;
    }

    # conversion
    if (is_punychars($unicode)) {
	utf8::encode($unicode);
	return $unicode;
    }
    my $punycode = encode_punycode( $unicode );
    utf8::encode($punycode);
    return "xn--$punycode";
}

# Перевод домена из UNICODE в Punycode, если есть non-ascii символы
sub unicode_to_punycode_smart {
    my ($str) = @_;
    my @newords;
    foreach my $word (split /[.\x{3002}\x{FF0E}\x{FF61}]/, $str) {
	if (is_punychars($word)) {
	    utf8::encode($word);
	} else {
	    $word = unicode_to_punycode($word);
	}
	return undef unless defined $word;
	push @newords, $word;
    }
    return join '.', @newords;
}

# decoding

sub punycode_to_unicode {
    my ($str) = @_;
    my $unicode = eval {
	if ($str =~ s/^xn--//) {
	    return decode_punycode( $str );
	} else {
	    return $str;
	}
    };
    return undef if $@;
    return undef if $unicode =~ /xn--/;
    return undef if !$unicode || $unicode =~ /^-/ || $unicode =~ /-$/;

    my $unicode2;
    eval { $unicode2 = nameprep($unicode); };
    return undef if $@;
    return undef if $unicode2 ne $unicode;

    return $unicode;
}

# Перевод домена из Punycode в cp1251, если он русский
sub punycode_to_unicode_smart {
    my ($str) = @_;
    my @newords;
    foreach my $word (split /\./, $str) {
	if ($word =~ m/xn--/) {
	    $word = punycode_to_unicode($word);
	}
	return undef unless defined $word;
	push @newords, $word;
    }
    return join '.', @newords;
}

sub escapeuchars_to_unicode {
    my ($str) = @_;
    $str =~ s/(<[A-Fa-f0-9]{4,5}>)/unescape_uchar($1)/eg;
    return $str;
}

sub unescape_uchar {
    my ($str) = @_;
    $str =~ s/<(.{4,5})>/\\x\{$1\}/;
    $str = eval( '"'.$str.'"' );
    return $str;
}

# **** CP1251 conversions ****

# windows-1251 -> RACE
sub cp1251_to_race {
    my ($str, $ra) = @_;
    my $utf16 = $cp1251toUCS2conv->convert($str);
    my $race = to_race( $utf16 );
    $race =~ s/^bq--/ra--/ if $ra;
    return $race;
}

sub cp1251_to_unicode {
    my $utf16 = $cp1251toUCS2conv->convert( $_[0] );
    return ucs2unicode( $utf16 );
}

# windows-1251 -> Punycode
sub cp1251_to_punycode {
    my $unicode = cp1251_to_unicode( $_[0] );
    return unicode_to_punycode( $unicode );
}

# Перевод домена из cp1251 в RACE, если он русский
sub cp1251_to_race_smart {
    my ($str, $ra) = @_;
    my @newords;
    foreach my $word (split /\./, $str) {
	unless (is_punychars($word)) {
	    $word = cp1251_to_race($word, $ra);
	}
	push @newords, $word;
    }
    return join '.', @newords;
}

# Перевод домена из cp1251 в Punycode, если он русский
sub cp1251_to_punycode_smart {
    my $unicode = cp1251_to_unicode( $_[0] );
    return unicode_to_punycode_smart( $unicode );
}

# Перевод домена из Punycode в cp1251, если он русский
sub punycode_to_cp1251_smart {
    my $unicode = punycode_to_unicode_smart( $_[0] );
    my $utf16 = unicode2ucs( $unicode );
    return $UCS2convtocp1251->convert($utf16);
}

sub punycode_to_cp1251 {
    my ($str) = @_;
    $str =~ s/^xn--//;
    my $unicode = decode_punycode( $str );
    my $utf16 = unicode2ucs( $unicode );
    my $cp1251 = $UCS2convtocp1251->convert($utf16);
    return $cp1251;
}

sub race_to_cp1251 {
    my ($str) = @_;
    $str =~ s/^ra--//;
    $str =~ s/^bq--//;
    my $utf16 = from_race( "bq--$str" );
    my $cp1251 = $UCS2convtocp1251->convert($utf16);
    return $cp1251;
}

# Перевод домена из Punycode в cp1251, если он русский
sub race_to_cp1251_smart {
    my ($str) = @_;
    my @newords;
    foreach my $word (split /\./, $str) {
	if ($word =~ m/bq--/ || $word =~ m/ra--/) {
	    $word = race_to_cp1251($word);
	}
	push @newords, $word;
    }
    return join '.', @newords;
}

sub aced_name {
    my ($domain) = @_;
    return $domain if is_punychars($domain);
    my $tld = d_tld( $domain );
    my $israce = isin($tld, [qw(com net org)]);
    return $israce
	? cp1251_to_race_smart( $domain, 1 ).".mltbd.net"
	: cp1251_to_punycode_smart( $domain ).".aced.net";
}

sub unace_name {
    my ($domain) = @_;

    $domain =~ s/.aced.net$//;
    if ($domain =~ /bq--/ || $domain =~ /ra--/) {
	return race_to_cp1251_smart( $domain );
    } elsif ($domain =~ /xn--/) {
	return punycode_to_cp1251_smart( $domain );
    }
    return $domain;
}

