=head2 unidecode_any($text, $encoding)

Similar to Text::Unidecode::unidecode, but convert to the given
$encoding. This will return an octet string in the given
I<$encoding>. If all you want is just to restrict the charset of the
string to a specific encoding charset, then it's best to
C<Encode::decode> the result again with I<$encoding>.

Characters which are representable in I<$encoding> are encoded as is.
All other characters are replaced by an ASCII transliteration: german
umlauts are expanded to their two-letter form (e.g. "ae"), everything
else is handled by Text::Unidecode::unidecode.

=cut

sub unidecode_any {
    my($text, $encoding) = @_;

    require Text::Unidecode;
    require Encode;

    # provide better conversions for german umlauts
    my %override = ("\xc4" => "Ae",
                    "\xd6" => "Oe",
                    "\xdc" => "Ue",
                    "\xe4" => "ae",
                    "\xf6" => "oe",
                    "\xfc" => "ue",
                   );

    # Transliterate a single character which is not representable in
    # $encoding. Shared by both code paths below, so the result does
    # not depend on which path was taken.
    my $to_ascii = sub {
        my($ch) = @_;
        return $override{$ch} if exists $override{$ch};
        # Fully qualified: Text::Unidecode is only require'd here,
        # so nothing is imported into the current package.
        my $ascii = Text::Unidecode::unidecode($ch);
        Encode::_utf8_off($ascii);
        return $ascii;
    };

    my $res = "";
    my $ok = eval {
        Encode->VERSION(2.12); # need v2.12 to support coderef
        # Encode calls the coderef with the ordinal of every
        # character which cannot be encoded.
        $res = Encode::encode($encoding, $text, sub { $to_ascii->(chr $_[0]) });
        1;
    };
    if (!$ok) {
        # Best-effort fallback (old Encode version, or the fast path
        # failed for another reason): try character by character.
        $res = "";
        for my $ch (split //, $text) {
            # LEAVE_SRC: keep $ch intact, it is needed for the
            # transliteration if encoding fails.
            my $conv = eval {
                Encode::encode($encoding, $ch,
                               Encode::FB_CROAK() | Encode::LEAVE_SRC());
            };
            $res .= defined $conv ? $conv : $to_ascii->($ch);
        }
    }
    $res;
}