package encode_html;
use strict;
use warnings;

# Package globals read by Exporter; declared with `our` instead of the
# obsolete `use vars` pragma.
our $VERSION = '1.0';

# Exporter supplies import().  AutoLoader used to be listed in @ISA as well,
# but this module never loads it and has no autoloaded subs; inheriting from
# it only risks misleading "can't locate auto/...al" errors on a mistyped
# call, so it has been dropped.
require Exporter;
our @ISA       = qw(Exporter);
our @EXPORT    = qw(string2html);    # exported by default
our @EXPORT_OK = ();                 # nothing is export-on-request only
# Known characters and the HTML entity each one is replaced with.
# Every key must be exactly one character.  The apostrophe uses the numeric
# form &#39; because the named form &apos; is not defined in HTML 4.
my %xmlchars = (
    '<'      => '&lt;',
    '>'      => '&gt;',
    "'"      => '&#39;',
    '"'      => '&quot;',
    '&'      => '&amp;',
    "\x{A0}" => '&nbsp;',
    "\x{A1}" => '&iexcl;',
    "\x{A2}" => '&cent;',
    "\x{A3}" => '&pound;',
    "\x{A4}" => '&curren;',
    # ... further single-character => entity pairs can be added here
);

# Character class matching any key of %xmlchars.  Each key is written as a
# \x{..} escape so that a key such as ']', '^', '-' or '\' can never change
# the meaning of the class.
my $xmlcharstring
    = '[' . join( '', map { sprintf '\\x{%X}', ord($_) } sort keys %xmlchars ) . ']';

# string2html($text)
#
# Escapes $text for use in HTML and returns the escaped copy.
#
#   * Text that already looks like an entity ('&', optional '#', ASCII
#     letters/digits/underscore, ';') is passed through unchanged, so
#     escaping an already escaped string does not double-encode it.
#   * Characters listed in %xmlchars are replaced by their named entity.
#   * Every other character outside \x20-\x7F (this includes control
#     characters such as newline) becomes a decimal reference '&#NNN;'.
#
# An undefined argument is returned as is.
#
# Everything happens in a single left-to-right pass.  The earlier version
# hid existing entities behind a '-!=name=!-' placeholder and restored them
# afterwards, which turned any literal '-!=name=!-' in the input into
# '&name;'.
sub string2html {
    my $str = shift;
    return $str unless defined $str;

    $str =~ s{
        ( & \#? [0-9A-Za-z_]+ ; )
      | ( $xmlcharstring )
      | ( [^\x20-\x7F] )
    }{
          defined $1 ? $1
        : defined $2 ? $xmlchars{$2}
        :              '&#' . ord($3) . ';'
    }gex;

    return $str;
}
1;