# Field names for one UnicodeData.txt record, listed in field order; the
# number beside each name is its index in this array.
# See http://www.unicode.org/reports/tr44/#UnicodeData.txt
#
# NOTE(review): 'IF_Numeric_Type_Decimal' is capitalised differently from the
# two 'If_...' names that follow it. The spelling is kept exactly as it was
# because code outside this view may use these strings as keys -- confirm
# before normalising.
my @FIELDS = (
    'Codepoint',                     #  0
    'Name',                          #  1
    'General_Category',              #  2
    'Canonical_Combining_Class',     #  3
    'Bidi_Class',                    #  4
    'Decomposition_Type_Mapping',    #  5
    'IF_Numeric_Type_Decimal',       #  6
    'If_Numeric_Type_Digit',         #  7
    'If_Numeric_Type_Numeric',       #  8
    'Bidi_Mirrored',                 #  9
    'Unicode_1_Name',                # 10
    'ISO_Comment',                   # 11
    'Simple_Uppercase_Mapping',      # 12
    'Simple_Lowercase_Mapping',      # 13
    'Simple_Titlecase_Mapping',      # 14
);

# Maps a numeric Canonical_Combining_Class value to a human-readable string.
#
# Where the value has a long name the string has the form
# "Long_Name, description", with exactly one space after the comma.
# The values 199, 204, 208, 210 and 212 carry a description only.
#
# Changes from the previous revision:
#   * 6 (Han_Reading) added; it is listed in the current UAX #44 table.
#   * 202 had two spaces after the comma, unlike every other entry; it now
#     uses a single space.
my %CANONICAL_COMBINING_CLASS_VALUES = (
    # General and fixed-position classes
    0   => 'Not_Reordered, Spacing and enclosing marks; also many vowel and consonant signs, even if nonspacing',
    1   => 'Overlay, Marks which overlay a base letter or symbol',
    6   => 'Han_Reading, Diacritic reading marks for CJK unified ideographs',
    7   => 'Nukta, Diacritic nukta marks in Brahmi-derived scripts',
    8   => 'Kana_Voicing, Hiragana/Katakana voicing marks',
    9   => 'Virama, Viramas',
    10  => 'Ccc10, Start of fixed position classes',
    199 => 'End of fixed position classes',

    # Attached marks
    200 => 'Attached_Below_Left, Marks attached at the bottom left',
    202 => 'Attached_Below, Marks attached directly below',
    204 => 'Marks attached at the bottom right',
    208 => 'Marks attached to the left',
    210 => 'Marks attached to the right',
    212 => 'Marks attached at the top left',
    214 => 'Attached_Above, Marks attached directly above',
    216 => 'Attached_Above_Right, Marks attached at the top right',

    # Distinct (non-attached) marks
    218 => 'Below_Left, Distinct marks at the bottom left',
    220 => 'Below, Distinct marks directly below',
    222 => 'Below_Right, Distinct marks at the bottom right',
    224 => 'Left, Distinct marks to the left',
    226 => 'Right, Distinct marks to the right',
    228 => 'Above_Left, Distinct marks at the top left',
    230 => 'Above, Distinct marks directly above',
    232 => 'Above_Right, Distinct marks at the top right',

    # Double marks and the Greek iota subscript
    233 => 'Double_Below, Distinct marks subtending two bases',
    234 => 'Double_Above, Distinct marks extending above two bases',
    240 => 'Iota_Subscript, Greek iota subscript only',
);

# Maps a Bidi_Class short alias to "Long_Name, description".
# Entries are grouped by the type categories used in the UAX #44 Bidi_Class
# table; the grouping is for readers only and has no effect on the hash.
my %BIDI_CLASS_VALUES = (
    # Strong types
    'L'   => 'Left_To_Right, any strong left-to-right character',
    'R'   => 'Right_To_Left, any strong right-to-left (non-Arabic-type) character',
    'AL'  => 'Arabic_Letter, any strong right-to-left (Arabic-type) character',

    # Weak types
    'EN'  => 'European_Number, any ASCII digit or Eastern Arabic-Indic digit',
    'ES'  => 'European_Separator, plus and minus signs',
    'ET'  => 'European_Terminator, a terminator in a numeric format context, includes currency signs',
    'AN'  => 'Arabic_Number, any Arabic-Indic digit',
    'CS'  => 'Common_Separator, commas, colons, and slashes',
    'NSM' => 'Nonspacing_Mark, any nonspacing mark',
    'BN'  => 'Boundary_Neutral, most format characters, control codes, or noncharacters',

    # Neutral types
    'B'   => 'Paragraph_Separator, various newline characters',
    'S'   => 'Segment_Separator, various segment-related control codes',
    'WS'  => 'White_Space, spaces',
    'ON'  => 'Other_Neutral, most other symbols and punctuation marks',

    # Explicit formatting types: embeddings and overrides
    'LRE' => 'Left_To_Right_Embedding, U+202A: the LR embedding control',
    'LRO' => 'Left_To_Right_Override, U+202D: the LR override control',
    'RLE' => 'Right_To_Left_Embedding, U+202B: the RL embedding control',
    'RLO' => 'Right_To_Left_Override, U+202E: the RL override control',
    'PDF' => 'Pop_Directional_Format, U+202C: terminates an embedding or override control',

    # Explicit formatting types: isolates
    'LRI' => 'Left_To_Right_Isolate, U+2066: the LR isolate control',
    'RLI' => 'Right_To_Left_Isolate, U+2067: the RL isolate control',
    'FSI' => 'First_Strong_Isolate, U+2068: the first strong isolate control',
    'PDI' => 'Pop_Directional_Isolate, U+2069: terminates an isolate control',
);


# Maps a compatibility formatting tag (stored here without angle brackets)
# to its description.
# Keys are quoted explicitly so that keyword-like names such as 'sub' read
# unambiguously as plain strings.
my %COMPATIBILITY_TAGS = (
    # Font and spacing variants
    'font'     => 'Font variant (for example, a blackletter form)',
    'noBreak'  => 'No-break version of a space or hyphen',

    # Arabic presentation forms
    'initial'  => 'Initial presentation form (Arabic)',
    'medial'   => 'Medial presentation form (Arabic)',
    'final'    => 'Final presentation form (Arabic)',
    'isolated' => 'Isolated presentation form (Arabic)',

    # Positional and enclosed forms
    'circle'   => 'Encircled form',
    'super'    => 'Superscript form',
    'sub'      => 'Subscript form',
    'vertical' => 'Vertical layout presentation form',

    # Width and size variants
    'wide'     => 'Wide (or zenkaku) compatibility character',
    'narrow'   => 'Narrow (or hankaku) compatibility character',
    'small'    => 'Small variant form (CNS compatibility)',
    'square'   => 'CJK squared font variant',

    # Everything else
    'fraction' => 'Vulgar fraction form',
    'compat'   => 'Otherwise unspecified compatibility character',
);

# Maps a General_Category short alias to "Long_Name, description".
#
# One-letter keys, and LC, are grouping values; their description is the
# list of two-letter categories they cover, joined with " | ".
#
# Every value uses the same "Long_Name, description" shape, with exactly one
# comma-plus-space between the name and the description. The L, N, P and S
# entries previously lacked that comma, and N and S also had a double space,
# so they could not be split the same way as the other entries. They now
# follow the pattern already used by LC, M, Z and C.
my %GENERAL_CATEGORY_VALUES = (
    # Letters
    Lu  => 'Uppercase_Letter, an uppercase letter',
    Ll  => 'Lowercase_Letter, a lowercase letter',
    Lt  => 'Titlecase_Letter, a digraphic character, with first part uppercase',
    LC  => 'Cased_Letter, Lu | Ll | Lt',
    Lm  => 'Modifier_Letter, a modifier letter',
    Lo  => 'Other_Letter, other letters, including syllables and ideographs',
    L   => 'Letter, Lu | Ll | Lt | Lm | Lo',

    # Marks
    Mn  => 'Nonspacing_Mark, a nonspacing combining mark (zero advance width)',
    Mc  => 'Spacing_Mark, a spacing combining mark (positive advance width)',
    Me  => 'Enclosing_Mark, an enclosing combining mark',
    M   => 'Mark, Mn | Mc | Me',

    # Numbers
    Nd  => 'Decimal_Number, a decimal digit',
    Nl  => 'Letter_Number, a letterlike numeric character',
    No  => 'Other_Number, a numeric character of other type',
    N   => 'Number, Nd | Nl | No',

    # Punctuation
    Pc  => 'Connector_Punctuation, a connecting punctuation mark, like a tie',
    Pd  => 'Dash_Punctuation, a dash or hyphen punctuation mark',
    Ps  => 'Open_Punctuation, an opening punctuation mark (of a pair)',
    Pe  => 'Close_Punctuation, a closing punctuation mark (of a pair)',
    Pi  => 'Initial_Punctuation, an initial quotation mark',
    Pf  => 'Final_Punctuation, a final quotation mark',
    Po  => 'Other_Punctuation, a punctuation mark of other type',
    P   => 'Punctuation, Pc | Pd | Ps | Pe | Pi | Pf | Po',

    # Symbols
    Sm  => 'Math_Symbol, a symbol of mathematical use',
    Sc  => 'Currency_Symbol, a currency sign',
    Sk  => 'Modifier_Symbol, a non-letterlike modifier symbol',
    So  => 'Other_Symbol, a symbol of other type',
    S   => 'Symbol, Sm | Sc | Sk | So',

    # Separators
    Zs  => 'Space_Separator, a space character (of various non-zero widths)',
    Zl  => 'Line_Separator, U+2028 LINE SEPARATOR only',
    Zp  => 'Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only',
    Z   => 'Separator, Zs | Zl | Zp',

    # Other
    Cc  => 'Control, a C0 or C1 control code',
    Cf  => 'Format, a format control character',
    Cs  => 'Surrogate, a surrogate code point',
    Co  => 'Private_Use, a private-use character',
    Cn  => 'Unassigned, a reserved unassigned code point or a noncharacter',
    C   => 'Other, Cc | Cf | Cs | Co | Cn',
);