, which is either
a decimal or a hexadecimal scalar designating a code point in the platform's
native character set (extended to Unicode), or a string containing C<U+>
followed by hexadecimals designating a Unicode code point. A leading 0 will
force a hexadecimal interpretation, as will a hexadecimal digit that isn't a
decimal digit.

Examples:

    223     # Decimal 223 in native character set
    0223    # Hexadecimal 223, native (= 547 decimal)
    0xDF    # Hexadecimal DF, native (= 223 decimal)
    '0xDF'  # String form of hexadecimal (= 223 decimal)
    'U+DF'  # Hexadecimal DF, in Unicode's character set
                              (= LATIN SMALL LETTER SHARP S)

Note that the largest code point in Unicode is U+10FFFF.

=cut

# Package globals.  They are declared here without initial values.
our %caseless_equivalent;
our $e_precision;
our %file_to_swash_name;
our @inline_definitions;
our %loose_property_name_of;
our %loose_property_to_file_of;
our %loose_to_file_of;
our $MAX_CP;
our %nv_floating_to_rational;
our %prop_aliases;
our %stricter_to_file_of;
our %strict_property_to_file_of;
our %SwashInfo;
our %why_deprecated;

my $v_unicode_version;  # v-string.

# openunicode(@path)
#
# Looks for the file "unicore/@path" under each directory in @INC, in order,
# and returns a read-only filehandle for the first one that can be opened.
# Croaks, naming the relative path and the contents of @INC, if no directory
# yields an openable file.
sub openunicode {
    my (@path) = @_;

    # 'use' takes effect at compile time, so it does not need to sit inside
    # the loop.
    use File::Spec;

    my $rfh;
    for my $d (@INC) {
        my $f = File::Spec->catfile($d, "unicore", @path);
        return $rfh if open($rfh, '<', $f);
    }
    croak __PACKAGE__, ": failed to find ",
        File::Spec->catfile("unicore", @path), " in @INC";
}

# _dclone($data)
#
# Returns a deep copy of $data.  Storable::dclone does the work when it has
# been imported; otherwise array and hash references are copied recursively,
# non-reference values are returned as they are, and any other kind of
# reference croaks.
sub _dclone ($) {   # Use Storable::dclone if available; otherwise emulate it.

    # Storable is loaded (at compile time) only when
    # DynaLoader::boot_DynaLoader is defined.
    use if defined &DynaLoader::boot_DynaLoader, Storable => qw(dclone);

    return dclone(shift) if defined &dclone;

    my $arg = shift;
    my $type = ref $arg;
    return $arg unless $type;   # No deep cloning needed for scalars

    # The recursive calls use the & form, which bypasses the ($) prototype.
    if ($type eq 'ARRAY') {
        my @return;
        foreach my $element (@$arg) {
            push @return, &_dclone($element);
        }
        return \@return;
    }
    elsif ($type eq 'HASH') {
        my %return;
        foreach my $key (keys %$arg) {
            $return{$key} = &_dclone($arg->{$key});
        }
        return \%return;
    }
    else {
        croak "_dclone can't handle " . $type;
    }
}

=head2 B<charinfo()>

    use Unicode::UCD 'charinfo';

    my $charinfo = charinfo(0x41);

This returns information about the input L</code point argument>
the input native L</code point argument>
, all IN UPPER CASE.
Some control-type code points do not have names.
This field will be empty for C<Surrogate> and C<Private Use> code points,
and for the others without a name,
it will contain a description enclosed in angle brackets, like
C<E<lt>controlE<gt>>.

=item B<category>

The short name of the general category of I<code>.
This will match one of the keys in the hash returned by
L</general_categories()>.

The L</prop_value_aliases()> function can be used to get all the synonyms
of the category name.

=item B<combining>

the combining class number for I<code> used in the Canonical Ordering
Algorithm. For Unicode 5.1, this is described in Section 3.11
C<Canonical Ordering Behavior> available at
L<http://www.unicode.org/versions/Unicode5.1.0/>

The L</prop_value_aliases()> function can be used to get all the synonyms
of the combining class number.

=item B<bidi>

bidirectional type of I<code>.
This will match one of the keys in the hash returned by L</bidi_types()>.

The L</prop_value_aliases()> function can be used to get all the synonyms
of the bidi type name.

=item B<decomposition>

is empty if I<code> has no decomposition; or is one or more codes
(separated by spaces) that, taken in order, represent a decomposition for
I<code>. Each has at least four hexdigits.
The codes may be preceded by a word enclosed in angle brackets, then a space,
like C<E<lt>compatE<gt> >, giving the type of decomposition.

This decomposition may be an intermediate one whose components are also
decomposable. Use L<Unicode::Normalize> to get the final decomposition in one
step.

=item B<decimal>

if I<code> represents a decimal digit this is its integer numeric value

=item B<digit>

if I<code> represents some other digit-like number, this is its integer
numeric value

=item B<numeric>

if I<code> represents a whole or rational number, this is its numeric value.
Rational values are expressed as a string like C<1/4>.

=item B<mirrored>

C<Y> or C<N> designating if I<code> is mirrored in bidirectional text

=item B<unicode10>

name of I<code> in the Unicode 1.0 standard if one
existed for this code point and is different from the current name

=item B<comment>

As of Unicode 6.0, this is always empty.

=item B<upper>

is, if non-empty, the uppercase mapping for I<code> expressed as at least four
hexdigits.
This indicates that the full uppercase mapping is a single character, and is
identical to the simple (single-character only) mapping.
When this field is empty, it means that the simple uppercase mapping is
I<code> itself; you'll need some other means, (like L</charprop()> or
L</casespecial()>) to get the full mapping.

=item B<lower>

is, if non-empty, the lowercase mapping for I<code> expressed as at least four
hexdigits. This indicates that the full lowercase mapping is a single
character, and is identical to the simple (single-character only) mapping.
When this field is empty, it means that the simple lowercase mapping is
I<code> itself; you'll need some other means, (like L</charprop()> or
L</casespecial()>) to get the full mapping.

=item B<title>

is, if non-empty, the titlecase mapping for I<code> expressed as at least four
hexdigits. This indicates that the full titlecase mapping is a single
character, and is identical to the simple (single-character only) mapping.
When this field is empty, it means that the simple titlecase mapping is
I<code> itself; you'll need some other means, (like L</charprop()> or
L</casespecial()>) to get the full mapping.

=item B<block>

the block I<code> belongs to (used in C<\p{Blk=...}>).
The L</prop_value_aliases()> function can be used to get all the synonyms
of the block name.

See L</Blocks versus Scripts>.

=item B<script>
. This will match one of the keys in the hash returned by L. The L function can be used to get all the synonyms of the category name. =item B the combining class number for I used in the Canonical Ordering Algorithm. For Unicode 5.1, this is described in Section 3.11 C available at L The L function can be used to get all the synonyms of the combining class number. =item B bidirectional type of I. This will match one of the keys in the hash returned by L. The L function can be used to get all the synonyms of the bidi type name. =item B is empty if I has no decomposition; or is one or more codes (separated by spaces) that, taken in order, represent a decomposition for I. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets, then a space, like CcompatE >, giving the type of decomposition This decomposition may be an intermediate one whose components are also decomposable. Use L to get the final decomposition in one step. =item B if I represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. 
This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
used in the Canonical Ordering Algorithm. For Unicode 5.1, this is described in Section 3.11 C available at L The L function can be used to get all the synonyms of the combining class number. =item B bidirectional type of I. This will match one of the keys in the hash returned by L. The L function can be used to get all the synonyms of the bidi type name. =item B is empty if I has no decomposition; or is one or more codes (separated by spaces) that, taken in order, represent a decomposition for I. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets, then a space, like CcompatE >, giving the type of decomposition This decomposition may be an intermediate one whose components are also decomposable. Use L to get the final decomposition in one step. =item B if I represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. 
When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
. This will match one of the keys in the hash returned by L. The L function can be used to get all the synonyms of the bidi type name. =item B is empty if I has no decomposition; or is one or more codes (separated by spaces) that, taken in order, represent a decomposition for I. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets, then a space, like CcompatE >, giving the type of decomposition This decomposition may be an intermediate one whose components are also decomposable. Use L to get the final decomposition in one step. =item B if I represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. 
This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
has no decomposition; or is one or more codes (separated by spaces) that, taken in order, represent a decomposition for I. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets, then a space, like CcompatE >, giving the type of decomposition This decomposition may be an intermediate one whose components are also decomposable. Use L to get the final decomposition in one step. =item B if I represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. 
When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
. Each has at least four hexdigits. The codes may be preceded by a word enclosed in angle brackets, then a space, like CcompatE >, giving the type of decomposition This decomposition may be an intermediate one whose components are also decomposable. Use L to get the final decomposition in one step. =item B if I represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. 
=item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
represents a decimal digit this is its integer numeric value =item B if I represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
represents some other digit-like number, this is its integer numeric value =item B if I represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
represents a whole or rational number, this is its numeric value. Rational values are expressed as a string like C<1/4>. =item B C or C designating if I is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
is mirrored in bidirectional text =item B name of I in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
in the Unicode 1.0 standard if one existed for this code point and is different from the current name =item B As of Unicode 6.0, this is always empty. =item B is, if non-empty, the uppercase mapping for I expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
expressed as at least four hexdigits. This indicates that the full uppercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple uppercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the lowercase mapping for I expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
expressed as at least four hexdigits. This indicates that the full lowercase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple lowercase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
itself; you'll need some other means, (like L or L to get the full mapping. =item B is, if non-empty, the titlecase mapping for I expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
expressed as at least four hexdigits. This indicates that the full titlecase mapping is a single character, and is identical to the simple (single-character only) mapping. When this field is empty, it means that the simple titlecase mapping is I itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
itself; you'll need some other means, (like L or L to get the full mapping. =item B the block I belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B
belongs to (used in C<\p{Blk=...}>). The L function can be used to get all the synonyms of the block name. See L. =item B