#!/usr/bin/perl

use strict;
use warnings;

my $VERSION = 0.01;

=head1 NAME

test-unicodeprops.pl -- a simple script to get Unicode properties of characters.

=head1 DESCRIPTION

This script lets you test which characters belong to each of the available
Perl Unicode properties. For every property name it prints one line listing
the ASCII characters (code points 0..127) that match C<\p{Property}>.
Non-printable characters are shown as C<#NN;>, where NN is the decimal code
point. Properties that the running perl does not know are reported as
unsupported instead of aborting the run.

=head1 COREQUISITES

v5.8.0

=pod OSNAMES

any

=pod SCRIPT CATEGORIES

Unicode

=cut

# General category properties.
my @propnames = (
    'IsCc',    # Other, Control
    'IsCf',    # Other, Format
    'IsCn',    # Other, Not assigned
    'IsCo',    # Other, Private Use
    'IsCs',    # Other, Surrogate
    'IsLl',    # Letter, Lowercase
    'IsLm',    # Letter, Modifier
    'IsLo',    # Letter, Other
    'IsLt',    # Letter, Titlecase
    'IsLu',    # Letter, Uppercase
    'IsMc',    # Mark, Combining
    'IsMe',    # Mark, Enclosing
    'IsMn',    # Mark, Nonspacing
    'IsNd',    # Number, Decimal digit
    'IsNl',    # Number, Letter
    'IsNo',    # Number, Other
    'IsPc',    # Punctuation, Connector
    'IsPd',    # Punctuation, Dash
    'IsPe',    # Punctuation, Close
    'IsPf',    # Punctuation, Final quote
    'IsPi',    # Punctuation, Initial quote
    'IsPo',    # Punctuation, Other
    'IsPs',    # Punctuation, Open
    'IsSc',    # Symbol, Currency
    'IsSk',    # Symbol, Modifier
    'IsSm',    # Symbol, Math
    'IsSo',    # Symbol, Other
    'IsZl',    # Separator, Line
    'IsZp',    # Separator, Paragraph
    'IsZs',    # Separator, Space
);

# Block properties. Deliberately disabled by the trailing "if 0"; change it
# to "if 1" to include them in the report.
push @propnames, qw(
    InArabic InCyrillic InHangulJamo InMalayalam InSyriac
    InArmenian InDevanagari InHebrew InMongolian InTamil
    InArrows InDingbats InHiragana InMyanmar InTelugu
    InBasicLatin InEthiopic InKanbun InOgham InThaana
    InBengali InGeorgian InKannada InOriya InThai
    InBopomofo InGreek InKatakana InRunic InTibetan
    InBoxDrawing InGujarati InKhmer InSinhala InYiRadicals
    InCherokee InGurmukhi InLao InSpecials InYiSyllables
    InAlphabeticPresentationForms InHalfwidthandFullwidthForms
    InArabicPresentationForms-A InHangulCompatibilityJamo
    InArabicPresentationForms-B InHangulSyllables
    InBlockElements InHighPrivateUseSurrogates
    InBopomofoExtended InHighSurrogates
    InBraillePatterns InIdeographicDescriptionCharacters
    InCJKCompatibility InIPAExtensions
    InCJKCompatibilityForms InKangxiRadicals
    InCJKCompatibilityIdeographs InLatin-1Supplement
    InCJKRadicalsSupplement InLatinExtended-A
    InCJKSymbolsandPunctuation InLatinExtended-B
    InCJKUnifiedIdeographs InLatinExtendedAdditional
    InCJKUnifiedIdeographsExtensionA InLetterlikeSymbols
    InCombiningDiacriticalMarks InLowSurrogates
    InCombiningHalfMarks InMathematicalOperators
    InMiscellaneousSymbols
    InControlPictures InMiscellaneousTechnical
    InCurrencySymbols InNumberForms
    InEnclosedAlphanumerics InOpticalCharacterRecognition
    InEnclosedCJKLettersandMonths
    InGeneralPunctuation InSuperscriptsandSubscripts
    InGeometricShapes InSmallFormVariants
    InGreekExtended InSpacingModifierLetters
) if 0;

# Extended (binary) properties.
push @propnames, qw(
    ASCIIHexDigit BidiControl Dash Deprecated Diacritic Extender
    HexDigit Hyphen Ideographic IDSBinaryOperator IDSTrinaryOperator
    JoinControl LogicalOrderException NoncharacterCodePoint
    OtherAlphabetic OtherDefaultIgnorableCodePoint OtherGraphemeExtend
    OtherLowercase OtherMath OtherUppercase QuotationMark Radical
    SoftDotted TerminalPunctuation UnifiedIdeograph WhiteSpace
);

# The characters every property is tested against: the ASCII range.
my @asciichars = map { chr } 0 .. 127;

# One report line per property: "Name: c1 c2 ... \n".
for my $prop_name (@propnames) {
    print "$prop_name: ";

    my $matches = chars_with_property( $prop_name, @asciichars );
    if ( !defined $matches ) {
        # An unknown property is a fatal regex error; report it and carry
        # on with the remaining properties instead of dying mid-report.
        print "(not supported by this perl)\n";
        next;
    }

    print make_chr_printable($_) . ' ' for @{$matches};
    print "\n";
}

# chars_with_property( $prop_name, @candidates )
#
# Returns a reference to the list of @candidates (single-character strings)
# that match \p{$prop_name}, in their original order. Returns undef when the
# property cannot be used by the running perl (the regex compile or the
# match itself raised an error).
sub chars_with_property {
    my ( $prop_name, @candidates ) = @_;

    my $matches = eval {
        # Compile once per property rather than once per character. For
        # IsXxx/InXxx names perl may defer the lookup until the first match,
        # so the matching is kept inside the eval as well.
        my $prop_re = qr/\A\p{$prop_name}\z/;
        [ grep { $_ =~ $prop_re } @candidates ];
    };

    return $matches;    # undef if the eval died
}

# make_chr_printable( $chr )
#
# Returns a terminal-safe representation of the single character $chr:
# control characters, the space (codes 0..32) and DEL (127) are rendered as
# "#NN;" with NN being the decimal code point; anything else is returned
# unchanged.
sub make_chr_printable {
    my ($chr) = @_;

    my $code = ord($chr);
    if ( $code <= 32 || $code == 127 ) {
        return "#$code;";
    }

    return $chr;
}