|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  org.joni.encoding.Encoding
public abstract class Encoding
Field Summary | |
---|---|
protected int |
hashCode
|
protected byte[] |
name
|
static byte |
NEW_LINE
|
Constructor Summary | |
---|---|
Encoding()
|
Method Summary | |
---|---|
abstract void |
applyAllCaseFold(int flag,
ApplyAllCaseFoldFunction fun,
java.lang.Object arg)
Expand case folds given a character class (used for case insensitive matching) |
static int |
asciiToLower(int c)
|
static int |
asciiToUpper(int c)
|
abstract CaseFoldCodeItem[] |
caseFoldCodesByString(int flag,
byte[] bytes,
int p,
int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString)
Oniguruma equivalent: get_case_fold_codes_by_str |
abstract int |
codeToMbc(int code,
byte[] bytes,
int p)
Extracts code point into its multibyte representation |
abstract int |
codeToMbcLength(int code)
Returns character length given a code point. Oniguruma equivalent: code_to_mbclen |
abstract int[] |
ctypeCodeRange(int ctype,
IntHolder sbOut)
Returns code range for a given character type. Oniguruma equivalent: get_ctype_code_range |
static int |
digitVal(int code)
|
boolean |
equals(java.lang.Object other)
|
byte[] |
getName()
|
int |
hashCode()
|
boolean |
isAlnum(int code)
|
boolean |
isAlpha(int code)
|
static boolean |
isAscii(int code)
|
boolean |
isBlank(int code)
|
boolean |
isCntrl(int code)
|
abstract boolean |
isCodeCType(int code,
int ctype)
Perform a check whether given code is of given character type. |
boolean |
isDigit(int code)
|
abstract boolean |
isFixedWidth()
|
boolean |
isGraph(int code)
|
boolean |
isLower(int code)
|
static boolean |
isMbcAscii(byte b)
|
boolean |
isMbcCrnl(byte[] bytes,
int p,
int end)
|
boolean |
isMbcHead(byte b)
|
boolean |
isMbcWord(byte[] bytes,
int p,
int end)
|
abstract boolean |
isNewLine(byte[] bytes,
int p,
int end)
Returns true if bytes[p] is a head of a new line character
Oniguruma equivalent: is_mbc_newline |
boolean |
isNewLine(int code)
|
boolean |
isPrint(int code)
|
boolean |
isPunct(int code)
|
abstract boolean |
isReverseMatchAllowed(byte[] bytes,
int p,
int end)
Returns true if it's safe to use the reversal Boyer-Moore search fail-fast algorithm. Oniguruma equivalent: is_allowed_reverse_match |
boolean |
isSbWord(int code)
|
abstract boolean |
isSingleByte()
|
boolean |
isSpace(int code)
|
boolean |
isUpper(int code)
|
boolean |
isWord(int code)
|
static boolean |
isWordGraphPrint(int ctype)
|
boolean |
isXDigit(int code)
|
abstract int |
leftAdjustCharHead(byte[] bytes,
int p,
int end)
Seeks the previous character head in a stream. Oniguruma equivalent: left_adjust_char_head |
abstract int |
length(byte c)
Returns character length given the character head: returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones. |
abstract int |
maxLength()
Returns maximum character byte length that can appear in an encoding. Oniguruma equivalent: max_enc_len |
int |
maxLengthDistance()
|
abstract int |
mbcCaseFold(int flag,
byte[] bytes,
IntHolder pp,
int end,
byte[] to)
Performs case folding for a character at bytes[pp.value] |
int |
mbcodeStartPosition()
|
abstract int |
mbcToCode(byte[] bytes,
int p,
int end)
Returns code point for a character. Oniguruma equivalent: mbc_to_code |
abstract int |
minLength()
Returns minimum character byte length that can appear in an encoding. Oniguruma equivalent: min_enc_len |
static int |
odigitVal(int code)
|
int |
prevCharHead(byte[] bytes,
int p,
int end)
|
abstract int |
propertyNameToCType(byte[] bytes,
int p,
int end)
Returns character type given character type name. |
int |
rightAdjustCharHead(byte[] bytes,
int p,
int end)
|
int |
rightAdjustCharHeadWithPrev(byte[] bytes,
int p,
int end,
IntHolder prev)
|
int |
step(byte[] bytes,
int p,
int end,
int n)
|
int |
stepBack(byte[] bytes,
int p,
int end,
int n)
|
int |
strByteLengthNull(byte[] bytes,
int p)
|
int |
strLength(byte[] bytes,
int p,
int end)
|
int |
strLengthNull(byte[] bytes,
int p)
|
int |
strNCmp(byte[] bytes,
int p,
int end,
byte[] ascii,
int asciiP,
int n)
|
byte[] |
toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null.
Used for fast case insensitive matching for some singlebyte encodings |
abstract java.lang.String |
toString()
|
int |
xdigitVal(int code)
|
Methods inherited from class java.lang.Object |
---|
clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
protected byte[] name
protected int hashCode
public static final byte NEW_LINE
Constructor Detail |
---|
public Encoding()
Method Detail |
---|
public abstract java.lang.String toString()
Overrides: toString in class java.lang.Object
public final boolean equals(java.lang.Object other)
Overrides: equals in class java.lang.Object
public final int hashCode()
Overrides: hashCode in class java.lang.Object
public final byte[] getName()
public abstract int length(byte c)
Returns character length given the character head: returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.
Parameters:
c - Character head
Oniguruma equivalent: mbc_enc_len
public abstract int maxLength()
Returns maximum character byte length that can appear in an encoding.
Oniguruma equivalent: max_enc_len
public final int maxLengthDistance()
public abstract int minLength()
Returns minimum character byte length that can appear in an encoding.
Oniguruma equivalent: min_enc_len
public abstract boolean isNewLine(byte[] bytes, int p, int end)
Returns true if bytes[p] is a head of a new line character.
Oniguruma equivalent: is_mbc_newline
public abstract int mbcToCode(byte[] bytes, int p, int end)
Returns code point for a character.
Oniguruma equivalent: mbc_to_code
public abstract int codeToMbcLength(int code)
Returns character length given a code point.
Oniguruma equivalent: code_to_mbclen
public abstract int codeToMbc(int code, byte[] bytes, int p)
Extracts code point into its multibyte representation.
Oniguruma equivalent: code_to_mbc
public abstract int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
Performs case folding for a character at bytes[pp.value].
Parameters:
flag - case fold flag
pp - an IntHolder that points at character head
to - a buffer where to extract case folded character
Oniguruma equivalent: mbc_case_fold
public byte[] toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null.
Used for fast case insensitive matching for some singlebyte encodings.
public abstract void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, java.lang.Object arg)
Expand case folds given a character class (used for case insensitive matching).
Parameters:
flag - case fold flag
fun - case folding functor (look at: ApplyCaseFold)
arg - case folding functor argument (look at: ApplyCaseFoldArg)
Oniguruma equivalent: apply_all_case_fold
public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString).
Oniguruma equivalent: get_case_fold_codes_by_str
public abstract int propertyNameToCType(byte[] bytes, int p, int end)
Returns character type given character type name.
Oniguruma equivalent: property_name_to_ctype
public abstract boolean isCodeCType(int code, int ctype)
Perform a check whether given code is of given character type.
Parameters:
code - a code point of a character
ctype - a character type to check against
Oniguruma equivalent: is_code_ctype
public abstract int[] ctypeCodeRange(int ctype, IntHolder sbOut)
Returns code range for a given character type.
Oniguruma equivalent: get_ctype_code_range
public abstract int leftAdjustCharHead(byte[] bytes, int p, int end)
Seeks the previous character head in a stream.
Oniguruma equivalent: left_adjust_char_head
public abstract boolean isReverseMatchAllowed(byte[] bytes, int p, int end)
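Returns true if it's safe to use the reversal Boyer-Moore search fail-fast algorithm.
Oniguruma equivalent: is_allowed_reverse_match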
public final int rightAdjustCharHead(byte[] bytes, int p, int end)
public final int rightAdjustCharHeadWithPrev(byte[] bytes, int p, int end, IntHolder prev)
public final int prevCharHead(byte[] bytes, int p, int end)
public final int stepBack(byte[] bytes, int p, int end, int n)
public final int step(byte[] bytes, int p, int end, int n)
public int strLength(byte[] bytes, int p, int end)
public final int strLengthNull(byte[] bytes, int p)
public final int strByteLengthNull(byte[] bytes, int p)
public final int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
public final boolean isNewLine(int code)
public final boolean isGraph(int code)
public final boolean isPrint(int code)
public final boolean isAlnum(int code)
public final boolean isAlpha(int code)
public final boolean isLower(int code)
public final boolean isUpper(int code)
public final boolean isCntrl(int code)
public final boolean isPunct(int code)
public final boolean isSpace(int code)
public final boolean isBlank(int code)
public final boolean isDigit(int code)
public final boolean isXDigit(int code)
public final boolean isWord(int code)
public final boolean isMbcWord(byte[] bytes, int p, int end)
public final boolean isSbWord(int code)
public final boolean isMbcHead(byte b)
public boolean isMbcCrnl(byte[] bytes, int p, int end)
public static int digitVal(int code)
public static int odigitVal(int code)
public final int xdigitVal(int code)
public static boolean isMbcAscii(byte b)
public static boolean isAscii(int code)
public static int asciiToLower(int c)
public static int asciiToUpper(int c)
public static boolean isWordGraphPrint(int ctype)
public final int mbcodeStartPosition()
public abstract boolean isSingleByte()
public abstract boolean isFixedWidth()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |