org.joni.encoding
Class Encoding

java.lang.Object
  extended by org.joni.encoding.Encoding
Direct Known Subclasses:
MultiByteEncoding, SingleByteEncoding

public abstract class Encoding
extends java.lang.Object


Field Summary
protected  int hashCode
           
protected  byte[] name
           
static byte NEW_LINE
           
 
Constructor Summary
Encoding()
           
 
Method Summary
abstract  void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, java.lang.Object arg)
          Expand case folds given a character class (used for case insensitive matching)
static int asciiToLower(int c)
           
static int asciiToUpper(int c)
           
abstract  CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
          Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str
abstract  int codeToMbc(int code, byte[] bytes, int p)
          Extracts code point into its multibyte representation
abstract  int codeToMbcLength(int code)
          Returns character length given a code point Oniguruma equivalent: code_to_mbclen
abstract  int[] ctypeCodeRange(int ctype, IntHolder sbOut)
          Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range
static int digitVal(int code)
           
 boolean equals(java.lang.Object other)
           
 byte[] getName()
           
 int hashCode()
           
 boolean isAlnum(int code)
           
 boolean isAlpha(int code)
           
static boolean isAscii(int code)
           
 boolean isBlank(int code)
           
 boolean isCntrl(int code)
           
abstract  boolean isCodeCType(int code, int ctype)
          Performs a check whether the given code is of the given character type (e.g. used by isWord(someByte) and similar methods)
 boolean isDigit(int code)
           
abstract  boolean isFixedWidth()
           
 boolean isGraph(int code)
           
 boolean isLower(int code)
           
static boolean isMbcAscii(byte b)
           
 boolean isMbcCrnl(byte[] bytes, int p, int end)
           
 boolean isMbcHead(byte b)
           
 boolean isMbcWord(byte[] bytes, int p, int end)
           
abstract  boolean isNewLine(byte[] bytes, int p, int end)
          Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline
 boolean isNewLine(int code)
           
 boolean isPrint(int code)
           
 boolean isPunct(int code)
           
abstract  boolean isReverseMatchAllowed(byte[] bytes, int p, int end)
          Returns true if it is safe to use the reverse Boyer-Moore fail-fast search algorithm. Oniguruma equivalent: is_allowed_reverse_match
 boolean isSbWord(int code)
           
abstract  boolean isSingleByte()
           
 boolean isSpace(int code)
           
 boolean isUpper(int code)
           
 boolean isWord(int code)
           
static boolean isWordGraphPrint(int ctype)
           
 boolean isXDigit(int code)
           
abstract  int leftAdjustCharHead(byte[] bytes, int p, int end)
          Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head
abstract  int length(byte c)
          Returns the character length given the character head: returns 1 for single-byte encodings, or performs a direct length table lookup for multibyte ones.
abstract  int maxLength()
          Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len
 int maxLengthDistance()
           
abstract  int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
          Performs case folding for a character at bytes[pp.value]
 int mbcodeStartPosition()
           
abstract  int mbcToCode(byte[] bytes, int p, int end)
          Returns code point for a character Oniguruma equivalent: mbc_to_code
abstract  int minLength()
          Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len
static int odigitVal(int code)
           
 int prevCharHead(byte[] bytes, int p, int end)
           
abstract  int propertyNameToCType(byte[] bytes, int p, int end)
          Returns character type given character type name (used when e.g. \p{Alpha}). Oniguruma equivalent: property_name_to_ctype
 int rightAdjustCharHead(byte[] bytes, int p, int end)
           
 int rightAdjustCharHeadWithPrev(byte[] bytes, int p, int end, IntHolder prev)
           
 int step(byte[] bytes, int p, int end, int n)
           
 int stepBack(byte[] bytes, int p, int end, int n)
           
 int strByteLengthNull(byte[] bytes, int p)
           
 int strLength(byte[] bytes, int p, int end)
           
 int strLengthNull(byte[] bytes, int p)
           
 int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
           
 byte[] toLowerCaseTable()
          Returns the lower case table if it is safe to use it directly, otherwise null. Used for fast case-insensitive matching for some single-byte encodings.
abstract  java.lang.String toString()
           
 int xdigitVal(int code)
           
 
Methods inherited from class java.lang.Object
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
 

Field Detail

name

protected byte[] name

hashCode

protected int hashCode

NEW_LINE

public static final byte NEW_LINE
See Also:
Constant Field Values
Constructor Detail

Encoding

public Encoding()
Method Detail

toString

public abstract java.lang.String toString()
Overrides:
toString in class java.lang.Object

equals

public final boolean equals(java.lang.Object other)
Overrides:
equals in class java.lang.Object

hashCode

public final int hashCode()
Overrides:
hashCode in class java.lang.Object

getName

public final byte[] getName()

length

public abstract int length(byte c)
Returns the character length given the character head: returns 1 for single-byte encodings, or performs a direct length table lookup for multibyte ones.

Parameters:
c - Character head. Oniguruma equivalent: mbc_enc_len

maxLength

public abstract int maxLength()
Returns maximum character byte length that can appear in an encoding Oniguruma equivalent: max_enc_len


maxLengthDistance

public final int maxLengthDistance()

minLength

public abstract int minLength()
Returns minimum character byte length that can appear in an encoding Oniguruma equivalent: min_enc_len


isNewLine

public abstract boolean isNewLine(byte[] bytes,
                                  int p,
                                  int end)
Returns true if bytes[p] is a head of a new line character Oniguruma equivalent: is_mbc_newline


mbcToCode

public abstract int mbcToCode(byte[] bytes,
                              int p,
                              int end)
Returns code point for a character Oniguruma equivalent: mbc_to_code


codeToMbcLength

public abstract int codeToMbcLength(int code)
Returns character length given a code point Oniguruma equivalent: code_to_mbclen


codeToMbc

public abstract int codeToMbc(int code,
                              byte[] bytes,
                              int p)
Extracts code point into its multibyte representation

Returns:
character length for the given code point. Oniguruma equivalent: code_to_mbc

mbcCaseFold

public abstract int mbcCaseFold(int flag,
                                byte[] bytes,
                                IntHolder pp,
                                int end,
                                byte[] to)
Performs case folding for a character at bytes[pp.value]

Parameters:
flag - case fold flag
pp - an IntHolder that points at character head
to - a buffer where to extract case folded character Oniguruma equivalent: mbc_case_fold

toLowerCaseTable

public byte[] toLowerCaseTable()
Returns the lower case table if it is safe to use it directly, otherwise null. Used for fast case-insensitive matching for some single-byte encodings.

Returns:
lower case table

applyAllCaseFold

public abstract void applyAllCaseFold(int flag,
                                      ApplyAllCaseFoldFunction fun,
                                      java.lang.Object arg)
Expand case folds given a character class (used for case insensitive matching)

Parameters:
flag - case fold flag
fun - case folding functor (look at: ApplyCaseFold)
arg - case folding functor argument (look at: ApplyCaseFoldArg) Oniguruma equivalent: apply_all_case_fold

caseFoldCodesByString

public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag,
                                                         byte[] bytes,
                                                         int p,
                                                         int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString) Oniguruma equivalent: get_case_fold_codes_by_str


propertyNameToCType

public abstract int propertyNameToCType(byte[] bytes,
                                        int p,
                                        int end)
Returns character type given character type name (used when e.g. \p{Alpha}) Oniguruma equivalent: property_name_to_ctype


isCodeCType

public abstract boolean isCodeCType(int code,
                                    int ctype)
Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)

Parameters:
code - a code point of a character
ctype - a character type to check against Oniguruma equivalent: is_code_ctype

ctypeCodeRange

public abstract int[] ctypeCodeRange(int ctype,
                                     IntHolder sbOut)
Returns code range for a given character type Oniguruma equivalent: get_ctype_code_range


leftAdjustCharHead

public abstract int leftAdjustCharHead(byte[] bytes,
                                       int p,
                                       int end)
Seeks the previous character head in a stream Oniguruma equivalent: left_adjust_char_head


isReverseMatchAllowed

public abstract boolean isReverseMatchAllowed(byte[] bytes,
                                              int p,
                                              int end)
Returns true if it is safe to use the reverse Boyer-Moore fail-fast search algorithm. Oniguruma equivalent: is_allowed_reverse_match


rightAdjustCharHead

public final int rightAdjustCharHead(byte[] bytes,
                                     int p,
                                     int end)

rightAdjustCharHeadWithPrev

public final int rightAdjustCharHeadWithPrev(byte[] bytes,
                                             int p,
                                             int end,
                                             IntHolder prev)

prevCharHead

public final int prevCharHead(byte[] bytes,
                              int p,
                              int end)

stepBack

public final int stepBack(byte[] bytes,
                          int p,
                          int end,
                          int n)

step

public final int step(byte[] bytes,
                      int p,
                      int end,
                      int n)

strLength

public int strLength(byte[] bytes,
                     int p,
                     int end)

strLengthNull

public final int strLengthNull(byte[] bytes,
                               int p)

strByteLengthNull

public final int strByteLengthNull(byte[] bytes,
                                   int p)

strNCmp

public final int strNCmp(byte[] bytes,
                         int p,
                         int end,
                         byte[] ascii,
                         int asciiP,
                         int n)

isNewLine

public final boolean isNewLine(int code)

isGraph

public final boolean isGraph(int code)

isPrint

public final boolean isPrint(int code)

isAlnum

public final boolean isAlnum(int code)

isAlpha

public final boolean isAlpha(int code)

isLower

public final boolean isLower(int code)

isUpper

public final boolean isUpper(int code)

isCntrl

public final boolean isCntrl(int code)

isPunct

public final boolean isPunct(int code)

isSpace

public final boolean isSpace(int code)

isBlank

public final boolean isBlank(int code)

isDigit

public final boolean isDigit(int code)

isXDigit

public final boolean isXDigit(int code)

isWord

public final boolean isWord(int code)

isMbcWord

public final boolean isMbcWord(byte[] bytes,
                               int p,
                               int end)

isSbWord

public final boolean isSbWord(int code)

isMbcHead

public final boolean isMbcHead(byte b)

isMbcCrnl

public boolean isMbcCrnl(byte[] bytes,
                         int p,
                         int end)

digitVal

public static int digitVal(int code)

odigitVal

public static int odigitVal(int code)

xdigitVal

public final int xdigitVal(int code)

isMbcAscii

public static boolean isMbcAscii(byte b)

isAscii

public static boolean isAscii(int code)

asciiToLower

public static int asciiToLower(int c)

asciiToUpper

public static int asciiToUpper(int c)

isWordGraphPrint

public static boolean isWordGraphPrint(int ctype)

mbcodeStartPosition

public final int mbcodeStartPosition()

isSingleByte

public abstract boolean isSingleByte()

isFixedWidth

public abstract boolean isFixedWidth()


Copyright © 2008. All Rights Reserved.