/*
 * Decompiled with CFR 0.152.
 */
package org.lionsoul.jcseg.segmenter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.lionsoul.jcseg.IChunk;
import org.lionsoul.jcseg.ISegment;
import org.lionsoul.jcseg.IWord;
import org.lionsoul.jcseg.dic.ADictionary;
import org.lionsoul.jcseg.segmenter.SegKit;
import org.lionsoul.jcseg.segmenter.SegmenterConfig;
import org.lionsoul.jcseg.segmenter.Word;
import org.lionsoul.jcseg.util.IPushbackReader;
import org.lionsoul.jcseg.util.IStringBuffer;
import org.lionsoul.jcseg.util.IntArrayList;
import org.lionsoul.jcseg.util.NumericUtil;
import org.lionsoul.jcseg.util.StringUtil;

public abstract class Segmenter
implements ISegment {
    /** Index of the character most recently returned by {@link #readNext()}; -1 right after {@link #reset(Reader)}. */
    protected int idx;
    /** Push-back capable reader over the current input; installed by {@link #reset(Reader)}. */
    protected IPushbackReader reader = null;
    /** Words already produced and waiting to be handed out by {@link #next()}. */
    protected final LinkedList<IWord> wordPool;
    /** Scratch list that receives the pieces of a Latin word in {@link #getNextLatinWord(int, int)}. */
    protected final LinkedList<IWord> subWordPool;
    /** Shared scratch buffer used by the character-collecting helpers (nextCJKSentence, nextLatinWord, ...). */
    protected final IStringBuffer isb;
    /** Scratch list remembering read-ahead characters so that they can be pushed back in reverse order. */
    protected final IntArrayList ialist;
    /** Latin text read ahead by getNextMixedWord; {@link #next()} pushes it back when it was not consumed. */
    protected String behindLatin = null;
    /**
     * Bit flags exchanged between the helpers of this class:
     * bit 1 - set by nextCJKSentence when the CJK run is directly followed by an English letter or digit;
     * bit 2 - set by nextCNNumeric when the numeral contains a fraction marker;
     * bit 4 - set by nextLatinWord, tested by getNextLatinWord to choose enSecondSeg;
     * bit 8 - set by nextLatinString when the Latin run is directly followed by a CJK character.
     */
    protected int ctrlMask = 0;
    /** Dictionary consulted for every lexicon lookup. */
    public final ADictionary dic;
    /** Configuration switches controlling the segmentation. */
    public final SegmenterConfig config;

    /**
     * Creates a segmenter bound to the given configuration and dictionary and
     * allocates the reusable word pools and scratch buffers.
     *
     * @param config segmentation settings, stored as-is
     * @param dic    dictionary used for all lookups, stored as-is
     */
    public Segmenter(SegmenterConfig config, ADictionary dic) {
        this.config = config;
        this.dic = dic;
        // Parameterized instead of raw types so the pools are checked at compile time.
        this.wordPool = new LinkedList<IWord>();
        this.subWordPool = new LinkedList<IWord>();
        this.isb = new IStringBuffer(64);
        this.ialist = new IntArrayList(15);
    }

    /**
     * Points the segmenter at a new input and rewinds the stream position.
     *
     * <p>When {@code input} is {@code null} the current reader is kept and only
     * the position counter is rewound.
     *
     * @param input the new character source, or {@code null} to keep the current one
     * @throws IOException if the reader cannot be set up
     */
    @Override
    public void reset(Reader input) throws IOException {
        if (input != null) {
            this.reader = new IPushbackReader(new BufferedReader(input));
            // Words still queued were produced from the previous input; without this
            // they would be returned by next() as if they belonged to the new one.
            this.wordPool.clear();
        }
        this.idx = -1;
    }

    /**
     * Reads one character from the input and advances the stream index.
     *
     * @return the character read, or -1 at end of input (the index is then left untouched)
     * @throws IOException if the underlying reader fails
     */
    protected int readNext() throws IOException {
        final int ch = this.reader.read();
        if (ch == -1) {
            return -1;
        }
        this.idx++;
        return ch;
    }

    /**
     * Returns a single character to the reader and steps the stream index back by one.
     *
     * @param data the character to unread
     * @throws IOException declared for callers; raised if the push-back fails
     */
    protected void pushBack(int data) throws IOException {
        this.reader.unread(data);
        this.idx -= 1;
    }

    /**
     * Returns a whole string to the reader so that it will be read again in its
     * original order, and steps the stream index back by the string length.
     *
     * @param str the text to unread
     */
    protected void pushBack(String str) {
        final char[] text = str.toCharArray();
        // Unread back-to-front so the first character ends up on top.
        int k = text.length;
        while (k > 0) {
            k--;
            this.reader.unread(text[k]);
        }
        this.idx -= text.length;
    }

    /**
     * @return the number of characters consumed so far, i.e. the current index plus one
     */
    @Override
    public int getStreamPosition() {
        return 1 + this.idx;
    }

    /**
     * @return the dictionary this segmenter was constructed with
     */
    public ADictionary getDict() {
        return dic;
    }

    /**
     * @return the configuration this segmenter was constructed with
     */
    public SegmenterConfig getConfig() {
        return config;
    }

    /**
     * Returns the next word of the input.
     *
     * <p>Words queued in {@code wordPool} by an earlier call are handed out first.
     * Otherwise characters are read until one of the per-category handlers yields
     * a word. Whitespace is skipped, and words matching the lexicon queried with
     * index 7 are dropped when {@code CLEAR_STOPWORD} is on.
     *
     * @return the next word, or {@code null} once the input is exhausted
     * @throws IOException if reading the input fails
     */
    @Override
    public IWord next() throws IOException {
        if (!this.wordPool.isEmpty()) {
            return this.wordPool.removeFirst();
        }

        IWord result = null;
        for (int ch = this.readNext(); ch != -1; ch = this.readNext()) {
            if (StringUtil.isWhitespace(ch)) {
                continue;
            }

            final int start = this.idx;
            if (StringUtil.isCJKChar(ch)) {
                this.behindLatin = null;
                result = this.getNextCJKWord(ch, start);
                // Latin text read ahead for a mixed-word probe but not consumed goes back to the reader.
                if (this.behindLatin != null) {
                    this.pushBack(this.behindLatin);
                }
            } else if (StringUtil.isEnChar(ch)) {
                result = this.getNextLatinWord(ch, start);
            } else if (this.config.PPT_MAX_LENGTH > 0 && StringUtil.isPairPunctuation((char)ch)) {
                result = this.getNextPunctuationPairWord(ch, start);
            } else if (StringUtil.isLetterNumber(ch)) {
                final String text = this.nextLetterNumber(ch);
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, text)) {
                    continue;
                }
                result = new Word(text, 7);
                result.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                result.setPosition(start);
            } else if (StringUtil.isOtherNumber(ch)) {
                final String text = this.nextOtherNumber(ch);
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, text)) {
                    continue;
                }
                result = new Word(text, 7);
                result.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                result.setPosition(start);
            } else if (StringUtil.isCnPunctuation(ch)) {
                final String text = String.valueOf((char)ch);
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, text)) {
                    continue;
                }
                result = new Word(text, 10);
                result.setPartSpeechForNull(IWord.PUNCTUATION);
                result.setPosition(start);
            } else if (this.config.KEEP_UNREG_WORDS) {
                // Anything unrecognised is only kept as a one-character word on request.
                final String text = String.valueOf((char)ch);
                if (this.config.CLEAR_STOPWORD && this.dic.match(7, text)) {
                    continue;
                }
                result = new Word(text, 11);
                result.setPartSpeechForNull(IWord.UNRECOGNIZE);
                result.setPosition(start);
            }

            if (result != null) {
                return result;
            }
            // A handler may have returned nothing itself but still queued words.
            if (!this.wordPool.isEmpty()) {
                return this.wordPool.removeFirst();
            }
        }
        return null;
    }

    /**
     * Segments the run of CJK characters starting with {@code c}, queues every
     * resulting word in {@code wordPool} and returns the first queued word.
     *
     * <p>For each position in the run the method tries, in order: Chinese numeral
     * handling (fractions, numeral + unit, optional Arabic conversion), the best
     * chunk from {@link #getBestChunk(char[], int, int)}, optional Chinese name
     * recognition on that chunk, and - at the tail of the run when Latin text
     * follows - a mixed CJK/Latin word via {@link #getNextMixedWord(char[], int)}.
     *
     * @param c   the first CJK character, already consumed by the caller
     * @param pos stream position of {@code c}; word positions are {@code pos} plus the offset in the run
     * @return the first word produced, or {@code null} when every word was filtered out
     * @throws IOException if reading the input fails
     */
    protected IWord getNextCJKWord(int c, int pos) throws IOException {
        final char[] chars = this.nextCJKSentence(c);
        int cjkidx = 0;
        IWord w = null;
        while (cjkidx < chars.length) {
            w = null;

            // ---- Chinese numerals -------------------------------------------------
            if (cjkidx + 1 < chars.length && NumericUtil.isCNNumeric(chars[cjkidx]) > -1) {
                String num = this.nextCNNumeric(chars, cjkidx);
                final int numLen = num.length();
                if ((this.ctrlMask & 2) != 0) {
                    // nextCNNumeric flagged a fraction: queue the numeral as one word.
                    w = new Word(num, 9);
                    w.setPosition(pos + cjkidx);
                    w.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    this.wordPool.add(w);
                    if (this.config.CNFRA_TO_ARABIC) {
                        // The text before the marker is the denominator, the text after it the numerator.
                        final String[] split = num.split("\u5206\u4e4b");
                        final Word wd = new Word(NumericUtil.cnNumericToArabic(split[1], true) + "/" + NumericUtil.cnNumericToArabic(split[0], true), 9);
                        wd.setPosition(w.getPosition());
                        wd.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                        this.wordPool.add(wd);
                    }
                } else if (NumericUtil.isCNNumeric(chars[cjkidx + 1]) > -1 || this.dic.match(1, chars[cjkidx + 1] + "")) {
                    final StringBuilder sb = new StringBuilder();
                    String temp = null;
                    final String originalNum = num;
                    sb.append(num);
                    boolean matched = false;
                    // Extend the numeral character by character looking for a longer dictionary word.
                    for (int j = num.length(); cjkidx + j < chars.length && j < this.config.MAX_LENGTH; ++j) {
                        sb.append(chars[cjkidx + j]);
                        temp = sb.toString();
                        if (!this.dic.match(0, temp)) {
                            continue;
                        }
                        w = this.dic.get(0, temp);
                        num = temp;
                        matched = true;
                    }
                    // A match that is just "numeral + one unit character" is treated as a plain numeral.
                    if (matched && num.length() - numLen == 1 && this.dic.match(1, num.substring(numLen))) {
                        num = originalNum;
                        matched = false;
                    }
                    Word wd = null;
                    if (!matched && this.config.CNNUM_TO_ARABIC) {
                        String arabic = NumericUtil.cnNumericToArabic(num, true) + "";
                        if (cjkidx + num.length() < chars.length && this.dic.match(1, chars[cjkidx + num.length()] + "")) {
                            final char units = chars[cjkidx + num.length()];
                            num = num + units;
                            arabic = arabic + units;
                        }
                        wd = new Word(arabic, 9);
                        wd.setPosition(pos + cjkidx);
                        wd.setLength(num.length());
                        wd.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    }
                    if (this.config.CLEAR_STOPWORD && this.dic.match(7, num)) {
                        cjkidx += num.length();
                        continue;
                    }
                    if (w == null) {
                        w = new Word(num, 9);
                        w.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                    } else {
                        // Never hand out the dictionary's own entry.
                        w = w.clone();
                    }
                    w.setPosition(pos + cjkidx);
                    this.wordPool.add(w);
                    if (wd != null) {
                        this.wordPool.add(wd);
                    }
                }
                if (w != null) {
                    cjkidx += w.getLength();
                    this.appendCJKWordFeatures(w);
                    continue;
                }
            }

            // ---- best chunk -------------------------------------------------------
            final IChunk chunk = this.getBestChunk(chars, cjkidx, this.config.MAX_LENGTH);
            w = chunk.getWords()[0];
            final String wps = w.getPartSpeech() == null ? null : w.getPartSpeech()[0];

            // ---- Chinese name recognition ----------------------------------------
            int T = -1;
            if (this.config.I_CN_NAME && !"nr".equals(wps) && w.getLength() <= 2 && chunk.getWords().length > 1) {
                final StringBuilder sb = new StringBuilder();
                sb.append(w.getValue());
                String str = null;
                // The chunk starts at cjkidx, so that is the offset findCHName must use for
                // its look-ahead into chars (passing 0 was only right for the first chunk).
                if (this.dic.match(2, w.getValue()) && (str = this.findCHName(chars, cjkidx, chunk)) != null) {
                    T = 3;
                    sb.append(str);
                } else if (this.dic.match(6, w.getValue()) && chunk.getWords()[1].getLength() <= 2 && this.dic.match(2, chunk.getWords()[1].getValue())) {
                    T = 4;
                    sb.append(chunk.getWords()[1].getValue());
                }
                if (T != -1) {
                    w = new Word(sb.toString(), T);
                    w.setPartSpeechForNull(IWord.NAME_POSPEECH);
                }
            }
            if (this.config.CLEAR_STOPWORD && this.dic.match(7, w.getValue())) {
                cjkidx += w.getLength();
                continue;
            }

            // ---- mixed CJK/Latin word at the tail of the run ----------------------
            IWord ce = null;
            if ((this.ctrlMask & 1) != 0 && chars.length - cjkidx <= this.dic.mixPrefixLength && (ce = this.getNextMixedWord(chars, cjkidx)) != null) {
                T = -1;
            }
            if (ce == null) {
                if (T == -1) {
                    // Still the dictionary's entry: clone before setting the position.
                    w = w.clone();
                }
            } else {
                w = ce.clone();
            }
            w.setPosition(pos + cjkidx);
            this.wordPool.add(w);
            cjkidx += w.getLength();
            if (T != -1) {
                continue;
            }
            this.appendCJKWordFeatures(w);
        }
        if (this.wordPool.size() == 0) {
            return null;
        }
        return this.wordPool.remove();
    }

    /**
     * Produces the next word for input that starts with an English character.
     *
     * <p>A punctuation character becomes a one-character word. Otherwise the
     * Latin word is read with {@link #nextLatinWord(int, int)} and, when secondary
     * segmentation is enabled and applicable, split into sub-words: the first
     * sub-word is returned and the rest are queued in {@code wordPool}.
     *
     * @param c   the first character, already consumed by the caller
     * @param pos stream position of {@code c}
     * @return the word, or {@code null} when it was dropped by the stop-word filter
     * @throws IOException if reading the input fails
     */
    protected IWord getNextLatinWord(int c, int pos) throws IOException {
        if (StringUtil.isEnPunctuation(c)) {
            final String mark = String.valueOf((char)c);
            if (this.config.CLEAR_STOPWORD && this.dic.match(7, mark)) {
                return null;
            }
            final Word punct = new Word(mark, 10);
            punct.setPosition(pos);
            punct.setPartSpeechForNull(IWord.PUNCTUATION);
            return punct;
        }

        final IWord latin = this.nextLatinWord(c, pos);
        latin.setPosition(pos);
        if (this.config.CLEAR_STOPWORD && this.dic.match(7, latin.getValue())) {
            return null;
        }

        // No secondary segmentation requested, or the word is excluded from it.
        final boolean splittable = this.config.EN_SECOND_SEG && this.enSecondSegFilter(latin);
        if (!splittable) {
            this.appendCJKWordFeatures(latin);
            return latin;
        }

        this.subWordPool.clear();
        if ((this.ctrlMask & 4) != 0) {
            this.enSecondSeg(latin, this.subWordPool);
        } else if (this.config.EN_WORD_SEG) {
            this.enWordSeg(latin, this.subWordPool);
        } else {
            this.appendCJKWordFeatures(latin);
            return latin;
        }

        if (this.subWordPool.isEmpty()) {
            this.appendLatinWordFeatures(latin);
            return latin;
        }

        // Return the first piece now; its features and the remaining pieces are queued in order.
        final IWord head = this.subWordPool.removeFirst();
        this.appendLatinWordFeatures(head);
        for (IWord piece : this.subWordPool) {
            this.wordPool.add(piece);
            this.appendLatinWordFeatures(piece);
        }
        return head;
    }

    /**
     * Tries to build a mixed word made of the CJK tail {@code chars[cjkidx..]},
     * the Latin text that follows it in the stream and, optionally, a few more
     * characters after that.
     *
     * <p>The Latin text is read once into {@code behindLatin}; when no word is
     * found it stays there so that the caller can push it back. Characters read
     * beyond the Latin text that are not part of the longest match are pushed
     * back here.
     *
     * @param chars  the current CJK run
     * @param cjkidx offset in {@code chars} where the candidate prefix starts
     * @return the dictionary entry of the mixed word (not cloned), or {@code null} if there is none
     * @throws IOException if reading the input fails
     */
    protected IWord getNextMixedWord(char[] chars, int cjkidx) throws IOException {
        final IStringBuffer buff = new IStringBuffer();
        buff.clear().append(chars, cjkidx);
        String tstring = buff.toString();
        // The CJK tail must be a known mixed-word prefix (lexicon 8), otherwise give up early.
        if (!this.dic.match(8, tstring)) {
            return null;
        }
        if (this.behindLatin == null) {
            this.behindLatin = this.nextLatinString(this.readNext());
        }

        IWord wd = null;
        buff.append(this.behindLatin);
        tstring = buff.toString();
        if (this.dic.match(0, tstring)) {
            wd = this.dic.get(0, tstring);
        }

        if ((this.ctrlMask & 8) != 0 || this.dic.match(8, tstring)) {
            this.ialist.clear();
            int chr;
            int mc = 0;
            // Declared outside the loop: the push-back loop below needs the number of
            // characters actually read.
            int j;
            for (j = 0; j < this.dic.mixSuffixLength && (chr = this.readNext()) != -1; ++j) {
                buff.append((char)chr);
                this.ialist.add(chr);
                tstring = buff.toString();
                if (!this.dic.match(0, tstring)) {
                    continue;
                }
                wd = this.dic.get(0, tstring);
                mc = j + 1;
            }
            // Return everything read beyond the longest match, last character first.
            for (int i = j - 1; i >= mc; --i) {
                this.pushBack(this.ialist.get(i));
            }
        }

        if (wd != null) {
            // The Latin text is part of the word now; nothing for the caller to push back.
            this.behindLatin = null;
        }
        return wd;
    }

    /**
     * Handles an opening pair punctuation: emits the punctuation itself and, when
     * the matching closing character is found close enough, the enclosed text as
     * one additional word.
     *
     * @param c   the opening punctuation, already consumed by the caller
     * @param pos stream position of {@code c}
     * @return the punctuation word (the enclosed text is then queued), the enclosed
     *         text when the punctuation was filtered out, or {@code null} when both were dropped
     * @throws IOException if reading the input fails
     */
    protected IWord getNextPunctuationPairWord(int c, int pos) throws IOException {
        final String enclosed = this.getPairPunctuationText(c);
        final String opener = String.valueOf((char)c);

        Word first = null;
        if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, opener))) {
            first = new Word(opener, 10);
            first.setPartSpeechForNull(IWord.PUNCTUATION);
            first.setPosition(pos);
        }

        if (enclosed == null || enclosed.length() <= 0) {
            return first;
        }
        if (this.config.CLEAR_STOPWORD && this.dic.match(7, enclosed)) {
            return first;
        }

        final Word body = new Word(enclosed, 0);
        body.setPartSpeechForNull(IWord.PPT_POSPEECH);
        body.setPosition(pos + 1);
        if (first == null) {
            return body;
        }
        this.wordPool.add(body);
        return first;
    }

    /**
     * Queues the pinyin and the synonyms of {@code word} in {@code wordPool},
     * each only when both its "load" and "append" switches are on and the word
     * actually carries that feature.
     *
     * @param word the word whose features are appended
     */
    protected void appendCJKWordFeatures(IWord word) {
        final boolean pinyinWanted = this.config.APPEND_CJK_PINYIN && this.config.LOAD_CJK_PINYIN;
        if (pinyinWanted && word.getPinyin() != null) {
            SegKit.appendPinyin(this.config, this.wordPool, word);
        }

        final boolean synWanted = this.config.APPEND_CJK_SYN && this.config.LOAD_CJK_SYN;
        if (synWanted && word.getSyn() != null) {
            SegKit.appendSynonyms(this.config, this.wordPool, word);
        }
    }

    /**
     * Queues the synonyms and the pinyin of a Latin word in {@code wordPool}.
     *
     * <p>When both features are wanted and {@code w} carries neither, the entry
     * with the same value is looked up in lexicon 0 and used as the feature
     * source instead.
     * NOTE(review): the lookup only happens when BOTH features are enabled and
     * missing - confirm that this should not be an "either" condition.
     * NOTE(review): the position is written to the looked-up entry itself, which
     * presumably is the dictionary's shared instance - confirm.
     *
     * @param w the Latin word whose features are appended
     */
    protected void appendLatinWordFeatures(IWord w) {
        final boolean synWanted = this.config.LOAD_CJK_SYN && this.config.APPEND_CJK_SYN;
        final boolean pinyinWanted = this.config.LOAD_CJK_PINYIN && this.config.APPEND_CJK_PINYIN;

        IWord source = w;
        if (synWanted && w.getSyn() == null && pinyinWanted && w.getPinyin() == null) {
            final IWord entry = this.dic.get(0, w.getValue());
            if (entry != null) {
                source = entry;
            }
        }
        source.setPosition(w.getPosition());

        if (synWanted && source.getSyn() != null) {
            SegKit.appendSynonyms(this.config, this.wordPool, source);
        }
        if (pinyinWanted && source.getPinyin() != null) {
            SegKit.appendPinyin(this.config, this.wordPool, source);
        }
    }

    /**
     * Decides whether a Latin word may be split by the secondary segmentation.
     * Words of type 2 (the type nextLatinWord assigns to words it resolved as a
     * whole through the dictionary) are excluded.
     *
     * @param w the candidate word
     * @return {@code true} if the word may be split
     */
    protected boolean enSecondSegFilter(IWord w) {
        final boolean excluded = w.getType() == 2;
        return !excluded;
    }

    /**
     * Splits a Latin word into runs of characters of the same English character
     * type and appends the runs to {@code wList}.
     *
     * <p>Runs shorter than {@code EN_SEC_MIN_LEN} and runs filtered as stop words
     * are skipped. Runs of character type 0 are further split with
     * {@link #enWordSeg(IWord, LinkedList)} when {@code EN_WORD_SEG} is on.
     *
     * @param w     the word to split
     * @param wList list receiving the pieces; a new list is created when {@code null}
     * @return the list the pieces were appended to
     */
    protected LinkedList<IWord> enSecondSeg(IWord w, LinkedList<IWord> wList) {
        final char[] letters = w.getValue().toCharArray();
        final int base = w.getPosition();
        if (wList == null) {
            wList = new LinkedList<IWord>();
        }
        if (this.config.isKeepEnSecOriginalWord()) {
            wList.add(w);
        }

        int cursor = 0;
        while (cursor < letters.length) {
            final int runType = StringUtil.getEnCharType(letters[cursor]);
            if (runType == 2) {
                // NOTE(review): a punctuation word is built here but never added to
                // wList, so punctuation inside the word is dropped - confirm intended.
                final Word punct = new Word(String.valueOf(letters[cursor]), 10);
                punct.setPartSpeechForNull(IWord.PUNCTUATION);
                punct.setPosition(base + cursor);
                cursor++;
                continue;
            }

            // Collect the run of characters sharing the type of the first one.
            this.isb.clear().append(letters[cursor]);
            int k = cursor + 1;
            while (k < letters.length && StringUtil.getEnCharType(letters[k]) == runType) {
                this.isb.append(letters[k]);
                k++;
            }

            if (this.isb.length() >= this.config.EN_SEC_MIN_LEN) {
                final String run = this.isb.toString();
                if (!(this.config.CLEAR_STOPWORD && this.dic.match(7, run))) {
                    final Word piece = new Word(run, w.getType());
                    piece.setPartSpeechForNull(w.getPartSpeech());
                    piece.setPosition(base + cursor);
                    if (this.config.EN_WORD_SEG && runType == 0) {
                        this.enWordSeg(piece, wList);
                    } else {
                        wList.addLast(piece);
                    }
                }
            }
            // Advance by the buffer length, read from the shared buffer as the original does.
            cursor += this.isb.length();
        }
        return wList;
    }

    /**
     * Splits a Latin word into dictionary words by repeatedly taking the first
     * word of the best chunk at the current offset.
     *
     * @param w     the word to split
     * @param wList list receiving clones of the pieces, positioned relative to {@code w}
     * @return {@code wList}
     */
    protected LinkedList<IWord> enWordSeg(IWord w, LinkedList<IWord> wList) {
        final char[] letters = w.getValue().toCharArray();
        final int base = w.getPosition();
        int offset = 0;
        while (offset < letters.length) {
            final IChunk best = this.getBestChunk(letters, offset, this.config.EN_MAX_LEN);
            final IWord piece = best.getWords()[0].clone();
            piece.setPosition(base + offset);
            wList.add(piece);
            offset += piece.getValue().length();
        }
        return wList;
    }

    /**
     * Collects every entry of lexicon 0 that starts at {@code chars[index]} and
     * is at most {@code maxLen} characters long, shortest first.
     *
     * <p>When nothing matches, a single new word of type 17 holding the first
     * character is returned instead. The list used for collecting is cleared
     * before returning.
     *
     * @param maxLen maximum number of characters to try
     * @param chars  the text
     * @param index  offset of the first character
     * @param wList  list used for collecting, or {@code null} to allocate one
     * @return the matches, never empty
     */
    protected IWord[] getNextMatch(int maxLen, char[] chars, int index, List<IWord> wList) {
        final IStringBuffer key = new IStringBuffer(maxLen);
        key.clear().append(chars[index]);
        if (wList == null) {
            wList = new ArrayList<IWord>(8);
        }

        final String firstChar = key.toString();
        if (this.dic.match(0, firstChar)) {
            wList.add(this.dic.get(0, firstChar));
        }

        int len = 1;
        while (len < maxLen && len + index < chars.length) {
            key.append(chars[len + index]);
            final String candidate = key.toString();
            if (this.dic.match(0, candidate)) {
                wList.add(this.dic.get(0, candidate));
            }
            len++;
        }

        if (wList.isEmpty()) {
            wList.add(new Word(firstChar, 17));
        }
        final IWord[] matches = wList.toArray(new IWord[wList.size()]);
        wList.clear();
        return matches;
    }

    /**
     * Looks for the given-name part of a Chinese name in the words that follow
     * the first word of {@code chunk}.
     *
     * <p>Lexicons 3, 4 and 5 are consulted for name characters (presumably
     * single-character names and the first / second character of two-character
     * names - verify against the dictionary constants). A trailing character is
     * only left out of the name when it is frequent enough as a word of its own
     * ({@code NAME_SINGLE_THRESHOLD}).
     *
     * @param chars the text the chunk was taken from
     * @param index offset used as the base of the look-ahead into {@code chars}
     * @param chunk the chunk; must contain at least two words
     * @return the name characters to append to the first word, or {@code null} if none were found
     */
    protected String findCHName(char[] chars, int index, IChunk chunk) {
        if (chunk.getWords().length == 2) {
            final IWord next = chunk.getWords()[1];
            final int len = next.getLength();
            if (len == 1) {
                return this.dic.match(3, next.getValue()) ? next.getValue() : null;
            }
            if (len == 2 || len == 3) {
                final String d1 = String.valueOf(next.getValue().charAt(0));
                final String d2 = String.valueOf(next.getValue().charAt(1));
                if (this.dic.match(4, d1) && this.dic.match(5, d2)) {
                    return d1 + d2;
                }
                if (this.dic.match(3, d1)) {
                    final IWord rest = this.dic.get(0, d2);
                    if (rest != null && rest.getFrequency() >= this.config.NAME_SINGLE_THRESHOLD) {
                        return d1;
                    }
                }
            }
            return null;
        }

        final IWord w1 = chunk.getWords()[1];
        final IWord w2 = chunk.getWords()[2];
        final int len1 = w1.getLength();

        if (len1 == 1) {
            final String first = w1.getValue();
            if (!this.dic.match(4, first)) {
                return this.dic.match(3, first) ? first : null;
            }
            if (w2.getLength() == 1) {
                if (this.dic.match(5, w2.getValue())) {
                    return first + w2.getValue();
                }
            } else {
                final String d1 = String.valueOf(w2.getValue().charAt(0));
                // Look at what follows the first character of the third word.
                final int lookAhead = index + chunk.getWords()[0].getLength() + 2;
                final IWord[] ws = this.getNextMatch(this.config.MAX_LENGTH, chars, lookAhead, null);
                if (this.dic.match(5, d1) && (ws.length > 1 || ws[0].getFrequency() >= this.config.NAME_SINGLE_THRESHOLD)) {
                    return first + d1;
                }
            }
            return this.dic.match(3, first) ? first : null;
        }

        if (len1 == 2) {
            final String d1 = String.valueOf(w1.getValue().charAt(0));
            final String d2 = String.valueOf(w1.getValue().charAt(1));
            if (this.dic.match(4, d1) && this.dic.match(5, d2)) {
                return w1.getValue();
            }
            if (this.dic.match(3, d1)) {
                final IWord rest = this.dic.get(0, d2);
                if (rest != null && rest.getFrequency() >= this.config.NAME_SINGLE_THRESHOLD) {
                    return d1;
                }
            }
            return null;
        }

        if (len1 == 3) {
            final String c1 = String.valueOf(w1.getValue().charAt(0));
            final String c2 = String.valueOf(w1.getValue().charAt(1));
            final IWord w3 = this.dic.get(0, w1.getValue().charAt(2) + "");
            if (this.dic.match(4, c1) && this.dic.match(5, c2) && (w3 == null || w3.getFrequency() >= this.config.NAME_SINGLE_THRESHOLD)) {
                return c1 + c2;
            }
            return null;
        }

        return null;
    }

    /**
     * Reads the run of consecutive CJK characters that starts with {@code c}.
     *
     * <p>The character that ends the run is pushed back. Bit 1 of
     * {@code ctrlMask} is cleared first and set again when that character is an
     * English letter or digit.
     *
     * @param c the first CJK character, already consumed by the caller
     * @return the characters of the run
     * @throws IOException if reading the input fails
     */
    protected char[] nextCJKSentence(int c) throws IOException {
        this.isb.clear();
        this.isb.append((char)c);
        this.ctrlMask &= ~1;

        for (int ch = this.readNext(); ch != -1; ch = this.readNext()) {
            if (StringUtil.isWhitespace(ch)) {
                this.pushBack(ch);
                break;
            }
            if (!StringUtil.isCJKChar(ch)) {
                this.pushBack(ch);
                if (StringUtil.isEnLetter(ch) || StringUtil.isEnNumeric(ch)) {
                    this.ctrlMask |= 1;
                }
                break;
            }
            this.isb.append((char)ch);
        }
        return this.isb.toString().toCharArray();
    }

    /**
     * Reads the Latin word that starts with {@code c} and turns it into a word object.
     *
     * <p>Characters are normalised while reading: code points above 65280 are
     * shifted down by 65248 (full-width to half-width) and 'A'-'Z' is lower-cased.
     * Reading stops at whitespace (consumed), at a punctuation the configuration
     * does not keep, or at a non-English character (both pushed back). Trailing
     * punctuation other than '%' is then stripped and pushed back unless the word
     * including it is in the dictionary. Depending on what follows, the word may
     * be extended with a unit character (lexicon 1) or with following characters
     * into a mixed dictionary word. Bit 4 of {@code ctrlMask} is cleared first and
     * set when the word contains more than one character type or a mixed word matched.
     *
     * @param c   the first character, already consumed by the caller
     * @param pos stream position of {@code c}; not used here, the caller sets the position
     * @return the word, never {@code null}
     * @throws IOException if reading the input fails
     */
    protected IWord nextLatinWord(int c, int pos) throws IOException {
        this.isb.clear();
        if (c > 65280) {
            c -= 65248;
        }
        if (c >= 65 && c <= 90) {
            c += 32;
        }
        this.isb.append((char)c);

        boolean _check = false;     // the word is directly followed by a CJK character
        boolean _wspace = false;    // the word was ended by whitespace
        int _ctype = 0;
        int tcount = 1;             // number of character-type runs seen in the word
        int _TYPE = StringUtil.getEnCharType(c);
        this.ctrlMask &= 0xFFFFFFFB;

        int ch;
        while ((ch = this.readNext()) != -1) {
            if (ch > 65280) {
                ch -= 65248;
            }
            if ((_ctype = StringUtil.getEnCharType(ch)) == 3) {
                _wspace = true;
                break;
            }
            if (_ctype == 2 && !this.config.isKeepPunctuation((char)ch)) {
                this.pushBack(ch);
                break;
            }
            if (_ctype == -1) {
                this.pushBack(ch);
                if (StringUtil.isCJKChar(ch)) {
                    _check = true;
                }
                break;
            }
            if (ch >= 65 && ch <= 90) {
                ch += 32;
            }
            this.isb.append((char)ch);
            if (_ctype != _TYPE) {
                ++tcount;
                _TYPE = _ctype;
            }
            // NOTE(review): the decompiled source ended this loop with a no-op comparison
            // against config.MAX_LATIN_LENGTH; the limit is not enforced here.
        }

        // Strip trailing punctuation (except '%') unless the word with it is a dictionary entry.
        String __str = this.isb.toString();
        IWord w = null;
        boolean chkunits = true;
        _ctype = 0;
        for (int t = this.isb.length() - 1; t > 0 && this.isb.charAt(t) != '%' && StringUtil.isEnPunctuation(this.isb.charAt(t)); --t) {
            if (this.dic.match(0, __str)) {
                w = this.dic.get(0, __str).clone();
                w.setType(2);
                w.setPartSpeechForNull(IWord.EN_POSPEECH);
                chkunits = false;
                break;
            }
            this.pushBack(this.isb.charAt(t));
            this.isb.deleteCharAt(t);
            __str = this.isb.toString();
            // The stripped punctuation counted as one type run; undo that once.
            if (_ctype != 0) {
                continue;
            }
            --tcount;
            _ctype = 1;
        }

        final boolean ssseg = tcount > 1;

        // Case 1: end of input or whitespace after the word - nothing more to probe.
        if (ch == -1 || _wspace) {
            if (w == null) {
                w = this.wordNewOrClone(0, __str, 5);
                w.setPartSpeechForNull(IWord.EN_POSPEECH);
            }
            if (ssseg) {
                this.ctrlMask |= 4;
            }
            return w;
        }

        // Case 2: followed by something that is not CJK - only a unit may be attached.
        if (!_check) {
            if (chkunits && (StringUtil.isDigit(__str) || StringUtil.isDecimal(__str))) {
                ch = this.readNext();
                if (this.dic.match(1, (char)ch + "")) {
                    w = this.wordNewOrClone(0, __str + (char)ch, 2);
                    w.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
                } else {
                    this.pushBack(ch);
                }
            }
            if (w == null) {
                w = this.wordNewOrClone(0, __str, 5);
                w.setPartSpeechForNull(IWord.EN_POSPEECH);
                if (ssseg) {
                    this.ctrlMask |= 4;
                }
            }
            return w;
        }

        // Case 3: followed by CJK - look for the longest mixed dictionary word.
        final IStringBuffer ibuffer = new IStringBuffer();
        ibuffer.append(__str);
        String _temp = null;
        int mc = 0;
        this.ialist.clear();
        // Declared outside the loop: the push-back loop below needs the number of
        // characters that were actually recorded in ialist.
        int j;
        for (j = 0; j < this.dic.mixSuffixLength && ibuffer.length() < this.config.MAX_LENGTH && (ch = this.readNext()) != -1; ++j) {
            if (StringUtil.isWhitespace(ch)) {
                this.pushBack(ch);
                break;
            }
            ibuffer.append((char)ch);
            this.ialist.add(ch);
            _temp = ibuffer.toString();
            if (!this.dic.match(0, _temp)) {
                continue;
            }
            w = this.dic.get(0, _temp);
            w.setType(2);
            this.ctrlMask |= 4;
            mc = j + 1;
        }
        // Return everything read beyond the longest match, last character first.
        for (int i = j - 1; i >= mc; --i) {
            this.pushBack(this.ialist.get(i));
        }

        if (chkunits && mc == 0 && (StringUtil.isDigit(__str) || StringUtil.isDecimal(__str))) {
            ch = this.readNext();
            if (this.dic.match(1, (char)ch + "")) {
                w = this.wordNewOrClone(0, __str + (char)ch, 2);
                w.setPartSpeechForNull(IWord.NUMERIC_POSPEECH);
            } else {
                this.pushBack(ch);
            }
        }
        if (w == null) {
            w = this.wordNewOrClone(0, __str, 5);
            w.setPartSpeechForNull(IWord.EN_POSPEECH);
            if (ssseg) {
                this.ctrlMask |= 4;
            }
        } else if (mc > 0) {
            // w is the dictionary's entry for the mixed word: hand out a copy.
            w = w.clone();
        }
        return w;
    }

    /**
     * Reads the Latin text that starts with {@code c} and returns it as a string.
     *
     * <p>Characters are normalised as in {@link #nextLatinWord(int, int)}
     * (full-width shifted down, upper case lowered). Reading stops at whitespace
     * (consumed), at a punctuation the configuration does not keep, or at a
     * non-English character (both pushed back). Bit 8 of {@code ctrlMask} is
     * cleared first and set when the stopping character is CJK. Every '.' except
     * one at index 0 is removed from the result.
     *
     * @param c the first character, already consumed by the caller
     * @return the normalised Latin text
     * @throws IOException if reading the input fails
     */
    protected String nextLatinString(int c) throws IOException {
        this.isb.clear();
        if (c > 65280) {
            c -= 65248;
        }
        if (c >= 65 && c <= 90) {
            c += 32;
        }
        this.isb.append((char)c);
        this.ctrlMask &= ~8;

        for (int ch = this.readNext(); ch != -1; ch = this.readNext()) {
            if (ch > 65280) {
                ch -= 65248;
            }
            final int type = StringUtil.getEnCharType(ch);
            if (type == 3) {
                break;
            }
            if (type == 2 && !this.config.isKeepPunctuation((char)ch)) {
                this.pushBack(ch);
                break;
            }
            if (type == -1) {
                this.pushBack(ch);
                if (StringUtil.isCJKChar(ch)) {
                    this.ctrlMask |= 8;
                }
                break;
            }
            if (ch >= 65 && ch <= 90) {
                ch += 32;
            }
            this.isb.append((char)ch);
        }

        // Walk backwards so deletions do not shift the characters still to be checked.
        int k = this.isb.length() - 1;
        while (k > 0) {
            if (this.isb.charAt(k) == '.') {
                this.isb.deleteCharAt(k);
            }
            k--;
        }
        return this.isb.toString();
    }

    /**
     * Reads the run of letter-number characters that starts with {@code c}.
     * The character that ends the run is pushed back.
     *
     * @param c the first character, already consumed by the caller
     * @return the run as a string
     * @throws IOException if reading the input fails
     */
    protected String nextLetterNumber(int c) throws IOException {
        this.isb.clear();
        this.isb.append((char)c);
        for (int ch = this.readNext(); ch != -1; ch = this.readNext()) {
            if (StringUtil.isWhitespace(ch) || !StringUtil.isLetterNumber(ch)) {
                this.pushBack(ch);
                break;
            }
            this.isb.append((char)ch);
        }
        return this.isb.toString();
    }

    /**
     * Reads the run of "other number" characters that starts with {@code c}.
     * The character that ends the run is pushed back.
     *
     * @param c the first character, already consumed by the caller
     * @return the run as a string
     * @throws IOException if reading the input fails
     */
    protected String nextOtherNumber(int c) throws IOException {
        this.isb.clear();
        this.isb.append((char)c);
        for (int ch = this.readNext(); ch != -1; ch = this.readNext()) {
            if (StringUtil.isWhitespace(ch) || !StringUtil.isOtherNumber(ch)) {
                this.pushBack(ch);
                break;
            }
            this.isb.append((char)ch);
        }
        return this.isb.toString();
    }

    /**
     * Collects the Chinese numeral that starts at {@code chars[index]}.
     *
     * <p>A two-character fraction marker (U+5206 U+4E4B) is accepted inside the
     * numeral when it is directly followed by another numeral character; bit 2 of
     * {@code ctrlMask} is cleared first and set when such a marker was consumed.
     *
     * @param chars the current CJK run
     * @param index offset of the first numeral character
     * @return the numeral text
     * @throws IOException declared for callers; nothing is read from the stream here
     */
    protected String nextCNNumeric(char[] chars, int index) throws IOException {
        this.isb.clear();
        this.isb.append(chars[index]);
        this.ctrlMask &= ~2;

        int at = index + 1;
        while (at < chars.length) {
            if (NumericUtil.isCNNumeric(chars[at]) != -1) {
                this.isb.append(chars[at]);
                at++;
                continue;
            }
            final boolean fraction = at + 2 < chars.length
                    && chars[at] == '\u5206'
                    && chars[at + 1] == '\u4e4b'
                    && NumericUtil.isCNNumeric(chars[at + 2]) != -1;
            if (!fraction) {
                break;
            }
            // Take the marker together with the numeral character right after it.
            this.isb.append(chars[at]);
            this.isb.append(chars[at + 1]);
            this.isb.append(chars[at + 2]);
            this.ctrlMask |= 2;
            at += 3;
        }
        return this.isb.toString();
    }

    /**
     * Reads the text enclosed by the pair punctuation {@code c}, looking at most
     * {@code PPT_MAX_LENGTH} characters ahead for the matching closing character.
     *
     * <p>On success the closing character is pushed back so it is segmented on
     * its own. When no closing character is found, every character read is
     * pushed back and the stream is left as it was.
     *
     * @param c the opening punctuation, already consumed by the caller
     * @return the enclosed text (possibly empty), or {@code null} when no closing character was found
     * @throws IOException if reading the input fails
     */
    protected String getPairPunctuationText(int c) throws IOException {
        this.isb.clear();
        final char echar = StringUtil.getPunctuationPair((char)c);
        boolean matched = false;
        this.ialist.clear();

        int ch;
        // Declared outside the loop: the push-back loop below needs the number of
        // characters that were recorded in ialist.
        int j;
        for (j = 0; j < this.config.PPT_MAX_LENGTH && (ch = this.readNext()) != -1; ++j) {
            if (ch == echar) {
                matched = true;
                this.pushBack(ch);
                break;
            }
            this.isb.append((char)ch);
            this.ialist.add(ch);
        }

        if (!matched) {
            // Undo the look-ahead, last character first.
            for (int i = j - 1; i >= 0; --i) {
                this.pushBack(this.ialist.get(i));
            }
            return null;
        }
        return this.isb.toString();
    }

    /**
     * Returns a word object for {@code str}: a clone of the dictionary entry when
     * lexicon {@code t} contains it, otherwise a new word of the given type.
     *
     * @param t    lexicon index to look the string up in
     * @param str  the word text
     * @param type word type used when a new word has to be created
     * @return a word that is safe to modify
     */
    public IWord wordNewOrClone(int t, String str, int type) {
        if (this.dic.match(t, str)) {
            return this.dic.get(t, str).clone();
        }
        return new Word(str, type);
    }

    /**
     * Selects the best chunk of words starting at {@code chars[index]}.
     *
     * <p>This base implementation always returns {@code null}. The callers in
     * this class (getNextCJKWord, enWordSeg) dereference the result immediately,
     * so presumably subclasses that use those paths override this method -
     * TODO confirm.
     *
     * @param chars  the text to segment
     * @param index  offset where the chunk has to start
     * @param maxLen maximum word length to consider
     * @return the best chunk; {@code null} in this base implementation
     */
    protected IChunk getBestChunk(char[] chars, int index, int maxLen) {
        return null;
    }
}

