package org.apache.lucene.analysis.cn.smart;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.Iterator;
import java.util.Locale;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:org/apache/lucene/analysis/cn/smart/HMMChineseTokenizer.class */
/**
 * Tokenizer that splits its input into sentences with a {@link BreakIterator}
 * and then hands each sentence to a {@link WordSegmenter}, emitting one token
 * per {@link SegToken} the segmenter returns.
 *
 * <p>Every emitted token carries the segment's characters, its offsets (passed
 * through {@code correctOffset}) and the type {@link TypeAttribute#DEFAULT_TYPE}.
 */
public class HMMChineseTokenizer extends SegmentingTokenizerBase {
    /** Shared sentence-break prototype; each tokenizer instance works on its own clone. */
    private static final BreakIterator sentenceProto = BreakIterator.getSentenceInstance(Locale.ROOT);

    /** Receives the characters of the current token. */
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    /** Receives the corrected start/end offsets of the current token. */
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /** Receives the type of the current token. */
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

    /** Splits a single sentence into segment tokens. */
    private final WordSegmenter wordSegmenter = new WordSegmenter();

    /** Tokens of the current sentence still to be emitted; {@code null} until a sentence is set. */
    private Iterator<SegToken> tokens;

    /** Creates a tokenizer that uses {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}. */
    public HMMChineseTokenizer() {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
    }

    /**
     * Creates a tokenizer that uses the supplied attribute factory.
     *
     * @param attributeFactory factory forwarded to the superclass constructor
     */
    public HMMChineseTokenizer(AttributeFactory attributeFactory) {
        super(attributeFactory, (BreakIterator) sentenceProto.clone());
    }

    /**
     * Segments the sentence found at {@code buffer[i .. i2)} and stores an
     * iterator over the resulting tokens for {@link #incrementWord()}.
     *
     * @param i  index in {@code buffer} of the first character of the sentence
     * @param i2 index in {@code buffer} one past the last character of the sentence
     */
    @Override
    protected void setNextSentence(int i, int i2) {
        final int sentenceLength = i2 - i;
        final String sentence = new String(buffer, i, sentenceLength);
        // The segmenter is given offset + i as the starting offset of this sentence.
        final int sentenceStartOffset = offset + i;
        tokens = wordSegmenter.segmentSentence(sentence, sentenceStartOffset).iterator();
    }

    /**
     * Emits the next token of the current sentence, if any.
     *
     * @return {@code true} when a token was written to the attributes,
     *         {@code false} when no sentence has been set or it is exhausted
     */
    @Override
    protected boolean incrementWord() {
        final Iterator<SegToken> pending = tokens;
        if (pending != null && pending.hasNext()) {
            final SegToken segment = pending.next();
            clearAttributes();
            final char[] chars = segment.charArray;
            termAtt.copyBuffer(chars, 0, chars.length);
            final int start = correctOffset(segment.startOffset);
            final int end = correctOffset(segment.endOffset);
            offsetAtt.setOffset(start, end);
            typeAtt.setType(TypeAttribute.DEFAULT_TYPE);
            return true;
        }
        return false;
    }

    /**
     * Resets the superclass state and discards any tokens left over from the
     * previously set sentence.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        tokens = null;
    }
}
