package org.apache.lucene.analysis.cn.smart;

import java.io.IOException;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeFactory;

/**
 * Tokenizer that splits its input into sentence-sized tokens.
 *
 * <p>Each call to {@link #incrementToken()} reads characters from the input until one of the
 * sentence-ending punctuation characters is seen (that character is included in the token), until
 * two consecutive characters from {@code Utility.SPACES} are seen, or until the input is
 * exhausted. Every emitted token has the type {@code "sentence"}.
 */
@Deprecated
/* loaded from: input_file:org/apache/lucene/analysis/cn/smart/SentenceTokenizer.class */
public final class SentenceTokenizer extends Tokenizer {
    /** Characters that terminate a sentence; the terminator is kept as part of the token. */
    private static final String SENTENCE_DELIMITERS = "。，！？；,!?;";

    /** Accumulates the text of the token currently being built. */
    private final StringBuilder buffer = new StringBuilder();

    /** Uncorrected offset of the first character of the current token. */
    private int tokenStart = 0;

    /** Uncorrected offset just past the last character consumed for the current token. */
    private int tokenEnd = 0;

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

    /** Creates a tokenizer using the attribute factory chosen by the superclass. */
    public SentenceTokenizer() {
    }

    /**
     * Creates a tokenizer whose attributes are produced by the given factory.
     *
     * @param attributeFactory factory handed to the {@link Tokenizer} superclass
     */
    public SentenceTokenizer(AttributeFactory attributeFactory) {
        super(attributeFactory);
    }

    /**
     * Reads the next sentence from the input and publishes it through the term, offset and type
     * attributes.
     *
     * @return {@code true} if a non-empty sentence was produced, {@code false} if no characters
     *     were collected before the input ended
     * @throws IOException if reading from the input fails
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public boolean incrementToken() throws IOException {
        clearAttributes();
        buffer.setLength(0);
        tokenStart = tokenEnd;

        boolean atSentenceStart = true;
        int code = input.read();
        while (code != -1) {
            final char current = (char) code;

            if (isSentenceDelimiter(current)) {
                // The terminator belongs to the sentence it closes.
                buffer.append(current);
                tokenEnd++;
                break;
            }

            if (atSentenceStart && isSpace(current)) {
                // Leading spaces are skipped; both offsets move past them.
                tokenStart++;
                tokenEnd++;
                code = input.read();
                continue;
            }

            buffer.append(current);
            atSentenceStart = false;
            tokenEnd++;

            code = input.read();
            // Two space characters in a row end the sentence. The second one is consumed and
            // counted in the end offset, but it is not appended to the token text.
            // At end of input (code == -1) the cast yields '\uffff', exactly as the look-ahead
            // has always been evaluated here.
            if (isSpace((char) code) && isSpace(current)) {
                tokenEnd++;
                break;
            }
        }

        final boolean hasToken = buffer.length() != 0;
        if (hasToken) {
            termAtt.setEmpty().append(buffer);
            offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
            typeAtt.setType("sentence");
        }
        return hasToken;
    }

    /**
     * Resets the superclass state and rewinds both offset counters to zero.
     *
     * @throws IOException if the superclass reset fails
     */
    @Override // org.apache.lucene.analysis.Tokenizer, org.apache.lucene.analysis.TokenStream
    public void reset() throws IOException {
        super.reset();
        tokenEnd = 0;
        tokenStart = 0;
    }

    /**
     * Sets the final offset: start and end are both the corrected position of the last consumed
     * character.
     *
     * @throws IOException if the superclass end handling fails
     */
    @Override // org.apache.lucene.analysis.TokenStream
    public void end() throws IOException {
        super.end();
        final int finalOffset = correctOffset(tokenEnd);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /** Returns whether {@code ch} is one of the sentence-ending punctuation characters. */
    private static boolean isSentenceDelimiter(char ch) {
        return SENTENCE_DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns whether {@code ch} occurs in {@code Utility.SPACES}. */
    private static boolean isSpace(char ch) {
        return Utility.SPACES.indexOf(ch) != -1;
    }
}
