package org.simmetrics.tokenizers;

import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Multiset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/* loaded from: classes2.dex */
public final class Tokenizers {

    /* loaded from: classes2.dex */
    /**
     * Tokenizer that splits a string into overlapping q-grams, i.e. every
     * run of {@code q} consecutive Unicode code points.
     *
     * <p>When the input holds fewer than {@code q} code points the result
     * depends on the {@code filter} flag: with filtering enabled an empty
     * list is returned, otherwise the whole input is returned as the only
     * token.
     */
    static class QGram extends AbstractTokenizer {
        private final boolean filter;
        private final int q;

        /**
         * Creates a non-filtering q-gram tokenizer.
         *
         * @param i the q-gram size in code points, must be greater than 0
         * @throws IllegalArgumentException if {@code i} is not positive
         */
        public QGram(int i) {
            this(i, false);
        }

        /**
         * Creates a q-gram tokenizer.
         *
         * @param i the q-gram size in code points, must be greater than 0
         * @param z when {@code true}, inputs shorter than {@code i} code
         *          points yield no tokens instead of the input itself
         * @throws IllegalArgumentException if {@code i} is not positive
         */
        QGram(int i, boolean z) {
            Preconditions.checkArgument(i > 0, "q must be greater then 0");
            this.q = i;
            this.filter = z;
        }

        /** Returns the configured q-gram size. */
        int getQ() {
            return this.q;
        }

        @Override
        public String toString() {
            return "QGram [q=" + this.q + "]";
        }

        /**
         * Splits {@code str} into its q-grams.
         *
         * @param str the string to tokenize, must not be {@code null}
         * @return an empty list for an empty input; for an input with fewer
         *         than {@code q} code points either an empty list (filtering)
         *         or a singleton list holding the input; otherwise all
         *         q-grams in order of appearance
         */
        @Override // org.simmetrics.tokenizers.Tokenizer
        public List<String> tokenizeToList(String str) {
            if (str.isEmpty()) {
                return Collections.emptyList();
            }
            // Size is measured in code points, not chars: a string made of
            // surrogate pairs can have length() >= q and still be too short.
            if (str.codePointCount(0, str.length()) < this.q) {
                if (this.filter) {
                    return new ArrayList<String>();
                }
                return Collections.singletonList(str);
            }

            // Char index at which the last complete q-gram begins.
            int lastStart = str.offsetByCodePoints(str.length(), -this.q);
            List<String> tokens = new ArrayList<String>(str.length());
            int start = 0;
            while (start <= lastStart) {
                tokens.add(str.substring(start, str.offsetByCodePoints(start, this.q)));
                // Advance by one code point so surrogate pairs are never split.
                start = str.offsetByCodePoints(start, 1);
            }
            return tokens;
        }
    }

    /* loaded from: classes2.dex */
    /**
     * Q-gram tokenizer that surrounds the input with padding before
     * delegating to a plain {@link QGram} tokenizer. The start and end
     * padding strings are each repeated {@code q - 1} times.
     */
    static class QGramExtended extends AbstractTokenizer {
        private final String endPadding;
        private final String startPadding;
        private final QGram tokenizer;

        /**
         * Creates a padded q-gram tokenizer that uses {@code "#"} as both
         * start and end padding.
         *
         * @param i the q-gram size
         */
        public QGramExtended(int i) {
            this(i, "#", "#");
        }

        /**
         * Creates a padded q-gram tokenizer.
         *
         * @param i    the q-gram size
         * @param str  the start padding unit, may not be empty
         * @param str2 the end padding unit, may not be empty
         * @throws IllegalArgumentException if either padding unit is empty
         */
        public QGramExtended(int i, String str, String str2) {
            Preconditions.checkArgument(!str.isEmpty(), "startPadding may not be empty");
            Preconditions.checkArgument(!str2.isEmpty(), "endPadding may not be empty");
            this.tokenizer = new QGram(i);
            final int repetitions = i - 1;
            this.startPadding = Strings.repeat(str, repetitions);
            this.endPadding = Strings.repeat(str2, repetitions);
        }

        @Override
        public String toString() {
            StringBuilder description = new StringBuilder("QGramExtended [startPadding=");
            description.append(this.startPadding);
            description.append(", endPadding=").append(this.endPadding);
            description.append(", q=").append(this.tokenizer.getQ());
            description.append("]");
            return description.toString();
        }

        /**
         * Pads {@code str} on both sides and splits the result into q-grams.
         * An empty input is returned as an empty list without any padding.
         *
         * @param str the string to tokenize, must not be {@code null}
         * @return the q-grams of the padded input
         */
        @Override // org.simmetrics.tokenizers.Tokenizer
        public List<String> tokenizeToList(String str) {
            if (!str.isEmpty()) {
                String padded = this.startPadding + str + this.endPadding;
                return this.tokenizer.tokenizeToList(padded);
            }
            return Collections.emptyList();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: classes2.dex */
    /**
     * Tokenizer that applies a sequence of tokenizers one after another:
     * the tokens produced by one tokenizer are each tokenized again by the
     * next one. With no tokenizers configured the input is returned as the
     * single token.
     */
    public static final class Recursive implements Tokenizer {
        private final List<Tokenizer> tokenizers;

        /**
         * Creates a tokenizer that applies the given tokenizers in order.
         *
         * @param list the tokenizers to apply; copied into an immutable list
         */
        Recursive(List<Tokenizer> list) {
            this.tokenizers = ImmutableList.copyOf(list);
        }

        /** Returns the immutable list of tokenizers applied by this instance. */
        List<Tokenizer> getTokenizers() {
            return this.tokenizers;
        }

        @Override
        public String toString() {
            return Joiner.on(" -> ").join(this.tokenizers);
        }

        /**
         * Runs {@code str} through every tokenizer in turn.
         *
         * @param str the string to tokenize, must not be {@code null}
         * @return the tokens produced by the last tokenizer
         */
        @Override // org.simmetrics.tokenizers.Tokenizer
        public List<String> tokenizeToList(String str) {
            List<String> current = new ArrayList<String>(str.length());
            current.add(str);
            List<String> next = new ArrayList<String>(str.length());
            for (Tokenizer tokenizer : this.tokenizers) {
                for (String token : current) {
                    next.addAll(tokenizer.tokenizeToList(token));
                }
                // Swap the buffers and recycle the consumed one so that no
                // new list is allocated per tokenizer.
                List<String> consumed = current;
                current = next;
                next = consumed;
                next.clear();
            }
            return current;
        }

        /**
         * Runs {@code str} through every tokenizer in turn, collecting the
         * tokens of each stage in a multiset.
         *
         * @param str the string to tokenize, must not be {@code null}
         * @return the tokens produced by the last tokenizer with their counts
         */
        @Override // org.simmetrics.tokenizers.Tokenizer
        public Multiset<String> tokenizeToMultiset(String str) {
            Multiset<String> current = HashMultiset.create(str.length());
            current.add(str);
            Multiset<String> next = HashMultiset.create(str.length());
            for (Tokenizer tokenizer : this.tokenizers) {
                // Iterating a multiset visits an element once per occurrence,
                // so duplicate tokens keep their multiplicity.
                for (String token : current) {
                    next.addAll(tokenizer.tokenizeToList(token));
                }
                // Swap the buffers and recycle the consumed one.
                Multiset<String> consumed = current;
                current = next;
                next = consumed;
                next.clear();
            }
            return current;
        }
    }

    /**
     * Combines the given tokenizers into one that applies them in order.
     * A list holding exactly one tokenizer yields that tokenizer itself;
     * any other list is flattened and wrapped in a {@link Recursive}.
     *
     * @param list the tokenizers to chain
     * @return the sole tokenizer, or a recursive tokenizer over all of them
     */
    public static Tokenizer chain(List<Tokenizer> list) {
        if (list.size() != 1) {
            List<Tokenizer> flattened = flatten(list);
            return new Recursive(flattened);
        }
        return list.get(0);
    }

    /**
     * Expands every {@link Recursive} tokenizer in {@code list} into the
     * tokenizers it wraps, keeping all other tokenizers as they are and
     * preserving the overall order. Only one level is expanded.
     *
     * @param list the tokenizers to flatten
     * @return an immutable list without top-level {@link Recursive} wrappers
     */
    private static List<Tokenizer> flatten(List<Tokenizer> list) {
        ImmutableList.Builder<Tokenizer> builder = ImmutableList.builder();
        for (Tokenizer tokenizer : list) {
            if (tokenizer instanceof Recursive) {
                builder.addAll(((Recursive) tokenizer).getTokenizers());
            } else {
                // Add the tokenizer itself; it must not be cast to the builder type.
                builder.add(tokenizer);
            }
        }
        return builder.build();
    }

    /**
     * Creates a q-gram tokenizer that pads its input before tokenizing,
     * using the default padding of {@link QGramExtended}.
     *
     * @param i the q-gram size
     * @return a new padded q-gram tokenizer
     */
    public static Tokenizer qGramWithPadding(int i) {
        final Tokenizer paddedTokenizer = new QGramExtended(i);
        return paddedTokenizer;
    }
}
