package ilsp.linguisticTools;

import java.util.StringTokenizer;
import org.josql.functions.ConversionFunctions;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;
import org.springframework.beans.PropertyAccessor;
import org.springframework.transaction.interceptor.RuleBasedTransactionAttribute;

/* loaded from: input_file:ilsp/linguisticTools/FbtTokeniser.class */
/**
 * Rule-based tokeniser for (mainly Greek) text.
 *
 * <p>{@link #breakIntoTokens(String)} rewrites raw text as a single-space separated
 * token string; {@link #tokenise(String)} additionally renders that token string in
 * the line-oriented format produced by {@code changeToTETTaggerFormat}: one
 * {@code 1\<offset>\t<TAG>\t<token>} line per token, wrapped in
 * {@code (SENT <S>} / {@code )SENT </S>} marker lines.
 *
 * <p>Instances hold only immutable lookup data.
 */
public class FbtTokeniser {
    /**
     * Regex matching one generic punctuation character (tag {@code PUNCT}).
     * Inside the character class {@code ,->} is a range (',' through '>'), so
     * '.', '/', ':', ';', '<', '=' and the ASCII digits are matched by it as well.
     */
    final String punctregex = "[\",-><`:;&\\+\\*!\\?/\\\\]";

    /** Known abbreviations, space separated; lookups search for {@code " " + word + " "}. */
    final String abbrs = " μ.χ π.χ κ.κ μ.μ o.k δολ δρχ δραχ δις δισ τρισ τρις σ.σ σ.τ.ε γ.γ γεν δ.σ δηλ ε.α εκατομ ευρωπ ηλεκτρ κ.α κλπ κ.λ.π κ.λπ κ.ο.κ κ.τ.λ λ.χ λιρ π.μ στρεμμ τ.μ μ τρισ χ.α.ω χγρ χιλ χλμ αριθ ε.γ κ.ε ε.ε κτλ αι. εστ. κ. εκ. ";

    /**
     * Unit / quantity abbreviations that may be written glued to a number ("5km").
     * Order matters: the first entry that is a suffix of the token wins, so longer
     * units must precede their own suffixes ("km" before "m").
     */
    final String[] nabbrs = {"μ.χ", "π.χ", "δολ", "δρχ", "δις", "δισ", "δραχ", "εκατομ", "στρεμμ", "τ.μ", "τρισ", "τρις", "χγρ", "χιλ", "χλμ", "χμ", "μ", "cm", "dm", "mm", "km", "m", "kb", "mb", "gb", "tb"};

    /** Characters that may decorate a number and are ignored by {@link #isNumber(String)}. */
    private static final String NUMBER_DECORATIONS = ".,'`%-+*/=";

    /**
     * Delimiters used to cut the raw text. CR and LF are always included so that the
     * result does not depend on the line separator of the platform running the JVM.
     */
    private static final String TOKEN_DELIMITERS = " \r\n,)(]}{[><`'\"«»:;&-+*!?/\\";

    /**
     * Spaced-out forms produced by tokenisation that must be re-joined or expanded,
     * as {@code {lower-case search text, replacement}} pairs, applied in this order.
     */
    private static final String[][] CONTRACTIONS = {
        {" ό , τι ", " ό,τι "},
        {" ο , τιδήποτε ", " ο,τιδήποτε "},
        {" αν ' ", " ανά "},
        {" αντ ' ", " αντί "},
        {" απ ' ", " από "},
        {" αφ ' ", " από "},
        {" γι ' ", " γιά "},
        {" δι ' ", " διά "},
        {" εξ ' ", " εκ "},
        {" επ ' ", " επί "},
        {" εφ ' ", " επί "},
        {" κατ ' ", " κατά "},
        {" καθ ' ", " κατά "},
        {" μ ' ", " με "},
        {" μετ ' ", " μετά "},
        {" παρ ' ", " παρά "},
        {" σ ' ", " σε "},
        {" υπ ' ", " υπό "},
        {" υφ ' ", " υπό "},
    };

    /**
     * Tells whether {@code str} is a number once the decoration characters
     * {@code . , ' ` % - + * / =} are ignored.
     *
     * <p>The digits are scanned rather than parsed, so arbitrarily long digit
     * strings are accepted.
     *
     * @param str candidate token
     * @return {@code true} if at least one digit is present and every other
     *         character is a decoration character
     */
    private boolean isNumber(String str) {
        boolean sawDigit = false;
        for (int k = 0; k < str.length(); k++) {
            char c = str.charAt(k);
            if (NUMBER_DECORATIONS.indexOf(c) >= 0) {
                continue;
            }
            if (Character.digit(c, 10) < 0) {
                return false;
            }
            sawDigit = true;
        }
        return sawDigit;
    }

    /**
     * Separates a number from a unit abbreviation glued to it, e.g. "5km" becomes
     * "5 km" and "5km." becomes "5 km.".
     *
     * <p>Only the first entry of {@link #nabbrs} that is a (case-insensitive) suffix
     * of the token, with or without a trailing dot, is considered. The unit is
     * returned with the casing it has in {@code str}.
     *
     * @param str candidate token
     * @return the separated form; {@code str + " "} when the token is nothing but
     *         the unit itself; {@code null} when the token is shorter than two
     *         characters, has no unit suffix, or the part before the unit is not
     *         a number
     */
    private String isValue(String str) {
        if (str.length() < 2) {
            return null;
        }
        String lower = str.toLowerCase();
        if (lower.length() != str.length()) {
            // Lower-casing changed the length, so suffix offsets would no longer
            // line up with the original text; match the raw text instead.
            lower = str;
        }
        for (String unit : nabbrs) {
            int suffixLength;
            if (lower.endsWith(unit)) {
                suffixLength = unit.length();
            } else if (lower.endsWith(unit + ".")) {
                suffixLength = unit.length() + 1;
            } else {
                continue;
            }
            // Cut by position instead of regex-splitting on the unit: units such as
            // "μ.χ" contain regex metacharacters, and a split would also cut at any
            // earlier occurrence of the unit and drop the rest of the token.
            int cut = str.length() - suffixLength;
            if (cut == 0) {
                return str + " ";
            }
            String number = str.substring(0, cut);
            return isNumber(number) ? number + " " + str.substring(cut) : null;
        }
        return null;
    }

    /**
     * @param str a single token
     * @return {@code true} if the token is one of {@code . , - * + /}, the
     *         punctuation allowed between two numbers that get merged
     */
    private boolean isPUN(String str) {
        return str.equals(".") || str.equals(",") || str.equals("-")
                || str.equals("*") || str.equals("+") || str.equals("/");
    }

    /**
     * @param str candidate token
     * @return {@code true} if the lower-cased token is listed in {@link #abbrs}
     */
    private boolean isABBR(String str) {
        return abbrs.contains(" " + str.toLowerCase() + " ");
    }

    /**
     * Renders a space-separated token string in the tagger input format.
     *
     * <p>Each token yields {@code 1\<offset>\t<TAG>\t<token>\n}. The offset starts at
     * 1 and advances by the length of each token (separating spaces are not
     * counted). A sentence terminator that is followed by more tokens closes the
     * current sentence block and opens a new one.
     *
     * @param str tokens separated by spaces
     * @return the formatted text, always starting with an opening and ending with a
     *         closing sentence marker line
     */
    private String changeToTETTaggerFormat(String str) {
        StringBuilder out = new StringBuilder("\t(SENT\t<S>\n");
        int offset = 1;
        StringTokenizer tokens = new StringTokenizer(str, " ", false);
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            String tag = tagFor(token);
            out.append("1\\").append(offset).append('\t').append(tag).append('\t').append(token).append('\n');
            offset += token.length();
            if (tag.equals("PTERM_P") && tokens.hasMoreTokens()) {
                out.append("\t)SENT\t</S>\n\t(SENT\t<S>\n");
            }
        }
        return out.append("\t)SENT\t</S>\n").toString();
    }

    /** Chooses the tag for one token; the checks are ordered from most to least specific. */
    private String tagFor(String token) {
        if (token.equals(".") || token.equals(";") || token.equals("!")) {
            return "PTERM_P";
        }
        if (token.equals(")") || token.equals("]") || token.equals("»") || token.equals("}")) {
            return "CPUNCT";
        }
        if (token.equals("(") || token.equals("[") || token.equals("«") || token.equals("{")) {
            return "OPUNCT";
        }
        if (isNumber(token)) {
            return "DIG";
        }
        if (token.matches(punctregex)) {
            return "PUNCT";
        }
        // A trailing dot is ignored for the abbreviation lookup.
        // NOTE(review): entries of abbrs that themselves end in '.' can therefore
        // never match here, although checkDots tests both forms - confirm whether
        // that asymmetry is intended.
        String bare = token;
        if (token.endsWith(".") && token.length() > 1) {
            bare = token.substring(0, token.length() - 1);
        }
        return isABBR(bare) ? "NBABBR" : "TOK";
    }

    /**
     * Decides how the dots inside a token are split off.
     *
     * <p>Known abbreviations and numbers are kept whole, number+unit tokens are
     * separated by {@link #isValue(String)}, a trailing dot is detached (and the
     * remainder processed recursively), and any other inner dot becomes a token of
     * its own.
     *
     * @param str a token containing at least one dot (or a remainder produced by
     *            the recursion)
     * @return the resulting tokens, each followed by a space
     */
    private String checkDots(String str) {
        if (isABBR(str)) {
            return str + " ";
        }
        if (str.endsWith(".")) {
            String withoutDot = str.substring(0, str.length() - 1);
            if (isABBR(withoutDot)) {
                return str + " ";
            }
            String value = isValue(str);
            if (value != null) {
                return value + " ";
            }
            return checkDots(withoutDot) + ". ";
        }
        if (isNumber(str)) {
            return str + " ";
        }
        String value = isValue(str);
        if (value != null) {
            return value + " ";
        }
        String[] pieces = str.split("\\.");
        StringBuilder out = new StringBuilder();
        for (int k = 0; k < pieces.length; k++) {
            out.append(pieces[k]).append(k == pieces.length - 1 ? " " : " . ");
        }
        return out.toString();
    }

    /**
     * Splits raw text into tokens and returns them joined by spaces.
     *
     * <p>Steps: cut at whitespace and punctuation (punctuation characters become
     * tokens), resolve dots and number+unit tokens, re-join or expand the forms
     * listed in {@code CONTRACTIONS}, then glue back "number punctuation number"
     * sequences ("1 , 5" becomes "1,5") and a leading apostrophe before a number
     * ("' 90" becomes "'90").
     *
     * <p>A line feed contributes one extra space to the output and a carriage
     * return is dropped, so consecutive spaces may occur in the result.
     *
     * @param str raw text
     * @return the trimmed token string
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String breakIntoTokens(String str) {
        // The leading space lets the contraction patterns match at the very start.
        StringBuilder spaced = new StringBuilder(" ");
        StringTokenizer tokens = new StringTokenizer(str, TOKEN_DELIMITERS, true);
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            if (token.contains(".") && token.length() > 1) {
                spaced.append(checkDots(token));
                continue;
            }
            String value = isValue(token);
            if (value != null) {
                spaced.append(value).append(' ');
            } else if (token.equals("\n")) {
                spaced.append(' ');
            } else if (!token.equals(" ") && !token.equals("\r")) {
                spaced.append(token).append(' ');
            }
        }

        String text = spaced.toString();
        // The guard is evaluated against the text as it was before any replacement.
        String lowerCase = text.toLowerCase();
        for (String[] contraction : CONTRACTIONS) {
            if (lowerCase.contains(contraction[0])) {
                text = text.replaceAll("(?iu)" + contraction[0], contraction[1]);
            }
        }

        String result = text.trim();
        String[] parts = result.split(" ");
        // Every window of three tokens is examined, including the last one.
        for (int k = 0; k + 2 < parts.length; k++) {
            if (isNumber(parts[k]) && isPUN(parts[k + 1]) && isNumber(parts[k + 2])) {
                result = result.replace(parts[k] + " " + parts[k + 1] + " " + parts[k + 2],
                        parts[k] + parts[k + 1] + parts[k + 2]);
            }
        }
        // A backtick is glued to whatever follows it; an apostrophe only to a number.
        for (int k = 0; k + 1 < parts.length; k++) {
            if (parts[k].equals("`") || (parts[k].equals("'") && isNumber(parts[k + 1]))) {
                result = result.replace(parts[k] + " " + parts[k + 1], parts[k] + parts[k + 1]);
            }
        }
        return result;
    }

    /**
     * Tokenises raw text and renders the tokens in the tagger input format.
     *
     * @param str raw text
     * @return the output of {@code changeToTETTaggerFormat} applied to
     *         {@link #breakIntoTokens(String)}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public String tokenise(String str) {
        return changeToTETTaggerFormat(breakIntoTokens(str));
    }

    /** Prints the tagger-format output for a built-in sample sentence. */
    public static void main(String[] strArr) {
        System.out.println(">>> " + new FbtTokeniser().tokenise(" σ' το Άμστερνταμ κλπ. π.χ. σ': Αρθρογραφούσε π.χ σ' 15+6=2010 (επί σειρά ετών] κ.λ.π 2-6χμ 5. Ο οδηγός το έγραφε καθαρά."));
    }
}
