package ilsp.disambiguationTools;

import iai.globals.Language;
import iai.resources.Resources;
import ilsp.components.FromXmlToInternal;
import ilsp.core.DisjunctiveWord;
import ilsp.core.Document;
import ilsp.core.Element;
import ilsp.core.MultiWord;
import ilsp.core.Phrase;
import ilsp.core.Sentence;
import ilsp.core.Word;
import ilsp.tagger.FbtTagger;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.Iterator;
import java.util.TimeZone;
import java.util.Vector;

/* loaded from: input_file:ilsp/disambiguationTools/Main.class */
public class Main {
    // Lexicon file locations. None of these four constants is referenced by any method of this
    // class; the first three are absolute paths below one developer's home directory.
    private static final String presemtDEENlexiconPath = "/home/gtsat/workspaceJ/presemt/commlex-som+wf/lex+presemt_DE-EN-utf8-inline.xml";
    private static final String semevalDEENlexiconPath = "/home/gtsat/workspaceJ/presemt/commlex-som+wf/lex+semeval_DE-EN-utf8-inline.xml";
    private static final String commonDEENlexiconPath = "/home/gtsat/workspaceJ/presemt/commlex-som+wf/lex+common_DE-EN-utf8-inline.xml";
    private static final String ELENlexiconPath = "data/Lexica/lex_EL-EN-utf8.xml";
    // When true, runExample writes the freshly loaded document back to disk as XML.
    private static boolean saveAmbig = true;
    // When true, runExample writes the decoded document to disk as XML.
    private static boolean saveDecoded = false;
    // Data files passed as the first two constructor arguments of DocProcessor / Disambiguator.
    // The German pair is used by the *Semeval methods and testDocDisambiguator, the English pair
    // by the remaining experiments. Three of the four are absolute, machine-specific paths.
    // NOTE(review): "SOM" / "WF" presumably mean self-organising map / word frequencies — confirm.
    private static String GermanSOM = "/media/STORE N GO/results/20121016/maxBfreq-DE-70-75-HEADTRIGRAMS-iter500-filtered/map.dat";
    private static String GermanWF = "src/ilsp/disambiguationTools/results-WordCounter/Data.dat";
    private static String EnglishSOM = "/media/STORE N GO/results/20120724/maxBfreq-EN-70-75-HEADTRIGRAMS-iter500-filtered/map.dat";
    private static String EnglishWF = "/media/STORE N GO/results/08032012/wordFreqs-EN-1GB/Data.dat";

    /**
     * Program entry point. Runs {@link #testTrigrams()} and nothing else.
     *
     * @param strArr command-line arguments; not read
     * @throws Exception whatever the delegated experiment throws
     */
    public static void main(String[] strArr) throws Exception {
        Main.testTrigrams();
    }

    /**
     * Prints an XML rendering of a fixed Greek sample text to standard output.
     *
     * <p>For {@code Language.GREEK} the sample document is run through the Greek tagger and the
     * Greek/English PMG obtained from {@code Resources}. For any other value the raw text is
     * handed to {@code Resources.annotatedFromRaw} instead.
     *
     * @param language selects which of the two processing paths is taken
     * @throws Exception propagated from the underlying resources
     */
    public static void lemmatize(Language language) throws Exception {
        final String sampleText = "Οι ένοπλοι αντάρτες απειλούν να ανατινάξουν το θέατρο εάν η αστυνομία προσπαθήσει να απελευθερώσει τους ομήρους. Ήταν η χρονιά που οδήγησε στην έκρηξη του εμφύλιου πολέμου. Ο Ερυθρός σταυρός ανακοίνωσε ότι τη βάση θα επισκεφτούν τέσσερα μέλη του, ανάμεσά τους και ένας γιατρός. Ο γενικός γραμματέας των Ηνωμένων Εθνών, Κόφι Ανάν, επέκρινε την απόφαση της Βουλής, αλλά εξέφρασε την ικανοποίησή του γιατί δεν κατακρατήθηκε ολόκληρο το ποσόν. Έκθεση του 2004 των Ηνωμένων Εθνών προειδοποιούσε ότι οι αποχετευτικές υποδομές είχαν χτιστεί για να εξυπηρετούν ένα πληθυσμό 50.000. Το 1966 ασκεί έφεση και οι αρχικές κατηγορίες καταρρέουν. Αρθρογραφούσε επί σειρά ετών στον τοπικό τύπο για όλα τα μεγάλα θέματα που έχουν σχέση με το νομό Χανίων, τους πολίτες και την ανάπτυξη της περιοχής. Η κίνηση έχει σαφώς μειωθεί στο κέντρο της πόλης.";
        // Constructed up front on both paths, although only the Greek path consumes it.
        final Document sampleDocument = new Document(0, sampleText);
        if (language == Language.GREEK) {
            System.out.println(
                    Resources.getPMG(Language.GREEK, Language.ENGLISH)
                            .parse(((FbtTagger) Resources.getTagger(Language.GREEK)).parse(sampleDocument))
                            .toXML());
            return;
        }
        System.out.println(
                Resources.annotatedFromRaw(sampleText, Language.ENGLISH, true, Language.GREEK)
                        .toDocument()
                        .toXML());
    }

    /**
     * Scores n-gram disambiguation restricted to words whose tag starts with 'v' or 'n'.
     *
     * <p>For every sentence of the source document the matching words are collected and converted
     * with {@code toVecWordArrays}. The result is decoded with the bigram decoder when it has
     * exactly two entries and with the trigram decoder otherwise; a result with exactly one entry
     * is not decoded and yields a single {@code null} element. The first word of each decoded
     * array forms the output sentence, and the score against the reference document is printed.
     *
     * @throws IOException propagated from loading the documents or n-gram data
     */
    public static void testVNTrigramsDisambiguation() throws IOException {
        NgramsDisambiguator disambiguator = new NgramsDisambiguator("../enTenTen1.vn/fast-bigrams", "../enTenTen1.vn/fast-trigrams");
        DocProcessor processor = new DocProcessor(EnglishSOM, EnglishWF, true, true, Language.GREEK, Language.ENGLISH);
        Document source = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/ambig.Source-EL40_dev.xml", "utf-8");
        Document decoded = new Document(0);
        int sentenceNo = 0;
        for (Element element : source.getElements()) {
            if (!(element instanceof Sentence)) {
                continue;
            }
            Sentence original = (Sentence) element;
            Sentence filtered = new Sentence(original.getId(), original.getIndex(), "");
            for (Element candidate : original.getWords()) {
                if (!(candidate instanceof Word)) {
                    continue;
                }
                String tag = ((Word) candidate).getTag().toLowerCase();
                // Only the first tag character decides whether the word is kept.
                if (tag.charAt(0) == 'v' || tag.charAt(0) == 'n') {
                    filtered.addToVector(candidate);
                }
            }

            Vector<Vector<Word[]>> slots = processor.toVecWordArrays(filtered.getWords());
            Vector<Word[]> best;
            int slotCount = slots.size();
            if (slotCount == 1) {
                // Nothing to decode against: emit one placeholder whose first entry is null.
                best = new Vector<>();
                best.add(new Word[]{null, null});
            } else if (slotCount == 2) {
                best = disambiguator.decodeBigram(slots);
            } else {
                best = disambiguator.decodeTrigrams(slots);
            }

            Sentence result = new Sentence(sentenceNo, sentenceNo, "");
            for (Word[] ngram : best) {
                result.addToVector(ngram[0]);
            }
            decoded.addToVector(result);
            sentenceNo++;
        }
        Document reference = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println("@@ Document score: " + processor.compareDocs(reference, decoded) + "\n\n");
    }

    /**
     * Scores n-gram disambiguation over phrase heads, adding the fHead of "PC" phrases as well.
     *
     * <p>For every sentence each phrase contributes its head; a phrase whose type compares equal
     * to {@code "PC"} contributes its {@code getfHead()} first. The collected elements are decoded
     * with the bigram decoder for exactly two entries and the trigram decoder otherwise; exactly
     * one entry yields a single {@code null} element. The score against the reference document
     * is printed.
     *
     * @throws IOException propagated from loading the documents or n-gram data
     */
    public static void testheadsPCfheadsTrigramsDisambiguation() throws IOException {
        NgramsDisambiguator disambiguator = new NgramsDisambiguator("../enTenTen1.headsPCfheads/fast-bigrams", "../enTenTen1.headsPCfheads/fast-trigrams");
        DocProcessor processor = new DocProcessor(EnglishSOM, EnglishWF, true, true, Language.GREEK, Language.ENGLISH);
        Document source = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/ambig.Source-EL40_dev.xml", "utf-8");
        Document decoded = new Document(0);
        int sentenceNo = 0;
        for (Element element : source.getElements()) {
            if (!(element instanceof Sentence)) {
                continue;
            }
            Sentence original = (Sentence) element;
            Sentence heads = new Sentence(original.getId(), original.getIndex(), "");
            for (Element part : original.getPhrases()) {
                Phrase phrase = (Phrase) part;
                if (phrase.getType().compareTo("PC") == 0) {
                    heads.addToVector(phrase.getfHead());
                }
                heads.addToVector(phrase.getHead());
            }

            Vector<Vector<Word[]>> slots = processor.toVecWordArrays(heads.getWords());
            Vector<Word[]> best;
            int slotCount = slots.size();
            if (slotCount == 1) {
                // Nothing to decode against: emit one placeholder whose first entry is null.
                best = new Vector<>();
                best.add(new Word[]{null, null});
            } else if (slotCount == 2) {
                best = disambiguator.decodeBigram(slots);
            } else {
                best = disambiguator.decodeTrigrams(slots);
            }

            Sentence result = new Sentence(sentenceNo, sentenceNo, "");
            for (Word[] ngram : best) {
                result.addToVector(ngram[0]);
            }
            decoded.addToVector(result);
            sentenceNo++;
        }
        Document reference = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println("@@ Document score: " + processor.compareDocs(reference, decoded) + "\n\n");
    }

    /**
     * Scores n-gram disambiguation over the head of every phrase.
     *
     * <p>Unlike the sibling experiments, the head list is converted with the
     * {@code NgramsDisambiguator}'s own {@code toVecWordArrays}. Exactly two entries go to the
     * bigram decoder, exactly one entry yields a single {@code null} element, anything else goes
     * to the trigram decoder. The score against the reference document is printed.
     *
     * @throws IOException propagated from loading the documents or n-gram data
     */
    public static void testjheadsTrigramsDisambiguation() throws IOException {
        NgramsDisambiguator disambiguator = new NgramsDisambiguator("../enTenTen1.jheads/fast-bigrams", "../enTenTen1.jheads/fast-trigrams");
        DocProcessor processor = new DocProcessor(EnglishSOM, EnglishWF, true, true, Language.GREEK, Language.ENGLISH);
        Document source = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/ambig.Source-EL40_dev.xml", "utf-8");
        Document decoded = new Document(0);
        int sentenceNo = 0;
        for (Element element : source.getElements()) {
            if (!(element instanceof Sentence)) {
                continue;
            }
            Sentence original = (Sentence) element;
            Sentence heads = new Sentence(original.getId(), original.getIndex(), "");
            for (Element part : original.getPhrases()) {
                heads.addToVector(((Phrase) part).getHead());
            }

            Vector<Vector<Word[]>> slots = disambiguator.toVecWordArrays(heads.getWords());
            Vector<Word[]> best;
            int slotCount = slots.size();
            if (slotCount == 1) {
                // Nothing to decode against: emit one placeholder whose first entry is null.
                best = new Vector<>();
                best.add(new Word[]{null, null});
            } else if (slotCount == 2) {
                best = disambiguator.decodeBigram(slots);
            } else {
                best = disambiguator.decodeTrigrams(slots);
            }

            Sentence result = new Sentence(sentenceNo, sentenceNo, "");
            for (Word[] ngram : best) {
                result.addToVector(ngram[0]);
            }
            decoded.addToVector(result);
            sentenceNo++;
        }
        Document reference = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println("@@ Document score: " + processor.compareDocs(reference, decoded) + "\n\n");
    }

    /**
     * Two-pass experiment: decode phrase heads first, then pair each decoded head with the
     * phrase's fHead candidates in a second bigram decoding.
     *
     * <p>Pass 1 collects the head of every phrase of a sentence and decodes the list with the
     * bigram decoder (exactly two entries) or the trigram decoder (any other count except one).
     * Pass 2 walks the non-empty phrases again: when {@code getfHead()} is {@code null} or is the
     * same object as the head, the pass-1 result is taken over; otherwise the fHead candidates and
     * the pass-1 result are decoded together with {@code decodeBigram(..., true)}.
     * Finally the lemma string and the score against the reference document are printed.
     *
     * <p>Two situations end the WHOLE method early, without printing a score: a sentence whose
     * head list converts to exactly one entry, and an fHead that is a {@code MultiWord}.
     *
     * @throws IOException propagated from loading the documents or n-gram data
     */
    public static void testjheadsThenFHeadsTrigramsDisambiguation() throws IOException {
        NgramsDisambiguator ngramsDisambiguator = new NgramsDisambiguator("../enTenTen1.jheads/fast-bigrams", "../enTenTen1.jheads/fast-trigrams", "../enTenTen1.jheads/fast-bigrams-prepos");
        DocProcessor docProcessor = new DocProcessor(EnglishSOM, EnglishWF, true, true, Language.GREEK, Language.ENGLISH);
        Document LoadDocumentFromFile = docProcessor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/ambig.Source-EL40_dev.xml", "utf-8");
        Document document = new Document(0);
        int i = 0;
        Iterator<Element> it = LoadDocumentFromFile.getElements().iterator();
        while (it.hasNext()) {
            Element next = it.next();
            if (next instanceof Sentence) {
                Sentence sentence = (Sentence) next;
                // Pass 1 input: one head per phrase, including phrases that pass 2 skips as empty.
                Sentence sentence2 = new Sentence(sentence.getId(), sentence.getIndex(), "");
                Iterator<Element> it2 = sentence.getPhrases().iterator();
                while (it2.hasNext()) {
                    sentence2.addToVector(((Phrase) it2.next()).getHead());
                }
                Vector<Vector<Word[]>> vecWordArrays = docProcessor.toVecWordArrays(sentence2.getWords());
                // vector: pass-1 results, each Word[] wrapped in its own single-element Vector.
                // vector2: final Word[] sequence for this sentence. Both are raw-typed.
                Vector vector = new Vector();
                Vector vector2 = new Vector();
                switch (vecWordArrays.size()) {
                    case 1:
                        // Returns from the method, not just this sentence: nothing more is
                        // processed and no score is printed.
                        // NOTE(review): sibling experiments emit a placeholder here — confirm intent.
                        return;
                    case 2:
                        Iterator<Word[]> it3 = ngramsDisambiguator.decodeBigram(vecWordArrays).iterator();
                        while (it3.hasNext()) {
                            Word[] next2 = it3.next();
                            Vector vector3 = new Vector();
                            vector3.add(next2);
                            vector.add(vector3);
                        }
                        break;
                    default:
                        // Also reached for size 0, i.e. the trigram decoder receives an empty input.
                        Iterator<Word[]> it4 = ngramsDisambiguator.decodeTrigrams(vecWordArrays).iterator();
                        while (it4.hasNext()) {
                            Word[] next3 = it4.next();
                            Vector vector4 = new Vector();
                            vector4.add(next3);
                            vector.add(vector4);
                        }
                        break;
                }
                // Pass 2: i2 indexes into the pass-1 results.
                int i2 = 0;
                Iterator<Element> it5 = sentence.getPhrases().iterator();
                while (it5.hasNext()) {
                    Element next4 = it5.next();
                    if (((Phrase) next4).size() != 0) {
                        Element element = ((Phrase) next4).getfHead();
                        Vector<Vector<Word[]>> vector5 = new Vector<>();
                        Vector<Word[]> vector6 = new Vector<>();
                        if (element == null || element == ((Phrase) next4).getHead()) {
                            // No separate fHead: reuse the pass-1 result at i2.
                            // NOTE(review): i2 is not advanced on this path, so the next phrase
                            // reads the same pass-1 entry again — confirm this is intended.
                            vector2.add((Word[]) ((Vector) vector.get(i2)).get(0));
                        } else {
                            // Build the candidate list for the fHead, one single-word array each.
                            if (element instanceof Word) {
                                vector6.add(new Word[]{(Word) element});
                            } else if (element instanceof DisjunctiveWord) {
                                Iterator<Element> it6 = ((DisjunctiveWord) element).getElements().iterator();
                                while (it6.hasNext()) {
                                    vector6.add(new Word[]{(Word) it6.next()});
                                }
                            } else if (element instanceof MultiWord) {
                                // MultiWord fHeads are not handled: print a marker and abandon
                                // the whole method (no score is printed).
                                System.out.println("-----------");
                                return;
                            }
                            // Decode [fHead candidates, pass-1 head] as a bigram; the meaning of
                            // the boolean flag is defined by NgramsDisambiguator, not visible here.
                            vector5.add(vector6);
                            vector5.add((Vector) vector.get(i2));
                            vector2.addAll(ngramsDisambiguator.decodeBigram(vector5, true));
                            i2++;
                        }
                    }
                }
                // Output sentence: the first word of every collected array.
                Sentence sentence3 = new Sentence(i, i, "");
                Iterator it7 = vector2.iterator();
                while (it7.hasNext()) {
                    sentence3.addToVector(((Word[]) it7.next())[0]);
                }
                document.addToVector(sentence3);
                i++;
            }
        }
        Document LoadDocumentFromFile2 = docProcessor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println(document.toLemmaString());
        System.out.println("@@ Document score: " + docProcessor.compareDocs(LoadDocumentFromFile2, document) + "\n\n");
    }

    /**
     * Decodes every element of the source document with
     * {@code NgramsDisambiguator.decodeVNheads(sentence, true, true)} and prints the resulting
     * lemma string followed by the score against the reference document.
     *
     * <p>Every document element is cast to {@code Sentence} without a type check, and
     * {@code null} entries in a decoding result are dropped.
     *
     * @throws IOException propagated from loading the documents or n-gram data
     */
    public static void testTrigrams() throws IOException {
        DocProcessor processor = new DocProcessor(EnglishSOM, EnglishWF, true, true, Language.GREEK, Language.ENGLISH);
        Document source = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/ambig.Source-EL40_dev.xml", "utf-8");
        NgramsDisambiguator disambiguator = new NgramsDisambiguator("", "");
        Document decoded = new Document(0);
        int sentenceNo = 0;
        for (Element element : source.getElements()) {
            Sentence result = new Sentence(sentenceNo, sentenceNo, "");
            for (Element head : disambiguator.decodeVNheads((Sentence) element, true, true)) {
                if (head == null) {
                    continue;
                }
                result.addToVector(head);
            }
            decoded.addToVector(result);
            sentenceNo++;
        }
        // The lemma string is rendered before the reference document is loaded, as before.
        String lemmas = String.valueOf(decoded.toLemmaString());
        Document reference = processor.LoadDocumentFromFile("/home/gtsat/workspaceJ/presemt/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println(lemmas + "\n@@ Document score: " + processor.compareDocs(reference, decoded));
    }

    /**
     * Decodes every element of the source document with {@code DocProcessor.decodeHeads} and
     * prints the score against the reference document.
     *
     * <p>Every document element is cast to {@code Sentence} without a type check, and
     * {@code null} entries in a decoding result are dropped.
     *
     * @param z  passed to the {@code DocProcessor} constructor and to {@code decodeHeads}
     * @param z2 passed to the {@code DocProcessor} constructor and to {@code decodeHeads}
     * @throws IOException propagated from loading the documents
     */
    public static void runHeadDecoder(boolean z, boolean z2) throws IOException {
        // The directory ends in '/' and the file names start with '/', so paths contain "//".
        final String baseDir = "/home/gtsat/workspaceJ/presemt/";
        DocProcessor processor = new DocProcessor(EnglishSOM, EnglishWF, z, z2, Language.GREEK, Language.ENGLISH);
        Document source = processor.LoadDocumentFromFile(baseDir + "/ambig.Source-EL40_dev.xml", "utf-8");
        Document decoded = new Document(0);
        int sentenceNo = 0;
        for (Element element : source.getElements()) {
            Sentence result = new Sentence(sentenceNo, sentenceNo, "");
            for (Element head : processor.decodeHeads((Sentence) element, z, z2)) {
                if (head == null) {
                    continue;
                }
                result.addToVector(head);
            }
            decoded.addToVector(result);
            sentenceNo++;
        }
        Document reference = processor.LoadDocumentFromFile(baseDir + "/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println("@@ Document score: " + processor.compareDocs(reference, decoded));
    }

    /**
     * Loads the EL7 development document, decodes it as a whole with
     * {@code DocProcessor.decodeElement} and prints the score and the average disjunction.
     *
     * <p>Depending on the {@code saveAmbig} / {@code saveDecoded} flags the loaded and the decoded
     * document are also written to disk as UTF-8 XML. Both writers are managed with
     * try-with-resources so the file handles are released even when {@code toXML()} or the write
     * itself throws.
     *
     * @param z  passed to the {@code DocProcessor} constructor; together with {@code z2} it also
     *           selects the name of the decoded output file
     * @param z2 passed to the {@code DocProcessor} constructor
     * @throws IOException if a document cannot be read or an output file cannot be written
     */
    public static void runExample(boolean z, boolean z2) throws IOException {
        final String baseDir = "/home/gtsat/workspaceJ/presemt/";
        DocProcessor docProcessor = new DocProcessor(EnglishSOM, EnglishWF, z, z2, Language.GREEK, Language.ENGLISH);
        Document ambiguous = docProcessor.LoadDocumentFromFile(baseDir + "/ambig.Source-EL7_dev.xml", "utf-8");
        if (saveAmbig) {
            try (FileOutputStream out = new FileOutputStream(new File(baseDir + "ambig.ambig.Source-EL7_dev.xml"));
                 OutputStreamWriter writer = new OutputStreamWriter(out, "utf-8")) {
                writer.write(ambiguous.toXML());
            }
        }
        Document decoded = (Document) docProcessor.decodeElement(ambiguous);
        // NOTE(review): the source is the EL7 set but the reference is the EN40 set — confirm
        // that this pairing is intended.
        Document reference = docProcessor.LoadDocumentFromFile(baseDir + "/RefLEM-EN40_dev.xml", "utf-8");
        System.out.println("@@ Document score: " + docProcessor.compareDocs(reference, decoded));
        System.out.println("@@ AVG disjunction: " + docProcessor.getAvgDisjunction(ambiguous));
        if (saveDecoded) {
            String decodedName = (z || z2)
                    ? baseDir + "/decoded.fhp.ambig.Source-EL7_dev.xml"
                    : baseDir + "/decoded.sentences.ambig.Source-EL7_dev.xml";
            try (FileOutputStream out = new FileOutputStream(new File(decodedName));
                 OutputStreamWriter writer = new OutputStreamWriter(out, "utf-8")) {
                writer.write(decoded.toXML());
            }
        }
    }

    /**
     * For each of five fixed words, translates the matching {@code headed.<word>.data.xml} file
     * and prints, one line per sentence, the lemmas produced by
     * {@code NgramsDisambiguator.decodeHeads(sentence, true, true)}.
     *
     * <p>A {@code null} result element is printed as {@code "null "}; elements that are neither
     * {@code Word} nor {@code MultiWord} print nothing. Progress goes to standard error.
     *
     * @param z  passed to the {@code DocProcessor} constructor and echoed in the progress line;
     *           the decoding call itself always uses {@code true}
     * @param z2 same treatment as {@code z}
     * @throws IOException propagated from translating the input files
     */
    public static void runNgrammSemeval(boolean z, boolean z2) throws IOException {
        final String baseDir = "/home/gtsat/workspaceJ/presemt/";
        final String[] targetWords = {"bank", "movement", "occupation", "passage", "plant"};
        DocProcessor processor = new DocProcessor(GermanSOM, GermanWF, z, z2, Language.ENGLISH, Language.GERMAN);
        NgramsDisambiguator disambiguator = new NgramsDisambiguator("", "");
        for (String word : targetWords) {
            System.err.println("@@ Processing: " + word + " for ${vars}=(" + z + "," + z2 + ").");
            for (Element sentence : processor.translateFile(baseDir + "headed." + word + ".data.xml", "utf-8", null).getElements()) {
                for (Element head : disambiguator.decodeHeads((Sentence) sentence, true, true)) {
                    if (head == null) {
                        System.out.print("null ");
                        continue;
                    }
                    if (head instanceof Word) {
                        System.out.print(((Word) head).toLemmaString() + " ");
                        continue;
                    }
                    if (head instanceof MultiWord) {
                        System.out.print(((MultiWord) head).toLemmaString() + " ");
                    }
                }
                System.out.println();
            }
        }
    }

    /**
     * For each of five fixed words, translates the matching {@code headed.<word>.data.xml} file
     * and prints, one line per sentence, the lemmas produced by
     * {@code DocProcessor.decodeHeads(sentence, true, true)}.
     *
     * <p>A {@code null} result element is printed as {@code "null "}; elements that are neither
     * {@code Word} nor {@code MultiWord} print nothing. Progress goes to standard error.
     *
     * @param z  passed to the {@code DocProcessor} constructor and echoed in the progress line;
     *           the decoding call itself always uses {@code true}
     * @param z2 same treatment as {@code z}
     * @throws IOException propagated from translating the input files
     */
    public static void runSOMheadsSemeval(boolean z, boolean z2) throws IOException {
        final String baseDir = "/home/gtsat/workspaceJ/presemt/";
        final String[] targetWords = {"bank", "movement", "occupation", "passage", "plant"};
        DocProcessor processor = new DocProcessor(GermanSOM, GermanWF, z, z2, Language.ENGLISH, Language.GERMAN);
        for (String word : targetWords) {
            System.err.println("@@ Processing: " + word + " for ${vars}=(" + z + "," + z2 + ").");
            for (Element sentence : processor.translateFile(baseDir + "headed." + word + ".data.xml", "utf-8", null).getElements()) {
                for (Element head : processor.decodeHeads((Sentence) sentence, true, true)) {
                    if (head == null) {
                        System.out.print("null ");
                        continue;
                    }
                    if (head instanceof Word) {
                        System.out.print(((Word) head).toLemmaString() + " ");
                        continue;
                    }
                    if (head instanceof MultiWord) {
                        System.out.print(((MultiWord) head).toLemmaString() + " ");
                    }
                }
                System.out.println();
            }
        }
    }

    /**
     * Runs {@code DocProcessor.translateFile} on the {@code <word>.data.xml} file of each of five
     * fixed words. The translation results are discarded; nothing is printed by this method.
     *
     * <p>The earlier version also created a {@code TimeZone}, configured a throw-away
     * {@code DateFormat} and read a {@code Calendar} timestamp on every iteration without using
     * any of the values; those statements had no observable effect and have been removed.
     *
     * @param z  passed to the {@code DocProcessor} constructor
     * @param z2 passed to the {@code DocProcessor} constructor
     * @throws IOException propagated from translating the input files
     */
    public static void runSOMSemeval(boolean z, boolean z2) throws IOException {
        final String baseDir = "/home/gtsat/workspaceJ/presemt/";
        DocProcessor docProcessor = new DocProcessor(GermanSOM, GermanWF, z, z2, Language.ENGLISH, Language.GERMAN);
        for (String word : new String[]{"bank", "movement", "occupation", "passage", "plant"}) {
            docProcessor.translateFile(baseDir + word + ".data.xml", "utf-8", null);
        }
    }

    /**
     * Disambiguates {@code sentExample.xml} and writes the result as UTF-8 XML to
     * {@code decoded.testDocDisambiguator.xml}; both paths are relative to the working directory.
     *
     * <p>The output stream and writer are managed with try-with-resources so the file handle is
     * released even when {@code getResult()}, {@code toXML()} or the write throws.
     *
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void testDocDisambiguator() throws IOException {
        Disambiguator disambiguator = new Disambiguator(GermanSOM, GermanWF, true);
        disambiguator.disambiguateFile("sentExample.xml", "utf-8");
        try (FileOutputStream out = new FileOutputStream(new File("decoded.testDocDisambiguator.xml"));
             OutputStreamWriter writer = new OutputStreamWriter(out, "utf-8")) {
            writer.write(((Document) disambiguator.getResult()).toXML());
        }
    }

    /**
     * Reads a whole text file and turns it into a {@code Document}.
     *
     * <p>The file is read line by line; each line is appended followed by {@code "\n"}, so the
     * text always ends with a newline when the file has at least one line. The text is used both
     * as the document's constructor argument and as the input of
     * {@code FromXmlToInternal.transformXML()}.
     *
     * <p>The file stream and reader are closed in all cases, including when the charset name is
     * rejected or reading fails; the earlier version never closed them.
     *
     * @param str  path of the file to read
     * @param str2 name of the charset used to decode the file
     * @return the document populated by the XML transformation
     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
     */
    public static Document getDocFromFile(String str, String str2) throws IOException {
        StringBuilder sb = new StringBuilder();
        // The stream is its own resource so it is closed even if InputStreamReader's
        // constructor throws for an unknown charset name.
        try (FileInputStream in = new FileInputStream(str);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, str2))) {
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line);
                sb.append("\n");
            }
        }
        String xml = sb.toString();
        Document document = new Document(0, xml);
        new FromXmlToInternal(document, xml).transformXML();
        return document;
    }
}
