package ilsp.pmg;

import cc.mallet.fst.CRF;
import iai.anno.AnnotationException;
import iai.globals.Language;
import iai.main.TreeToTreeTranslator;
import iai.resources.Paths;
import iai.resources.Resources;
import iai.resources.ResourcesParseException;
import ilsp.chunker.Chunker;
import ilsp.chunker.XmlHeadPhraseHandler;
import ilsp.components.FromXmlToInternal;
import ilsp.core.Document;
import ilsp.core.Element;
import ilsp.core.Sentence;
import ilsp.core.Word;
import ilsp.ioTools.FileIO;
import ilsp.phraseAligner.resources.PhraseAlignerPaths;
import ilsp.pmg.components.Components;
import ilsp.pmg.components.TagFixer;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import java.util.Locale;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.springframework.beans.BeansException;
import org.springframework.context.support.FileSystemXmlApplicationContext;
import org.springframework.transaction.interceptor.RuleBasedTransactionAttribute;
import org.springframework.util.ResourceUtils;
import org.xml.sax.InputSource;

/* loaded from: input_file:ilsp/pmg/PMG.class */
/**
 * Chunker that labels documents with a CRF model through an {@link AlgorithmBean}.
 *
 * <p>An instance is bound to a source/target language pair. The constructor
 * reads the head-criteria XML file, obtains the {@link AlgorithmBean} from a
 * Spring application context file (falling back to a default bean), and tries
 * to deserialize a previously trained CRF model. The class can also be run
 * from the command line, see {@link #main(String[])}.
 */
public class PMG extends Chunker {
    /** XML prolog prepended to every XML document this class produces. */
    private static final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n";

    /** Separator between the two language codes of the {@code -lang} argument ("SL-TL"). */
    private static final String LANGUAGE_PAIR_SEPARATOR = "-";

    /** Bean that performs CRF training and testing; never {@code null} after construction. */
    public AlgorithmBean algorithmBean;

    /** Source language. */
    private final Language sLang;

    /** Target language. */
    private final Language tLang;

    /** Deserialized CRF model; stays {@code null} when no model could be loaded. */
    private CRF crfModel;

    /** Whether the head-criteria XML file was read and parsed successfully. */
    private boolean existHeadCriteriaFile;

    /**
     * Creates a chunker for the given language pair.
     *
     * <p>Failures while reading the head-criteria file, the application context
     * or the CRF model are reported on the console and do not abort construction.
     *
     * @param language  the source language
     * @param language2 the target language
     */
    public PMG(Language language, Language language2) {
        this.algorithmBean = null;
        this.headParserSL = new XmlHeadPhraseHandler(language);
        this.headParserTL = new XmlHeadPhraseHandler(language2);
        this.sLang = language;
        this.tLang = language2;

        // The same head-criteria XML is fed to both handlers.
        String headCriteriaXml = FileIO.readFileToString(Paths.getPMGHeadFile(language, language2));
        try {
            SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
            saxParser.parse(new InputSource(new StringReader(headCriteriaXml)), this.headParserSL);
            saxParser.parse(new InputSource(new StringReader(headCriteriaXml)), this.headParserTL);
            this.existHeadCriteriaFile = true;
        } catch (Exception e) {
            this.existHeadCriteriaFile = false;
            System.err.println("PMG ERROR: Cannot read HeadCriteria xml file.");
        }

        try {
            FileSystemXmlApplicationContext context = new FileSystemXmlApplicationContext(
                    ResourceUtils.FILE_URL_PREFIX + Paths.getPMGContextFile(language, language2));
            // Take the first bean of type AlgorithmBean declared in the context.
            String[] beanNames = context.getBeanNamesForType(AlgorithmBean.class);
            this.algorithmBean = (AlgorithmBean) context.getBean(beanNames[0]);
        } catch (ArrayIndexOutOfBoundsException e2) {
            // The context declares no AlgorithmBean at all.
            System.out.print("\nERROR: ArrayIndexOutOfBoundsException.");
            this.algorithmBean = new AlgorithmBean();
        } catch (BeansException e3) {
            System.out.print("\nWARNING: Application Context cannot be used.");
            this.algorithmBean = new AlgorithmBean();
        }

        // A missing model is not fatal here (e.g. before the first training run),
        // but it must not go unnoticed either.
        try {
            this.crfModel = Components.deserializeCRF(
                    Paths.getPMGModelFile(this.sLang, this.tLang), this.algorithmBean.getMetaData());
        } catch (IOException e4) {
            reportModelLoadFailure(e4);
        } catch (ClassNotFoundException e5) {
            reportModelLoadFailure(e5);
        } catch (IllegalArgumentException e6) {
            reportModelLoadFailure(e6);
        }
    }

    /** Reports on standard error that the CRF model could not be loaded. */
    private static void reportModelLoadFailure(Exception cause) {
        System.err.println("PMG WARNING: Cannot load the CRF model: " + cause);
    }

    /**
     * Parses an XML input file and saves the result to an output file.
     *
     * <p>The output is written as XML when the output name ends with ".xml"
     * (case-insensitive), otherwise as the document's {@code toString()} form.
     * If the input file cannot be found, the error is reported and nothing is written.
     *
     * @param str  path of the output file
     * @param str2 path of the XML input file
     */
    public void parse(String str, String str2) {
        Document input;
        try {
            input = (Document) FileIO.readXMLFileToElement(str2, "UTF-8");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.err.println("PMG ERROR: Cannot read input file \"" + str2 + "\"; nothing was parsed.");
            return;
        }
        Document parsed = parse(input);
        String output;
        if (str.toLowerCase().endsWith(".xml")) {
            output = XML_HEADER + parsed.toXML("");
        } else {
            output = parsed.toString();
        }
        Components.saveToFile(str, output);
        System.out.print("\nThe output was saved @ " + str);
    }

    /**
     * Reads a raw text file and turns it into a {@link Document}.
     *
     * <p>Greek source text goes through {@code Resources.documentFromGreekText};
     * every other language through {@code Resources.annotatedFromRaw}. If the
     * latter fails, the stack trace is printed and a plain document built
     * directly from the file content is returned instead.
     *
     * @param str  path of the text file
     * @param str2 encoding used to read the file
     * @return the resulting document
     */
    public Document parseText(String str, String str2) {
        String text = FileIO.readFileToString(new File(str), str2, 1);
        // Fallback result, returned when the annotation step below fails.
        Document document = new Document(0, text);
        if (this.sLang == Language.GREEK) {
            document = Resources.documentFromGreekText(text, this.sLang, this.tLang);
        } else {
            try {
                document = Resources.annotatedFromRaw(text, this.sLang, true, this.tLang).toDocument();
            } catch (AnnotationException e) {
                e.printStackTrace();
            } catch (ResourcesParseException e2) {
                e2.printStackTrace();
            } catch (IllegalArgumentException e3) {
                e3.printStackTrace();
            }
        }
        return document;
    }

    /**
     * Parses a document given as an XML string.
     *
     * @param str the XML text; surrounding whitespace is trimmed before conversion
     * @return the parsed document as XML, preceded by an XML prolog
     */
    public String parse(String str) {
        Document document = new Document(0);
        new FromXmlToInternal(document, str.trim()).transformXML();
        return XML_HEADER + parse(document).toXML("");
    }

    /**
     * Parses the document and returns the result as XML without a prolog.
     *
     * @param document the document to parse
     * @return the XML form of the parsed document
     */
    @Override // ilsp.chunker.Chunker
    public String parseToString(Document document) {
        return parse(document).toXML("");
    }

    /**
     * Runs the CRF model over the document.
     *
     * <p>The labelling is computed on a tag-fixed version of the document, but
     * the returned document is built from a clone taken before the tags were
     * fixed. As a side effect, every sentence of the tag-fixed result is
     * printed to standard output together with the tags of its words.
     *
     * @param document the document to parse
     * @return the parsed document, built from the clone of the input
     */
    @Override // ilsp.chunker.Chunker
    public Document parse(Document document) {
        Components components = new Components(this);
        TagFixer tagFixer = new TagFixer(this.sLang, this.tLang);
        this.algorithmBean.setCrf(this.crfModel);

        // Clone first: presumably fixTags may modify its argument (TODO confirm),
        // and the final result must be built from the untouched copy.
        Document untouched = document.m861clone();
        Document tagFixed = tagFixer.fixTags(document);
        String labelled = this.algorithmBean.testing(components.unparsedDocument2unparsedFlat(tagFixed));

        // This document is only used for the console report below.
        Document tagFixedParsed = components.parsedFlat2parsedDocument(labelled, tagFixed);
        System.out.print("\n\nPrinting all sentences with the fixed Tags");
        Iterator<Element> sentences = tagFixedParsed.iterator();
        while (sentences.hasNext()) {
            Sentence sentence = (Sentence) sentences.next();
            System.out.print("\nSentence #" + sentence.getId() + ": " + sentence.toString());
            Iterator<Word> words = sentence.getWordList().iterator();
            while (words.hasNext()) {
                System.out.print(" " + words.next().getTag());
            }
        }
        return components.parsedFlat2parsedDocument(labelled, untouched);
    }

    /**
     * Trains a new CRF model from the given file and serializes it.
     *
     * <p>A file whose name ends with ".dat" is read as already-flattened
     * training data; any other file is read as a parsed XML document, has its
     * tags fixed and is then flattened. Training runs exactly once. If the XML
     * file cannot be found, the error is reported and no training takes place.
     *
     * @param str path of the training file
     */
    public void train(String str) {
        Components components = new Components(this);
        String flatTrainingData;
        if (str.endsWith(".dat")) {
            flatTrainingData = components.readFile(str, "UTF-8");
        } else {
            try {
                TagFixer tagFixer = new TagFixer(this.sLang, this.tLang);
                Document parsed = (Document) FileIO.readXMLFileToElement(str, "UTF-8");
                flatTrainingData = components.parsedDocument2parsedFlat(tagFixer.fixTags(parsed));
            } catch (FileNotFoundException e) {
                e.printStackTrace();
                System.err.println("PMG ERROR: Cannot read training file \"" + str + "\"; no model was trained.");
                return;
            }
        }
        trainFlatString(flatTrainingData);
    }

    /**
     * Tells whether the head-criteria XML file was read successfully at construction time.
     *
     * @return {@code true} if both head-phrase handlers parsed the file without error
     */
    public boolean existHeadCriteriaFile() {
        return this.existHeadCriteriaFile;
    }

    /**
     * Trains a CRF on flattened training data and serializes it, together with
     * the bean's metadata, to the model file of this language pair.
     *
     * @param str the flattened training data
     */
    private void trainFlatString(String str) {
        this.algorithmBean.setTagReplacementsTraining();
        CRF trainedModel = this.algorithmBean.training(str);
        String metaData = this.algorithmBean.getMetaData();
        String modelFile = Paths.getPMGModelFile(this.sLang, this.tLang);
        Components.serializeCRF(modelFile, trainedModel, metaData);
        System.out.print("TRAINING: Total Iterations: " + this.algorithmBean.getNumIterations());
        System.out.print("\nThe Training Model saved @ \"" + modelFile + "\"");
    }

    /**
     * Prints the command-line help to standard output and terminates the JVM
     * with exit status 0. This method does not return.
     */
    private static void usage() {
        System.out.println("Usage:  PMG [-options] -lang <languages> [-encoding <file_encoding>] [-input <input_FILE>] [-output <output_FILE>]\n");
        System.out.println("where \"options\" include:");
        System.out.println("    -train\t to train a new CRF model");
        System.out.println("    -parse\t to parse text with the existent CRF model");
        System.out.println("    -parseXML\t to parse a XML text with the existent CRF model");
        System.out.println("where \"languages\" is the language pair in form \"SL-TL\"");
        System.out.println("where \"file_encoding\" utf-8 or iso-8859-7");
        System.out.println("where \"input_FILE\" the path for input file ");
        System.out.println("where \"output_FILE\" path for output file, it is used only with \"-parse\" and \"-parseXML\" options\n");
        System.out.println("Example: -train    -lang EN-DE ");
        System.out.println("Example: -parse    -lang DE-EL -encoding UTF-8 -input path\\unparsed.txt -output path\\parsed.xml");
        System.out.println("Example: -parseXML -lang DE-EL -encoding UTF-8 -input path\\unparsed.xml -output path\\parsed.xml");
        System.exit(0);
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects either three arguments ({@code -train -lang SL-TL}) or nine
     * ({@code -parse|-parseXML -lang SL-TL -encoding ENC}, followed by the
     * input and output option/path pairs). Any other shape prints the usage
     * text and exits.
     *
     * @param strArr the command-line arguments
     * @throws InterruptedException declared for compatibility; not thrown by the visible code
     */
    public static void main(String[] strArr) throws InterruptedException {
        // usage() terminates the JVM, so the code after each failed check is never reached.
        if (strArr.length != 3 && strArr.length != 9) {
            usage();
        }
        String mode = strArr[0];
        if (!strArr[1].equalsIgnoreCase("-lang")) {
            usage();
        }
        String[] languageCodes = strArr[2].split(LANGUAGE_PAIR_SEPARATOR);
        if (languageCodes.length != 2) {
            usage();
        }
        // Locale.ROOT keeps the lower-casing independent of the default locale
        // (e.g. "I" must become "i" even under a Turkish locale).
        Language sourceLanguage = Language.fromShortForm(languageCodes[0].toLowerCase(Locale.ROOT));
        Language targetLanguage = Language.fromShortForm(languageCodes[1].toLowerCase(Locale.ROOT));

        if (mode.equalsIgnoreCase("-train")) {
            if (strArr.length != 3) {
                usage();
            }
            new PMG(sourceLanguage, targetLanguage)
                    .train(PhraseAlignerPaths.getInstance(sourceLanguage, targetLanguage).getParsedFullPath());
            return;
        }

        if (strArr.length != 9) {
            usage();
        }
        if (!strArr[3].equalsIgnoreCase("-encoding")) {
            usage();
        }
        String encoding = strArr[4].toLowerCase(Locale.ROOT);
        if (!encoding.equals("utf-8") && !encoding.equals("iso-8859-7")) {
            usage();
        }
        if (!strArr[5].equalsIgnoreCase(TreeToTreeTranslator.INPUT)) {
            usage();
        }
        String inputPath = strArr[6];
        if (!strArr[7].equalsIgnoreCase(TreeToTreeTranslator.OUTPUT)) {
            usage();
        }
        String outputPath = strArr[8];

        if (mode.equalsIgnoreCase("-parseXML")) {
            new PMG(sourceLanguage, targetLanguage).parse(outputPath, inputPath);
        } else {
            if (!mode.equalsIgnoreCase("-parse")) {
                usage();
                return;
            }
            Document parsedText = new PMG(sourceLanguage, targetLanguage).parseText(inputPath, encoding);
            // NOTE(review): the XML prolog always declares UTF-8 while the file is
            // written with the requested encoding - confirm this is intended.
            String output = outputPath.toLowerCase().endsWith(".xml")
                    ? XML_HEADER + parsedText.toXML("")
                    : parsedText.toString();
            FileIO.writeFile(new File(outputPath), output, encoding);
            System.out.print("\nThe parsed text was saved @ " + outputPath);
        }
    }

    /**
     * Returns the given string unchanged.
     *
     * @param str the text to convert
     * @return {@code str} itself
     */
    @Override // ilsp.chunker.Chunker
    protected String toXmlInternal(String str) {
        return str;
    }

    /**
     * File-based conversion is not implemented by this chunker.
     *
     * @param file the file to convert (ignored)
     * @return always {@code null}
     * @throws IOException never thrown by this implementation
     */
    @Override // ilsp.chunker.Chunker
    protected String toXmlInternal(File file) throws IOException {
        return null;
    }
}
