/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.annotator.dict_annot.dictionary.impl;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.internal.util.CommandLineParser;
import org.apache.uima.pear.tools.PackageBrowser;
import org.apache.uima.pear.tools.PackageInstaller;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;

/**
 * Command-line tool that converts a plain text file (one dictionary entry per
 * line) into an XML dictionary file.
 *
 * <p>Each input line is split into tokens, either with a tokenizer analysis
 * engine installed from a PEAR file or, when no tokenizer is given, with a
 * {@link StringTokenizer}. The tokens of a line are joined with the output
 * separator and written as one {@code <entry>} of the dictionary.
 */
public class DictionaryCreator {
    private static final String INPUT_FILE_PARAM = "-input";
    private static final String OUTPUT_FILE_PARAM = "-output";
    private static final String INPUT_FILE_ENCODING_PARAM = "-encoding";
    private static final String INPUT_FILE_LANGUAGE_PARAM = "-lang";
    private static final String TOKENIZER_PARAM = "-tokenizer";
    private static final String TOKEN_TYPE_PARAM = "-tokenType";
    private static final String SEPARATOR_CHAR_PARAM = "-separator";
    /** Delimiter used to split input lines when no separator is supplied. */
    private static final String SEPARATOR_CHAR = " ";
    /** Separator written between tokens when no separator is supplied. */
    private static final String OUTPUT_SEPARATOR_CHAR = "|";
    /** Maps the characters that must be escaped in XML to their entity text. */
    private static final HashMap<Character, String> entities = new HashMap<Character, String>(5);

    static {
        entities.put(Character.valueOf('<'), "&lt;");
        entities.put(Character.valueOf('>'), "&gt;");
        entities.put(Character.valueOf('&'), "&amp;");
        entities.put(Character.valueOf('\"'), "&quot;");
        entities.put(Character.valueOf('\''), "&apos;");
    }

    /**
     * Builds the command line parser and registers every supported parameter.
     *
     * @return a parser that knows all parameters of this tool
     */
    private static final CommandLineParser createCmdLineParser() {
        CommandLineParser parser = new CommandLineParser();
        parser.addParameter(INPUT_FILE_PARAM, true);
        parser.addParameter(INPUT_FILE_LANGUAGE_PARAM, true);
        parser.addParameter(OUTPUT_FILE_PARAM, true);
        parser.addParameter(INPUT_FILE_ENCODING_PARAM, true);
        parser.addParameter(TOKENIZER_PARAM, true);
        parser.addParameter(TOKEN_TYPE_PARAM, true);
        parser.addParameter(SEPARATOR_CHAR_PARAM, true);
        return parser;
    }

    /** Prints the command line usage to standard output. */
    private static final void printUsage() {
        System.out.println("Usage: java org.apache.uima.annotator.dict_annot.dictionary.impl.DictionaryCreator -input <InputFile> -encoding <InputFileEncoding> -output <OutputFile> [-tokenizer <TokenizerPear> -tokenType <tokenType>] [-separator <separatorChar>] ");
        System.out.println("Additional optional parameters:");
        System.out.println("  -lang <dictionaryLanguage>");
    }

    /**
     * Checks that all mandatory parameters are present and that a token type
     * accompanies a tokenizer. Every problem found is reported on standard error.
     *
     * @param clp the parser holding the parsed command line
     * @return {@code true} if the command line is complete, {@code false} otherwise
     */
    private static final boolean checkCmdLineSyntax(CommandLineParser clp) {
        boolean error = false;
        if (!clp.isInArgsList(INPUT_FILE_PARAM)) {
            System.err.println("InputFile parameter -input is missing");
            error = true;
        }
        if (!clp.isInArgsList(INPUT_FILE_ENCODING_PARAM)) {
            System.err.println("InputFile encoding parameter -encoding is missing");
            error = true;
        }
        if (!clp.isInArgsList(OUTPUT_FILE_PARAM)) {
            System.err.println("OutputFile parameter -output is missing");
            error = true;
        }
        if (clp.isInArgsList(TOKENIZER_PARAM) && !clp.isInArgsList(TOKEN_TYPE_PARAM)) {
            System.err.println("If a tokenizer is used, the -tokenType paramter must be specified");
            error = true;
        }
        return !error;
    }

    /**
     * Command line entry point. Exits with status -1 when the command line
     * cannot be parsed or is incomplete; otherwise creates the dictionary and
     * prints the stack trace of any failure.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        CommandLineParser clp = DictionaryCreator.createCmdLineParser();
        try {
            clp.parseCmdLine(args);
        } catch (Exception e) {
            // Without a parsed command line there is nothing meaningful to run,
            // so stop here instead of continuing with missing arguments.
            System.err.println("Error parsing command line: " + e.getMessage());
            DictionaryCreator.printUsage();
            System.exit(-1);
        }
        if (!DictionaryCreator.checkCmdLineSyntax(clp)) {
            DictionaryCreator.printUsage();
            System.exit(-1);
        }
        String inputFile = clp.getParamArgument(INPUT_FILE_PARAM);
        String language = clp.getParamArgument(INPUT_FILE_LANGUAGE_PARAM);
        String encoding = clp.getParamArgument(INPUT_FILE_ENCODING_PARAM);
        String outputFile = clp.getParamArgument(OUTPUT_FILE_PARAM);
        String tokenizerFile = clp.getParamArgument(TOKENIZER_PARAM);
        String tokenTypeStr = clp.getParamArgument(TOKEN_TYPE_PARAM);
        String separatorChar = clp.getParamArgument(SEPARATOR_CHAR_PARAM);
        try {
            DictionaryCreator.createDictionary(inputFile, encoding, outputFile, language, tokenizerFile, tokenTypeStr, separatorChar);
            System.out.println("The dictionary was sucessfully created at: " + outputFile);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Creates an XML dictionary file from a text file with one entry per line.
     *
     * @param inputFile path of the text file to read; must be readable
     * @param encoding character encoding of the input file
     * @param outputFile path of the dictionary file to write (written as UTF-8)
     * @param language optional language id written to the dictionary and, when a
     *        tokenizer is used, set as document language; may be {@code null}
     * @param tokenizerFile optional PEAR file of a tokenizer; when {@code null}
     *        lines are split with a {@link StringTokenizer}
     * @param tokenTypeStr name of the token annotation type; required when a
     *        tokenizer is used
     * @param separatorChar optional separator; when given it is used both as the
     *        delimiter set for splitting lines and as the separator written
     *        between tokens. When {@code null}, lines are split at blanks and
     *        tokens are joined with {@code |}
     * @return always {@code true}; failures are reported by exception
     * @throws Exception if a required argument is missing, a file cannot be
     *         read, the tokenizer cannot be created, the token type is unknown
     *         or reading/writing fails
     */
    public static boolean createDictionary(String inputFile, String encoding, String outputFile, String language, String tokenizerFile, String tokenTypeStr, String separatorChar) throws Exception {
        if (inputFile == null) {
            throw new Exception("Error: Input file not specified");
        }
        if (encoding == null) {
            throw new Exception("Error: Input file encoding not specified");
        }
        if (outputFile == null) {
            throw new Exception("Error: Output file not specified");
        }
        File inFile = new File(inputFile);
        if (!inFile.canRead()) {
            throw new Exception("Error: Input file " + inputFile + " cannot be read!");
        }

        String outputSeparatorChar = OUTPUT_SEPARATOR_CHAR;
        if (separatorChar == null) {
            separatorChar = SEPARATOR_CHAR;
        } else {
            outputSeparatorChar = separatorChar;
        }

        AnalysisEngine ae = null;
        File tempDir = null;
        try {
            CAS cas = null;
            Type tokenType = null;
            if (tokenizerFile != null) {
                File pearFile = new File(tokenizerFile);
                if (!pearFile.canRead()) {
                    throw new Exception("Error: Tokenizer file " + tokenizerFile + " cannot be read!");
                }
                if (tokenTypeStr == null) {
                    throw new Exception("Error: Tokenizer tokenType not specified");
                }
                try {
                    tempDir = new File(System.getProperty("java.io.tmpdir"), "~tokenizer_temp_install");
                    tempDir.deleteOnExit();
                    tempDir.mkdir();
                    PackageBrowser instPear = PackageInstaller.installPackage(tempDir, pearFile, true);
                    XMLInputSource descriptor = new XMLInputSource(instPear.getComponentPearDescPath());
                    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(descriptor);
                    ae = UIMAFramework.produceAnalysisEngine(specifier);
                    cas = ae.newCAS();
                    tokenType = cas.getTypeSystem().getType(tokenTypeStr);
                } catch (Exception ex) {
                    throw new Exception("Error creating tokenizer: " + ex.getMessage(), ex);
                }
                if (tokenType == null) {
                    // Report an unknown type name here rather than failing
                    // later inside the annotation index lookup.
                    throw new Exception("Error: Tokenizer tokenType " + tokenTypeStr + " is not defined in the tokenizer type system");
                }
            }

            // The raw streams are closed in finally blocks so that neither file
            // handle leaks when decoding, tokenizing or writing fails.
            InputStream inStream = new FileInputStream(inputFile);
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, encoding));
                OutputStream outStream = new FileOutputStream(outputFile);
                try {
                    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(outStream, "UTF-8"));
                    DictionaryCreator.writeDictionary(reader, writer, language, separatorChar, outputSeparatorChar, ae, cas, tokenType);
                    // Flush explicitly so buffered write errors surface before close.
                    writer.flush();
                } finally {
                    outStream.close();
                }
            } finally {
                inStream.close();
            }
        } finally {
            if (ae != null) {
                ae.destroy();
            }
            if (tempDir != null) {
                DictionaryCreator.removeTempDir(tempDir);
            }
        }
        return true;
    }

    /**
     * Writes the complete dictionary document: header, one entry per input
     * line, and footer.
     *
     * @param reader source of the input lines
     * @param writer destination of the XML output
     * @param language optional language id, may be {@code null}
     * @param separatorChar delimiter set used when splitting without a tokenizer
     * @param outputSeparatorChar separator written between the tokens of an entry
     * @param ae tokenizer engine, or {@code null} to split with a StringTokenizer
     * @param cas CAS used with the tokenizer engine; unused when {@code ae} is null
     * @param tokenType token annotation type; unused when {@code ae} is null
     * @throws Exception if reading, tokenizing or writing fails
     */
    private static void writeDictionary(BufferedReader reader, BufferedWriter writer, String language, String separatorChar, String outputSeparatorChar, AnalysisEngine ae, CAS cas, Type tokenType) throws Exception {
        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        writer.write("<dictionary xmlns=\"http://incubator.apache.org/uima\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"dictionary.xsd\">\n");
        writer.write("<typeCollection>\n");
        // The separator and language are caller supplied, so escape them to
        // keep the document well-formed.
        writer.write("<dictionaryMetaData caseNormalization=\"true\" multiWordEntries=\"true\" multiWordSeparator=\"" + DictionaryCreator.replaceXMLEntities(outputSeparatorChar) + "\"/>\n");
        if (language != null) {
            writer.write("<languageId>" + DictionaryCreator.replaceXMLEntities(language) + "</languageId>\n");
        }
        writer.write("<typeDescription>\n");
        writer.write("<typeName> ADD DICTIONARY OUTPUT TYPE HERE</typeName>\n");
        writer.write("</typeDescription>\n");
        writer.write("<entries>\n");

        String line;
        while ((line = reader.readLine()) != null) {
            String key;
            if (ae != null) {
                key = DictionaryCreator.joinEngineTokens(line, language, outputSeparatorChar, ae, cas, tokenType);
            } else {
                key = DictionaryCreator.joinLineTokens(line, separatorChar, outputSeparatorChar);
            }
            writer.write("<entry>\n");
            writer.write("<key>" + DictionaryCreator.replaceXMLEntities(key.trim()) + "</key>\n");
            writer.write("</entry>\n");
        }

        writer.write("</entries>\n");
        writer.write("</typeCollection>\n");
        writer.write("</dictionary>\n");
    }

    /**
     * Tokenizes a line with the analysis engine and joins the covered text of
     * all token annotations with the output separator.
     *
     * @return the joined tokens, without a trailing separator
     * @throws Exception if the analysis engine fails
     */
    private static String joinEngineTokens(String line, String language, String outputSeparatorChar, AnalysisEngine ae, CAS cas, Type tokenType) throws Exception {
        StringBuilder joined = new StringBuilder();
        cas.setDocumentText(line);
        if (language != null) {
            cas.setDocumentLanguage(language);
        }
        ae.process(cas);
        FSIterator it = cas.getAnnotationIndex(tokenType).iterator();
        boolean first = true;
        while (it.hasNext()) {
            // Separator goes between tokens only, so nothing has to be
            // stripped from the end afterwards.
            if (!first) {
                joined.append(outputSeparatorChar);
            }
            joined.append(((AnnotationFS) it.next()).getCoveredText());
            first = false;
        }
        // Reset so the same CAS can take the next line.
        cas.reset();
        return joined.toString();
    }

    /**
     * Splits a line with a {@link StringTokenizer} and joins the tokens with
     * the output separator.
     *
     * @param separatorChar delimiter characters passed to the StringTokenizer
     * @return the joined tokens, without a trailing separator
     */
    private static String joinLineTokens(String line, String separatorChar, String outputSeparatorChar) {
        StringBuilder joined = new StringBuilder();
        StringTokenizer tokenizer = new StringTokenizer(line, separatorChar);
        boolean first = true;
        while (tokenizer.hasMoreTokens()) {
            if (!first) {
                joined.append(outputSeparatorChar);
            }
            joined.append(tokenizer.nextToken());
            first = false;
        }
        return joined.toString();
    }

    /**
     * Best-effort removal of the temporary tokenizer installation directory.
     * Files that are still listed after the recursive delete are registered
     * for deletion on JVM exit. Failures are reported on standard error and
     * not propagated, so they cannot mask an exception from dictionary creation.
     *
     * @param tempDir the temporary installation directory
     */
    private static void removeTempDir(File tempDir) {
        try {
            FileUtils.deleteRecursive(tempDir);
            ArrayList files = FileUtils.getFiles(tempDir, true);
            if (files != null) {
                for (int i = 0; i < files.size(); ++i) {
                    ((File) files.get(i)).deleteOnExit();
                }
            }
        } catch (Exception ex) {
            System.err.println("Warning: could not remove temporary directory " + tempDir + ": " + ex.getMessage());
        }
    }

    /**
     * Replaces the characters {@code < > & " '} with their XML entities.
     *
     * @param text the text to escape
     * @return the escaped text
     */
    private static String replaceXMLEntities(String text) {
        StringBuilder buffer = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); ++i) {
            char character = text.charAt(i);
            String entity = entities.get(Character.valueOf(character));
            if (entity != null) {
                buffer.append(entity);
            } else {
                buffer.append(character);
            }
        }
        return buffer.toString();
    }
}

