/*
 * Decompiled with CFR 0.152.
 */
package org.apache.uima.tika;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.language.LanguageProfile;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.tika.FeatureValue;
import org.apache.uima.tika.MarkupHandler;
import org.apache.uima.tika.SourceDocumentAnnotation;
import org.apache.uima.util.Level;
import org.xml.sax.ContentHandler;

public class MarkupAnnotator
extends CasAnnotator_ImplBase {
    private static final String ORIGINAL_VIEW_PARAM_NAME = "ORIGINAL_VIEW_PARAM_NAME";
    private static final String TEXT_VIEW_PARAM_NAME = "TEXT_VIEW_PARAM_NAME";
    private static final String SET_TEXT_VIEW_DEFAULT_PARAM_NAME = "SET_TEXT_VIEW_DEFAULT_PARAM_NAME";
    private static final String tika_file_param = "tikaConfigFile";
    private String originalViewName = "_InitialView";
    private String textViewName = "textView";
    private Boolean makeTextDefaultView = true;
    private TikaConfig config = null;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.originalViewName = (String)aContext.getConfigParameterValue(ORIGINAL_VIEW_PARAM_NAME);
        this.textViewName = (String)aContext.getConfigParameterValue(TEXT_VIEW_PARAM_NAME);
        if (this.textViewName == null) {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Parameter TEXT_VIEW_PARAM_NAME is null; setting to \"textView\"").toString());
            this.textViewName = "textView";
        } else {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Parameter TEXT_VIEW_PARAM_NAME is ").append(this.textViewName).toString());
        }
        this.makeTextDefaultView = (Boolean)aContext.getConfigParameterValue(SET_TEXT_VIEW_DEFAULT_PARAM_NAME);
        if (this.makeTextDefaultView == null) {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Parameter SET_TEXT_VIEW_DEFAULT_PARAM_NAME is null; setting to \"true\"").toString());
            this.makeTextDefaultView = new Boolean(true);
        } else {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Parameter SET_TEXT_VIEW_DEFAULT_PARAM_NAME is ").append(this.makeTextDefaultView).toString());
        }
        URL tikaConfigURL = null;
        try {
            tikaConfigURL = this.getContext().getResourceURL(tika_file_param);
            this.config = new TikaConfig(tikaConfigURL);
        }
        catch (Exception e1) {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Failed to load TIKA config file from ").append(tikaConfigURL).append(" due to ").append(e1.getLocalizedMessage()).toString());
            this.config = null;
        }
        if (this.config == null) {
            try {
                this.config = TikaConfig.getDefaultConfig();
            }
            catch (Exception e) {
                throw new ResourceInitializationException((Throwable)e);
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void process(CAS cas) throws AnalysisEngineProcessException {
        CAS originalCas = null;
        try {
            originalCas = cas.getView(this.originalViewName);
        }
        catch (Exception e) {
            String viewName = cas.getViewName();
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("can't find view ").append(this.originalViewName).append(" using ").append(viewName).append(" instead").toString());
            originalCas = cas.getCurrentView();
        }
        InputStream originalStream = originalCas.getSofa().getSofaDataStream();
        AutoDetectParser parser = new AutoDetectParser(this.config);
        Metadata md = new Metadata();
        MarkupHandler handler = new MarkupHandler();
        try {
            parser.parse(originalStream, (ContentHandler)handler, md);
        }
        catch (Exception e) {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Problem converting file : ").append(e.getMessage()).toString());
            return;
        }
        finally {
            try {
                originalStream.close();
            }
            catch (IOException e) {}
        }
        CAS plainTextView = cas.createView(this.textViewName);
        handler.populateCAS(plainTextView);
        JCas ptv = null;
        try {
            ptv = plainTextView.getJCas();
        }
        catch (CASException e) {
            e.printStackTrace();
            return;
        }
        this.extractLanguage(ptv);
        Type docAnnotationType = ptv.getTypeSystem().getType("org.apache.uima.tika.SourceDocumentAnnotation");
        FSIterator iter = ptv.getAnnotationIndex(docAnnotationType).iterator();
        SourceDocumentAnnotation docAnnotation = null;
        docAnnotation = iter.hasNext() ? (SourceDocumentAnnotation)((Object)iter.next()) : new SourceDocumentAnnotation(ptv);
        if (docAnnotation.getFeatures() == null) {
            docAnnotation.setFeatures((FSArray)cas.createArrayFS(md.size()));
        }
        for (int i = 0; i < md.size(); ++i) {
            String name = md.names()[i];
            String value = md.get(name);
            FeatureValue fv = new FeatureValue(ptv);
            fv.setName(name);
            fv.setValue(value);
            docAnnotation.setFeatures(i, fv);
        }
        docAnnotation.addToIndexes();
    }

    private void extractLanguage(JCas plainTextView) {
        try {
            LanguageIdentifier li = new LanguageIdentifier(new LanguageProfile(plainTextView.getDocumentText()));
            if (li.getLanguage() != null && !"".equals(li.getLanguage())) {
                plainTextView.setDocumentLanguage(li.getLanguage());
            }
        }
        catch (Exception e) {
            this.getContext().getLogger().log(Level.WARNING, new StringBuffer("Could not extract language due to ").append(e.getLocalizedMessage()).toString());
        }
        this.getContext().getLogger().log(Level.INFO, new StringBuffer("Extracted language: ").append(plainTextView.getDocumentLanguage()).toString());
    }
}

