/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.plugin.htmlscraper;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.extractors.ArticleExtractor;
import de.l3s.boilerpipe.extractors.CanolaExtractor;
import de.l3s.boilerpipe.extractors.DefaultExtractor;
import de.l3s.boilerpipe.extractors.LargestContentExtractor;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.plugin.htmlscraper.HTMLScraperExtractorFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;

public class HTMLScraperExtractor
implements Extractor.ContentExtractor {
    public static final IRI PAGE_CONTENT_DE_PROPERTY = SimpleValueFactory.getInstance().createIRI("http://vocab.sindice.net/any23#pagecontent/de");
    public static final IRI PAGE_CONTENT_AE_PROPERTY = SimpleValueFactory.getInstance().createIRI("http://vocab.sindice.net/any23#pagecontent/ae");
    public static final IRI PAGE_CONTENT_LCE_PROPERTY = SimpleValueFactory.getInstance().createIRI("http://vocab.sindice.net/any23#pagecontent/lce");
    public static final IRI PAGE_CONTENT_CE_PROPERTY = SimpleValueFactory.getInstance().createIRI("http://vocab.sindice.net/any23#pagecontent/ce");
    private final List<ExtractionRule> extractionRules = new ArrayList<ExtractionRule>();

    public HTMLScraperExtractor() {
        this.loadDefaultRules();
    }

    public void addTextExtractor(String name, IRI property, BoilerpipeExtractor extractor) {
        this.extractionRules.add(new ExtractionRule(name, property, extractor));
    }

    public String[] getTextExtractors() {
        ArrayList<String> extractors = new ArrayList<String>();
        for (ExtractionRule er : this.extractionRules) {
            extractors.add(er.name);
        }
        return extractors.toArray(new String[extractors.size()]);
    }

    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream inputStream, ExtractionResult extractionResult) throws IOException, ExtractionException {
        try {
            IRI documentIRI = extractionContext.getDocumentIRI();
            for (ExtractionRule extractionRule : this.extractionRules) {
                String content = extractionRule.boilerpipeExtractor.getText((Reader)new InputStreamReader(inputStream));
                extractionResult.writeTriple((Resource)documentIRI, extractionRule.property, (Value)SimpleValueFactory.getInstance().createLiteral(content));
            }
        }
        catch (BoilerpipeProcessingException bpe) {
            throw new ExtractionException("Error while applying text processor " + ArticleExtractor.class, (Throwable)bpe);
        }
    }

    public ExtractorDescription getDescription() {
        return HTMLScraperExtractorFactory.getDescriptionInstance();
    }

    public void setStopAtFirstError(boolean b) {
    }

    private void loadDefaultRules() {
        this.addTextExtractor("default-extractor", PAGE_CONTENT_DE_PROPERTY, (BoilerpipeExtractor)DefaultExtractor.getInstance());
        this.addTextExtractor("article-extractor", PAGE_CONTENT_AE_PROPERTY, (BoilerpipeExtractor)ArticleExtractor.getInstance());
        this.addTextExtractor("large-content-extractor", PAGE_CONTENT_LCE_PROPERTY, (BoilerpipeExtractor)LargestContentExtractor.getInstance());
        this.addTextExtractor("canola-extractor", PAGE_CONTENT_CE_PROPERTY, (BoilerpipeExtractor)CanolaExtractor.getInstance());
    }

    class ExtractionRule {
        public final String name;
        public final IRI property;
        public final BoilerpipeExtractor boilerpipeExtractor;

        ExtractionRule(String name, IRI property, BoilerpipeExtractor boilerpipeExtractor) {
            if (name == null) {
                throw new NullPointerException("name cannot be null.");
            }
            if (property == null) {
                throw new NullPointerException("property cannot be null.");
            }
            if (boilerpipeExtractor == null) {
                throw new NullPointerException("extractor cannot be null.");
            }
            this.name = name;
            this.property = property;
            this.boilerpipeExtractor = boilerpipeExtractor;
        }
    }
}

