/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import org.apache.any23.ExtractionReport;
import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.ExtractorGroup;
import org.apache.any23.extractor.ExtractorRegistryImpl;
import org.apache.any23.extractor.SingleDocumentExtraction;
import org.apache.any23.extractor.SingleDocumentExtractionReport;
import org.apache.any23.http.AcceptHeaderBuilder;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.http.DefaultHTTPClientConfiguration;
import org.apache.any23.http.HTTPClient;
import org.apache.any23.mime.MIMEType;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.TikaMIMETypeDetector;
import org.apache.any23.mime.purifier.Purifier;
import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.FileDocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.LocalCopyFactory;
import org.apache.any23.source.MemCopyFactory;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.TripleHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Entry point that wires a {@link Configuration}, an {@link ExtractorGroup}, a
 * {@link MIMETypeDetector}, a {@link LocalCopyFactory} and an {@link HTTPClient}
 * together and runs a {@link SingleDocumentExtraction} over a
 * {@link DocumentSource}, sending the output to a {@link TripleHandler}.
 *
 * <p>The HTTP client is initialized lazily by {@link #getHTTPClient()}. Once it
 * has been initialized, {@link #setHTTPUserAgent(String)} and
 * {@link #setHTTPClient(HTTPClient)} throw {@link IllegalStateException}.
 *
 * <p>This class performs no synchronization on its mutable fields.
 */
public class Any23 {
    /** Value of the {@code any23.core.version} property of the default configuration. */
    public static final String VERSION = DefaultConfiguration.singleton().getPropertyOrFail("any23.core.version");

    /** Value of the {@code any23.http.user.agent.default} property of the default configuration. */
    public static final String DEFAULT_HTTP_CLIENT_USER_AGENT = DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");

    protected static final Logger logger = LoggerFactory.getLogger(Any23.class);

    private final Configuration configuration;

    /** User agent substituted when {@link #setHTTPUserAgent(String)} receives {@code null}. */
    private final String defaultUserAgent;

    /** Detector handed to every extraction; may be replaced (also with {@code null}) via its setter. */
    private MIMETypeDetector mimeTypeDetector = new TikaMIMETypeDetector(new WhiteSpacesPurifier());

    private HTTPClient httpClient = new DefaultHTTPClient();

    /** Becomes {@code true} after {@link #getHTTPClient()} has called {@code init} on the client. */
    private boolean httpClientInitialized = false;

    /** Extractor factories used for every extraction and for building the HTTP Accept header. */
    private final ExtractorGroup factories;

    private LocalCopyFactory streamCache;

    /**
     * User agent set through {@link #setHTTPUserAgent(String)}; {@code null} until then.
     * NOTE(review): this class only checks the value for presence and never passes it
     * to the HTTP client; presumably the client configuration obtains the user agent
     * elsewhere. Confirm against DefaultHTTPClientConfiguration.
     */
    private String userAgent;

    /**
     * Creates an instance with the given configuration and extractor group.
     *
     * @param configuration  configuration to use; must not be {@code null}.
     * @param extractorGroup extractors to apply; when {@code null} the group returned by
     *                       {@code ExtractorRegistryImpl.getInstance().getExtractorGroup()} is used.
     * @throws NullPointerException if {@code configuration} is {@code null}.
     */
    public Any23(Configuration configuration, ExtractorGroup extractorGroup) {
        if (configuration == null) {
            throw new NullPointerException("configuration must be not null.");
        }
        this.configuration = configuration;
        // Build the (potentially large) dump only when it will actually be logged.
        if (logger.isDebugEnabled()) {
            logger.debug(configuration.getConfigurationDump());
        }
        this.defaultUserAgent = configuration.getPropertyOrFail("any23.http.user.agent.default");
        this.factories = extractorGroup == null ? ExtractorRegistryImpl.getInstance().getExtractorGroup() : extractorGroup;
        this.setCacheFactory(new MemCopyFactory());
    }

    /**
     * Creates an instance with the default configuration and the given extractor group.
     *
     * @param extractorGroup extractors to apply, or {@code null} for the registry's group.
     */
    public Any23(ExtractorGroup extractorGroup) {
        this((Configuration)DefaultConfiguration.singleton(), extractorGroup);
    }

    /**
     * Creates an instance with the given configuration and the extractors looked up by name
     * in the extractor registry.
     *
     * @param configuration  configuration to use; must not be {@code null}.
     * @param extractorNames names of the extractors to apply; a {@code null} array selects
     *                       the registry's default group.
     */
    public Any23(Configuration configuration, String ... extractorNames) {
        this(configuration, extractorNames == null ? null : ExtractorRegistryImpl.getInstance().getExtractorGroup(Arrays.asList(extractorNames)));
    }

    /**
     * Creates an instance with the default configuration and the extractors looked up by name.
     *
     * @param extractorNames names of the extractors to apply; a {@code null} array selects
     *                       the registry's default group.
     */
    public Any23(String ... extractorNames) {
        this((Configuration)DefaultConfiguration.singleton(), extractorNames);
    }

    /**
     * Creates an instance with the given configuration and the registry's extractor group.
     *
     * @param configuration configuration to use; must not be {@code null}.
     */
    public Any23(Configuration configuration) {
        this(configuration, (String[])null);
    }

    /** Creates an instance with the default configuration and the registry's extractor group. */
    public Any23() {
        this((Configuration)DefaultConfiguration.singleton());
    }

    /**
     * Sets the HTTP user agent. Must be called before the HTTP client is initialized.
     *
     * @param userAgent the user agent; {@code null} selects the configured default.
     * @throws IllegalStateException    if the HTTP client has already been initialized.
     * @throws IllegalArgumentException if the resulting user agent is empty or only whitespace.
     */
    public void setHTTPUserAgent(String userAgent) {
        if (this.httpClientInitialized) {
            throw new IllegalStateException("Cannot change HTTP configuration after client has been initialized");
        }
        final String effectiveUserAgent = userAgent == null ? this.defaultUserAgent : userAgent;
        if (effectiveUserAgent.trim().isEmpty()) {
            throw new IllegalArgumentException(String.format("Invalid user agent: '%s'", effectiveUserAgent));
        }
        this.userAgent = effectiveUserAgent;
    }

    /**
     * Returns the user agent set through {@link #setHTTPUserAgent(String)}.
     *
     * @return the user agent, or {@code null} if none has been set yet.
     */
    public String getHTTPUserAgent() {
        return this.userAgent;
    }

    /**
     * Replaces the HTTP client. Must be called before the client is initialized.
     *
     * @param httpClient the client to use; must not be {@code null}.
     * @throws NullPointerException  if {@code httpClient} is {@code null}.
     * @throws IllegalStateException if the HTTP client has already been initialized.
     */
    public void setHTTPClient(HTTPClient httpClient) {
        if (httpClient == null) {
            throw new NullPointerException("httpClient cannot be null.");
        }
        if (this.httpClientInitialized) {
            throw new IllegalStateException("Cannot change HTTP configuration after client has been initialized");
        }
        this.httpClient = httpClient;
    }

    /**
     * Returns the HTTP client, initializing it on first use with a configuration built
     * from the Accept header of the configured extractors.
     *
     * @return the initialized HTTP client.
     * @throws IOException if no user agent has been set via {@link #setHTTPUserAgent(String)}.
     */
    public HTTPClient getHTTPClient() throws IOException {
        if (!this.httpClientInitialized) {
            if (this.userAgent == null) {
                throw new IOException("Must call " + Any23.class.getSimpleName() + ".setHTTPUserAgent(String) before extracting from HTTP IRI");
            }
            this.httpClient.init(new DefaultHTTPClientConfiguration(this.getAcceptHeader()));
            this.httpClientInitialized = true;
        }
        return this.httpClient;
    }

    /**
     * Sets the factory passed to each extraction as its local copy factory.
     *
     * @param cache the factory to use; must not be {@code null}.
     * @throws NullPointerException if {@code cache} is {@code null}.
     */
    public void setCacheFactory(LocalCopyFactory cache) {
        if (cache == null) {
            throw new NullPointerException("cache cannot be null.");
        }
        this.streamCache = cache;
    }

    /**
     * Sets the MIME type detector passed to each extraction. No null check is performed,
     * so {@code null} is forwarded to the extraction as-is.
     *
     * @param detector the detector to use.
     */
    public void setMIMETypeDetector(MIMETypeDetector detector) {
        this.mimeTypeDetector = detector;
    }

    /**
     * Creates a document source for an IRI using the {@code file:}, {@code http:} or
     * {@code https:} scheme. The scheme is matched case-insensitively.
     *
     * @param documentIRI the IRI of the document; must not be {@code null}.
     * @return a {@link FileDocumentSource} for {@code file:} IRIs, otherwise an
     *         {@link HTTPDocumentSource} backed by {@link #getHTTPClient()}.
     * @throws NullPointerException     if {@code documentIRI} is {@code null}.
     * @throws URISyntaxException       if a {@code file:} IRI cannot be parsed as a {@link URI}.
     * @throws IOException              if the HTTP client cannot be obtained.
     * @throws IllegalArgumentException if the IRI uses none of the supported schemes.
     */
    public DocumentSource createDocumentSource(String documentIRI) throws URISyntaxException, IOException {
        if (documentIRI == null) {
            throw new NullPointerException("documentIRI cannot be null.");
        }
        // Locale.ROOT keeps scheme matching independent of the JVM default locale
        // (e.g. under a Turkish locale "FILE:" would otherwise lower-case to "f\u0131le:").
        final String lowerCaseIRI = documentIRI.toLowerCase(Locale.ROOT);
        if (lowerCaseIRI.startsWith("file:")) {
            return new FileDocumentSource(new File(new URI(documentIRI)));
        }
        if (lowerCaseIRI.startsWith("http:") || lowerCaseIRI.startsWith("https:")) {
            return new HTTPDocumentSource(this.getHTTPClient(), documentIRI);
        }
        throw new IllegalArgumentException(String.format("Unsupported protocol for document IRI: '%s' . Check that document IRI contains a protocol.", documentIRI));
    }

    /**
     * Runs an extraction over the given source. All other {@code extract} overloads
     * end up here.
     *
     * @param eps           extraction parameters handed to the extraction run; the overloads
     *                      without this parameter pass {@code null}.
     * @param in            the document to extract from.
     * @param outputHandler receiver of the extraction output.
     * @param encoding      parser encoding set on the extraction; the overloads without this
     *                      parameter pass {@code null}.
     * @return a report with the matching extractors, the parser encoding, the detected MIME
     *         type, the validation report and the per-extractor issues.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(ExtractionParameters eps, DocumentSource in, TripleHandler outputHandler, String encoding) throws IOException, ExtractionException {
        final SingleDocumentExtraction extraction = new SingleDocumentExtraction(this.configuration, in, this.factories, outputHandler);
        extraction.setMIMETypeDetector(this.mimeTypeDetector);
        extraction.setLocalCopyFactory(this.streamCache);
        extraction.setParserEncoding(encoding);
        final SingleDocumentExtractionReport runReport = extraction.run(eps);
        return new ExtractionReport(extraction.getMatchingExtractors(), extraction.getParserEncoding(), extraction.getDetectedMIMEType(), runReport.getValidationReport(), runReport.getExtractorToIssues());
    }

    /**
     * Extracts from an in-memory document with an explicit content type and encoding.
     * The content type and encoding are given to the {@link StringDocumentSource}; the
     * parser encoding of the extraction itself is left {@code null}.
     *
     * @param in            the document content.
     * @param documentIRI   the IRI of the document.
     * @param contentType   the content type of the document.
     * @param encoding      the encoding of the document.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(String in, String documentIRI, String contentType, String encoding, TripleHandler outputHandler) throws IOException, ExtractionException {
        return this.extract(new StringDocumentSource(in, documentIRI, contentType, encoding), outputHandler);
    }

    /**
     * Extracts from an in-memory document.
     *
     * @param in            the document content.
     * @param documentIRI   the IRI of the document.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(String in, String documentIRI, TripleHandler outputHandler) throws IOException, ExtractionException {
        return this.extract(new StringDocumentSource(in, documentIRI), outputHandler);
    }

    /**
     * Extracts from a local file.
     *
     * @param file          the file to extract from.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(File file, TripleHandler outputHandler) throws IOException, ExtractionException {
        return this.extract(new FileDocumentSource(file), outputHandler);
    }

    /**
     * Extracts from the document at the given IRI, resolved through
     * {@link #createDocumentSource(String)}.
     *
     * @param eps           extraction parameters; may be {@code null}.
     * @param documentIRI   a {@code file:}, {@code http:} or {@code https:} IRI.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         if the HTTP client cannot be obtained, or as declared by
     *                             the underlying extraction.
     * @throws ExtractionException if the IRI is syntactically invalid (wrapping the
     *                             {@link URISyntaxException}), or as declared by the
     *                             underlying extraction.
     */
    public ExtractionReport extract(ExtractionParameters eps, String documentIRI, TripleHandler outputHandler) throws IOException, ExtractionException {
        try {
            return this.extract(eps, this.createDocumentSource(documentIRI), outputHandler);
        }
        catch (URISyntaxException ex) {
            throw new ExtractionException("Error while extracting data from document IRI.", ex);
        }
    }

    /**
     * Extracts from the document at the given IRI without extraction parameters.
     *
     * @param documentIRI   a {@code file:}, {@code http:} or {@code https:} IRI.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         see {@link #extract(ExtractionParameters, String, TripleHandler)}.
     * @throws ExtractionException see {@link #extract(ExtractionParameters, String, TripleHandler)}.
     */
    public ExtractionReport extract(String documentIRI, TripleHandler outputHandler) throws IOException, ExtractionException {
        // The cast selects the (ExtractionParameters, String, TripleHandler) overload;
        // a bare null would be ambiguous with (String, String, TripleHandler).
        return this.extract((ExtractionParameters)null, documentIRI, outputHandler);
    }

    /**
     * Extracts from the given source with an explicit parser encoding and no extraction parameters.
     *
     * @param in            the document to extract from.
     * @param outputHandler receiver of the extraction output.
     * @param encoding      parser encoding set on the extraction.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(DocumentSource in, TripleHandler outputHandler, String encoding) throws IOException, ExtractionException {
        return this.extract(null, in, outputHandler, encoding);
    }

    /**
     * Extracts from the given source with neither extraction parameters nor a parser encoding.
     *
     * @param in            the document to extract from.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(DocumentSource in, TripleHandler outputHandler) throws IOException, ExtractionException {
        return this.extract(null, in, outputHandler, null);
    }

    /**
     * Extracts from the given source with the given parameters and no parser encoding.
     *
     * @param eps           extraction parameters; may be {@code null}.
     * @param in            the document to extract from.
     * @param outputHandler receiver of the extraction output.
     * @return the extraction report.
     * @throws IOException         declared for failures of the underlying extraction.
     * @throws ExtractionException declared for failures of the underlying extraction.
     */
    public ExtractionReport extract(ExtractionParameters eps, DocumentSource in, TripleHandler outputHandler) throws IOException, ExtractionException {
        return this.extract(eps, in, outputHandler, null);
    }

    /**
     * Builds the HTTP Accept header from the MIME types supported by all configured
     * extractor factories.
     *
     * @return the Accept header value produced by {@link AcceptHeaderBuilder}.
     */
    private String getAcceptHeader() {
        final ArrayList<MIMEType> supportedTypes = new ArrayList<MIMEType>();
        for (ExtractorFactory factory : this.factories) {
            supportedTypes.addAll(factory.getSupportedMIMETypes());
        }
        return new AcceptHeaderBuilder(supportedTypes).getAcceptHeader();
    }
}

