/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.rdf;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.html.JsoupUtils;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.helpers.BasicParserSettings;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class BaseRDFExtractor
implements Extractor.ContentExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(BaseRDFExtractor.class);
    private boolean verifyDataType;
    private boolean stopAtFirstError;

    public BaseRDFExtractor() {
        this(false, false);
    }

    public BaseRDFExtractor(boolean verifyDataType, boolean stopAtFirstError) {
        this.verifyDataType = verifyDataType;
        this.stopAtFirstError = stopAtFirstError;
    }

    protected abstract RDFParser getParser(ExtractionContext var1, ExtractionResult var2);

    public boolean isVerifyDataType() {
        return this.verifyDataType;
    }

    public void setVerifyDataType(boolean verifyDataType) {
        this.verifyDataType = verifyDataType;
    }

    public boolean isStopAtFirstError() {
        return this.stopAtFirstError;
    }

    public void setStopAtFirstError(boolean b) {
        this.stopAtFirstError = b;
    }

    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream in, ExtractionResult extractionResult) throws IOException, ExtractionException {
        try {
            RDFParser parser = this.getParser(extractionContext, extractionResult);
            parser.getParserConfig().setNonFatalErrors(new HashSet());
            parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, (Object)true);
            parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
            parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, (Object)true);
            parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
            parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, (Object)false);
            parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
            RDFFormat format = parser.getRDFFormat();
            String iri = extractionContext.getDocumentIRI().stringValue();
            if (format.hasFileExtension("xhtml") || format.hasMIMEType("application/xhtml+xml")) {
                Charset charset = format.getCharset();
                if (charset == null) {
                    charset = StandardCharsets.UTF_8;
                }
                Document doc = JsoupUtils.parse(in, iri, null);
                doc.outputSettings().prettyPrint(false).syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml).charset(charset);
                NodeTraversor.traverse((NodeVisitor)new NodeVisitor(){

                    public void head(Node node, int depth) {
                        if (node instanceof DataNode) {
                            ((DataNode)node).setWholeData("");
                        }
                    }

                    public void tail(Node node, int depth) {
                    }
                }, (Node)doc);
                in = new ByteArrayInputStream(doc.toString().getBytes(charset));
            } else if (format.hasFileExtension("jsonld") || format.hasMIMEType("application/ld+json")) {
                in = new JsonCommentStripperInputStream(in);
            }
            parser.parse(in, iri);
        }
        catch (RDFHandlerException ex) {
            throw new IllegalStateException("Unexpected exception.", ex);
        }
        catch (RDFParseException ex) {
            LOG.error("Error while parsing RDF document.", (Object)ex, (Object)extractionResult);
        }
    }

    private static class JsonCommentStripperInputStream
    extends InputStream {
        private int prevChar;
        private boolean inQuote;
        private boolean inCDATA;
        private final PushbackInputStream wrapped;

        JsonCommentStripperInputStream(InputStream in) {
            this.wrapped = new PushbackInputStream(in, 16);
        }

        private boolean isNextOrUnread(int ... next) throws IOException {
            int i = -1;
            for (int test : next) {
                int c = this.wrapped.read();
                if (c != test) {
                    if (c != -1) {
                        this.wrapped.unread(c);
                    }
                    while (i >= 0) {
                        this.wrapped.unread(next[i--]);
                    }
                    return false;
                }
                ++i;
            }
            return true;
        }

        @Override
        public int read() throws IOException {
            this.prevChar = this.privateRead();
            return this.prevChar;
        }

        private int privateRead() throws IOException {
            PushbackInputStream stream = this.wrapped;
            int c = stream.read();
            if (this.inQuote) {
                if (c == 34 && this.prevChar != 92) {
                    this.inQuote = false;
                }
                return c;
            }
            switch (c) {
                case 47: {
                    if (this.isNextOrUnread(47)) {
                        while ((c = stream.read()) != -1 && c != 13 && c != 10) {
                        }
                        return c;
                    }
                    if (this.isNextOrUnread(42)) {
                        while (true) {
                            if ((c = stream.read()) == -1) {
                                return c;
                            }
                            if (c != 42) continue;
                            c = stream.read();
                            if (c == -1) {
                                return c;
                            }
                            if (c == 47) break;
                        }
                        return 32;
                    }
                    return c;
                }
                case 60: {
                    if (this.isNextOrUnread(33, 91, 67, 68, 65, 84, 65, 91)) {
                        this.inCDATA = true;
                        return 32;
                    }
                    return c;
                }
                case 35: {
                    while ((c = stream.read()) != -1 && c != 13 && c != 10) {
                    }
                    return c;
                }
                case 93: {
                    if (this.inCDATA) {
                        if (this.isNextOrUnread(93, 62)) {
                            this.inCDATA = false;
                            return 32;
                        }
                        return c;
                    }
                    return c;
                }
                case 34: {
                    this.inQuote = true;
                    return c;
                }
            }
            return c;
        }
    }
}

