/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.rdf;

import com.fasterxml.jackson.core.JsonLocation;
import com.fasterxml.jackson.core.JsonProcessingException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.html.JsoupUtils;
import org.apache.any23.extractor.rdf.JsonCleaningInputStream;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFParser;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.DataNode;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeFilter;
import org.jsoup.select.NodeTraversor;

public abstract class BaseRDFExtractor
implements Extractor.ContentExtractor {
    private boolean verifyDataType;
    private boolean stopAtFirstError;
    private static final Pattern invalidXMLCharacters = Pattern.compile("[^\t\r\n -\ud7ff\ue000-\ufffd\ud800\udc00-\udbff\udfff]");

    public BaseRDFExtractor() {
        this(false, false);
    }

    public BaseRDFExtractor(boolean verifyDataType, boolean stopAtFirstError) {
        this.verifyDataType = verifyDataType;
        this.stopAtFirstError = stopAtFirstError;
    }

    protected abstract RDFParser getParser(ExtractionContext var1, ExtractionResult var2);

    public boolean isVerifyDataType() {
        return this.verifyDataType;
    }

    public void setVerifyDataType(boolean verifyDataType) {
        this.verifyDataType = verifyDataType;
    }

    public boolean isStopAtFirstError() {
        return this.stopAtFirstError;
    }

    public void setStopAtFirstError(boolean b) {
        this.stopAtFirstError = b;
    }

    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream in, ExtractionResult extractionResult) throws IOException, ExtractionException {
        try {
            RDFParser parser = this.getParser(extractionContext, extractionResult);
            RDFFormat format = parser.getRDFFormat();
            final String iri = extractionContext.getDocumentIRI().stringValue();
            if (format.hasFileExtension("xhtml") || format.hasMIMEType("application/xhtml+xml")) {
                Charset charset = format.getCharset();
                if (charset == null) {
                    charset = StandardCharsets.UTF_8;
                }
                Document doc = JsoupUtils.parse(in, iri, null);
                doc.outputSettings().prettyPrint(false).syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml).charset(charset);
                NodeTraversor.filter((NodeFilter)new NodeFilter(){
                    final HashSet<String> tmpAttributeKeys = new HashSet();

                    public NodeFilter.FilterResult head(Node node, int depth) {
                        if (node instanceof Element) {
                            block9: {
                                HashSet<String> attributeKeys = this.tmpAttributeKeys;
                                Iterator it = node.attributes().iterator();
                                while (it.hasNext()) {
                                    Attribute attr = (Attribute)it.next();
                                    String oldKey = attr.getKey();
                                    String newKey = oldKey.replaceAll("[^-a-zA-Z0-9_:.]", "");
                                    int prefixlen = newKey.lastIndexOf(58) + 1;
                                    String prefix = newKey.substring(0, prefixlen).toLowerCase();
                                    if ((newKey = (prefix.startsWith("xml") ? prefix : "") + newKey.substring(prefixlen)).matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*") && !newKey.startsWith("xmlns:xml") && attributeKeys.add(newKey)) {
                                        if (newKey.equals(oldKey)) continue;
                                        attr.setKey(newKey);
                                        continue;
                                    }
                                    it.remove();
                                }
                                attributeKeys.clear();
                                String tagName = ((Element)node).tagName().replaceAll("[^-a-zA-Z0-9_:.]", "");
                                tagName = tagName.substring(tagName.lastIndexOf(58) + 1);
                                ((Element)node).tagName(tagName.matches("[a-zA-Z_:][-a-zA-Z0-9_:.]*") ? tagName : "div");
                                if ("base".equalsIgnoreCase(tagName) && node.hasAttr("href")) {
                                    String absHref;
                                    String href;
                                    block8: {
                                        href = node.attr("href");
                                        try {
                                            ParsedIRI parsedHref = ParsedIRI.create((String)href.trim());
                                            if (parsedHref.isAbsolute()) {
                                                absHref = parsedHref.toString();
                                                break block8;
                                            }
                                            parsedHref = ParsedIRI.create((String)iri.trim()).resolve(parsedHref);
                                            if (!parsedHref.isAbsolute()) break block9;
                                            absHref = parsedHref.toString();
                                        }
                                        catch (RuntimeException e) {
                                            break block9;
                                        }
                                    }
                                    if (!absHref.equals(href)) {
                                        node.attr("href", absHref);
                                    }
                                }
                            }
                            return NodeFilter.FilterResult.CONTINUE;
                        }
                        return node instanceof DataNode || node instanceof Comment || node instanceof DocumentType ? NodeFilter.FilterResult.REMOVE : NodeFilter.FilterResult.CONTINUE;
                    }

                    public NodeFilter.FilterResult tail(Node node, int depth) {
                        return NodeFilter.FilterResult.CONTINUE;
                    }
                }, (Node)doc);
                String finalOutput = invalidXMLCharacters.matcher(doc.toString()).replaceAll("");
                in = new ByteArrayInputStream(finalOutput.getBytes(charset));
            } else if (format.hasFileExtension("jsonld") || format.hasMIMEType("application/ld+json")) {
                in = new JsonCleaningInputStream(in);
            }
            parser.parse(in, iri);
        }
        catch (Exception ex) {
            Throwable cause = ex.getCause();
            if (cause instanceof JsonProcessingException) {
                JsonProcessingException err = (JsonProcessingException)cause;
                JsonLocation loc = err.getLocation();
                if (loc == null) {
                    extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, err.getOriginalMessage(), -1L, -1L);
                } else {
                    extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, err.getOriginalMessage(), (long)loc.getLineNr(), (long)loc.getColumnNr());
                }
            }
            extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, BaseRDFExtractor.toString(ex), -1L, -1L);
        }
    }

    private static String toString(Throwable th) {
        StringWriter writer = new StringWriter();
        try (PrintWriter pw = new PrintWriter(writer);){
            th.printStackTrace(pw);
        }
        String string = writer.toString();
        if (string.length() > 1024) {
            return string.substring(0, 1021) + "...";
        }
        return string;
    }
}

