/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.html;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.List;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.html.HTMLDocument;
import org.apache.any23.extractor.html.TurtleHTMLExtractorFactory;
import org.apache.any23.extractor.rdf.RDFParserFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;

public class TurtleHTMLExtractor
implements Extractor.TagSoupDOMExtractor {
    private RDFParser turtleParser;

    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document in, ExtractionResult out) throws IOException, ExtractionException {
        HTMLDocument htmlDocument = new HTMLDocument(in);
        IRI documentIRI = extractionContext.getDocumentIRI();
        List<Node> scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/turtle')]");
        this.processScriptNodes(documentIRI, extractionContext, out, scriptNodes);
        scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/n3')]");
        this.processScriptNodes(documentIRI, extractionContext, out, scriptNodes);
        scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/plain')]");
        this.processScriptNodes(documentIRI, extractionContext, out, scriptNodes);
    }

    public ExtractorDescription getDescription() {
        return TurtleHTMLExtractorFactory.getDescriptionInstance();
    }

    private void processScriptNodes(IRI documentIRI, ExtractionContext ec, ExtractionResult er, List<Node> ns) {
        if (ns.size() > 0 && this.turtleParser == null) {
            this.turtleParser = RDFParserFactory.getInstance().getTurtleParserInstance(true, false, ec, er);
        }
        for (Node n : ns) {
            this.processScriptNode(this.turtleParser, documentIRI, n, er);
        }
    }

    private void processScriptNode(RDFParser turtleParser, IRI documentIRI, Node n, ExtractionResult er) {
        Node idAttribute = n.getAttributes().getNamedItem("id");
        String graphName = documentIRI.stringValue() + (idAttribute == null ? "" : "#" + idAttribute.getTextContent());
        try {
            turtleParser.parse((Reader)new StringReader(n.getTextContent()), graphName);
        }
        catch (RDFParseException rdfpe) {
            er.notifyIssue(IssueReport.IssueLevel.ERROR, String.format("An error occurred while parsing turtle content within script node: %s", Arrays.toString(DomUtils.getXPathListForNode(n))), rdfpe.getLineNumber(), rdfpe.getColumnNumber());
        }
        catch (Exception e) {
            er.notifyIssue(IssueReport.IssueLevel.ERROR, "An error occurred while processing RDF data.", -1L, -1L);
        }
    }
}

