/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.microdata;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.microdata.ItemProp;
import org.apache.any23.extractor.microdata.ItemPropValue;
import org.apache.any23.extractor.microdata.ItemScope;
import org.apache.any23.extractor.microdata.MicrodataExtractorFactory;
import org.apache.any23.extractor.microdata.MicrodataParser;
import org.apache.any23.extractor.microdata.MicrodataParserException;
import org.apache.any23.extractor.microdata.MicrodataParserReport;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.DCTerms;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Extractor that converts the Microdata found in a tag-soup DOM document into
 * RDF triples written to an {@link ExtractionResult}.
 *
 * <p>For every top-level item scope reported by {@link MicrodataParser} a
 * subject is created, typed and populated with its properties. In addition the
 * document title, {@code a}/{@code area}/{@code link} elements carrying
 * {@code rel} and {@code href}, {@code meta} elements carrying {@code name}
 * and {@code content}, and the {@code cite} attribute of
 * {@code blockquote}/{@code q} elements are emitted as statements about the
 * document IRI.
 *
 * <p>{@link #run} stores per-document settings (strict flag, default
 * namespace, document language) in instance fields that the private helpers
 * read back, so one instance must not execute {@code run} concurrently.
 */
public class MicrodataExtractor
implements Extractor.TagSoupDOMExtractor {
    /** Predicate linking the document to each top-level Microdata item. */
    private static final IRI MICRODATA_ITEM = RDFUtils.iri("http://www.w3.org/1999/xhtml/microdata#item");

    /** Namespace used to mint predicates for {@code rel} tokens and {@code meta} names. */
    private static final String XHTML_VOCAB_NS = "http://www.w3.org/1999/xhtml/vocab#";

    /** Language declared on the root element of the current document, or {@code null}. */
    private String documentLanguage;

    /** Value of the {@code any23.microdata.strict} flag for the current run. */
    private boolean isStrict;

    /** Value of {@code any23.microdata.ns.default}; only refreshed when not strict. */
    private String defaultNamespace;

    /**
     * Returns the description supplied by {@link MicrodataExtractorFactory}.
     *
     * @return the extractor description instance
     */
    @Override
    public ExtractorDescription getDescription() {
        return MicrodataExtractorFactory.getDescriptionInstance();
    }

    /**
     * Parses the Microdata of {@code in} and writes the resulting triples to
     * {@code out}.
     *
     * <p>Parser errors are reported as issues on {@code out}. When no item
     * scope is detected nothing else is written: the title, link, meta and
     * cite statements are only produced for documents containing Microdata.
     *
     * @param extractionParameters source of the strict flag and default namespace
     * @param extractionContext    provides the IRI of the processed document
     * @param in                   the DOM document to analyse
     * @param out                  sink for triples and issue notifications
     * @throws IOException         declared by the extractor contract
     * @throws ExtractionException if a property cannot be turned into a valid URL
     */
    @Override
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document in, ExtractionResult out) throws IOException, ExtractionException {
        MicrodataParserReport parserReport = MicrodataParser.getMicrodata(in);
        MicrodataParserException[] errors = parserReport.getErrors();
        if (errors.length > 0) {
            this.notifyError(errors, out);
        }
        ItemScope[] itemScopes = parserReport.getDetectedItemScopes();
        if (itemScopes.length == 0) {
            return;
        }
        this.isStrict = extractionParameters.getFlag("any23.microdata.strict");
        if (!this.isStrict) {
            this.defaultNamespace = extractionParameters.getProperty("any23.microdata.ns.default");
        }
        this.documentLanguage = this.getDocumentLanguage(in);
        IRI documentIRI = extractionContext.getDocumentIRI();
        // Shared across all scopes so that an item reached twice keeps the same subject.
        Map<ItemScope, Resource> mappings = new HashMap<ItemScope, Resource>();
        for (ItemScope itemScope : itemScopes) {
            Resource subject = this.processType(itemScope, documentIRI, out, mappings);
            out.writeTriple(documentIRI, MICRODATA_ITEM, subject);
        }
        this.processTitle(in, documentIRI, out);
        this.processHREFElements(in, documentIRI, out);
        this.processMetaElements(in, documentIRI, out);
        this.processCiteElements(in, documentIRI, out);
    }

    /**
     * Reads the {@code lang} attribute of the root {@code HTML} element.
     *
     * @param in the document to inspect
     * @return the declared language, or {@code null} when the lookup yields an empty string
     */
    private String getDocumentLanguage(Document in) {
        String lang = DomUtils.find(in, "string(/HTML/@lang)");
        if ("".equals(lang)) {
            return null;
        }
        return lang;
    }

    /**
     * Returns the language of a node: its own {@code lang} attribute when
     * present, otherwise the document language.
     *
     * @param node an element node
     * @return the language to use for literals taken from {@code node}; may be {@code null}
     */
    private String getLanguage(Node node) {
        Node nodeLang = node.getAttributes().getNamedItem("lang");
        if (nodeLang == null) {
            return this.documentLanguage;
        }
        return nodeLang.getTextContent();
    }

    /**
     * Builds a literal, attaching the language tag only when one is known.
     *
     * @param text     the lexical value
     * @param language the language tag, or {@code null} for a plain literal
     * @return the literal for {@code text}
     */
    private static Literal toLiteral(String text, String language) {
        return language == null ? RDFUtils.literal(text) : RDFUtils.literal(text, language);
    }

    /**
     * Emits a {@code dcterms:title} triple when the document contains exactly
     * one {@code title} element.
     */
    private void processTitle(Document in, IRI documentIRI, ExtractionResult out) {
        NodeList titles = in.getElementsByTagName("title");
        if (titles.getLength() != 1) {
            return;
        }
        Node title = titles.item(0);
        Literal object = toLiteral(title.getTextContent(), this.getLanguage(title));
        out.writeTriple(documentIRI, DCTerms.getInstance().title, object);
    }

    /**
     * Processes every {@code a}, {@code area} and {@code link} element of the
     * document, in that order.
     */
    private void processHREFElements(Document in, IRI documentIRI, ExtractionResult out) {
        for (String tagName : new String[] {"a", "area", "link"}) {
            NodeList elements = in.getElementsByTagName(tagName);
            for (int i = 0; i < elements.getLength(); ++i) {
                this.processHREFElement(elements.item(i), documentIRI, out);
            }
        }
    }

    /**
     * Emits one triple per distinct {@code rel} token of an element that has
     * both a {@code rel} and an {@code href} attribute.
     *
     * <p>A relative {@code href} is resolved against the document IRI; an
     * element whose target cannot be turned into a URL is skipped. Tokens
     * containing a colon are ignored, {@code alternate} and
     * {@code stylesheet} are both mapped to {@code ALTERNATE-STYLESHEET}, and
     * all other tokens are lower-cased.
     *
     * @param item        the element to inspect
     * @param documentIRI subject of the produced triples
     * @param out         sink for the triples
     */
    private void processHREFElement(Node item, IRI documentIRI, ExtractionResult out) {
        Node rel = item.getAttributes().getNamedItem("rel");
        if (rel == null) {
            return;
        }
        Node href = item.getAttributes().getNamedItem("href");
        if (href == null) {
            return;
        }
        URL absoluteURL;
        try {
            // toAbsoluteURL returns an already absolute href unchanged and
            // resolves a relative one against the document IRI.
            absoluteURL = this.toAbsoluteURL(documentIRI.toString(), href.getTextContent(), '/');
        }
        catch (MalformedURLException ignored) {
            // The target is not usable as a URL: nothing can be stated about it.
            return;
        }
        Set<String> uniqueTokens = new HashSet<String>();
        // Split on runs of whitespace so repeated separators do not yield empty tokens.
        for (String relToken : rel.getTextContent().trim().split("\\s+")) {
            if (relToken.isEmpty() || relToken.contains(":")) {
                continue;
            }
            if ("alternate".equals(relToken) || "stylesheet".equals(relToken)) {
                uniqueTokens.add("ALTERNATE-STYLESHEET");
                continue;
            }
            uniqueTokens.add(relToken.toLowerCase(Locale.ROOT));
        }
        IRI target = RDFUtils.iri(absoluteURL.toString());
        for (String token : uniqueTokens) {
            // NOTE(review): tokens containing ':' were dropped above, so the
            // absolute-URL branch looks unreachable — confirm the intended rule.
            IRI predicate = this.isAbsoluteURL(token) ? RDFUtils.iri(token) : RDFUtils.iri(XHTML_VOCAB_NS + token);
            out.writeTriple(documentIRI, predicate, target);
        }
    }

    /**
     * Emits a triple for every {@code meta} element that carries both a
     * {@code name} and a {@code content} attribute. An absolute-URL name is
     * used directly as predicate; any other name is placed in the XHTML
     * vocabulary namespace.
     */
    private void processMetaElements(Document in, IRI documentIRI, ExtractionResult out) {
        NodeList metas = in.getElementsByTagName("meta");
        for (int i = 0; i < metas.getLength(); ++i) {
            Node meta = metas.item(i);
            String name = DomUtils.readAttribute(meta, "name", null);
            String content = DomUtils.readAttribute(meta, "content", null);
            if (name == null || content == null) {
                continue;
            }
            String language = this.getLanguage(meta);
            if (this.isAbsoluteURL(name)) {
                this.processMetaElement(RDFUtils.iri(name), content, language, documentIRI, out);
            } else {
                this.processMetaElement(name, content, language, documentIRI, out);
            }
        }
    }

    /**
     * Writes a {@code meta} statement whose predicate is an explicit IRI.
     * Nothing is written when {@code content} contains a colon.
     *
     * @param uri         predicate of the triple
     * @param content     value of the {@code content} attribute
     * @param language    language tag for the literal, or {@code null}
     * @param documentIRI subject of the triple
     * @param out         sink for the triple
     */
    private void processMetaElement(IRI uri, String content, String language, IRI documentIRI, ExtractionResult out) {
        // NOTE(review): the colon test is applied to the content, not the name — confirm this is intended.
        if (content.contains(":")) {
            return;
        }
        out.writeTriple(documentIRI, uri, toLiteral(content, language));
    }

    /**
     * Writes a {@code meta} statement whose predicate is the lower-cased name
     * appended to the XHTML vocabulary namespace.
     *
     * @param name        value of the {@code name} attribute
     * @param content     value of the {@code content} attribute
     * @param language    language tag for the literal, or {@code null}
     * @param documentIRI subject of the triple
     * @param out         sink for the triple
     */
    private void processMetaElement(String name, String content, String language, IRI documentIRI, ExtractionResult out) {
        // Locale.ROOT keeps the generated IRI independent of the JVM default locale.
        IRI predicate = RDFUtils.iri(XHTML_VOCAB_NS + name.toLowerCase(Locale.ROOT));
        out.writeTriple(documentIRI, predicate, toLiteral(content, language));
    }

    /**
     * Processes every {@code blockquote} and then every {@code q} element of
     * the document.
     */
    private void processCiteElements(Document in, IRI documentIRI, ExtractionResult out) {
        for (String tagName : new String[] {"blockquote", "q"}) {
            NodeList elements = in.getElementsByTagName(tagName);
            for (int i = 0; i < elements.getLength(); ++i) {
                this.processCiteElement(elements.item(i), documentIRI, out);
            }
        }
    }

    /**
     * Emits a {@code dcterms:source} triple pointing at the {@code cite}
     * attribute of the element, when that attribute exists. The attribute
     * value is passed to {@code RDFUtils.iri} as is.
     */
    private void processCiteElement(Node item, IRI documentIRI, ExtractionResult out) {
        Node cite = item.getAttributes().getNamedItem("cite");
        if (cite != null) {
            out.writeTriple(documentIRI, DCTerms.getInstance().source, RDFUtils.iri(cite.getTextContent()));
        }
    }

    /**
     * Writes the type and all properties of an item scope and returns the
     * resource that identifies it.
     *
     * <p>The subject is taken from {@code mappings} when the scope was seen
     * before; otherwise it is the item id when that is an absolute URL, or a
     * blank node derived from the scope's hash code. Nested scopes are handled
     * through {@link #processProperty}, which calls back into this method.
     *
     * @param itemScope   the item to serialise
     * @param documentIRI IRI of the enclosing document, used to resolve links
     * @param out         sink for the triples
     * @param mappings    scope-to-subject registry shared across the whole run
     * @return the subject used for {@code itemScope}
     * @throws ExtractionException if a property yields a malformed URL
     */
    private Resource processType(ItemScope itemScope, IRI documentIRI, ExtractionResult out, Map<ItemScope, Resource> mappings) throws ExtractionException {
        Resource subject;
        if (mappings.containsKey(itemScope)) {
            subject = mappings.get(itemScope);
        } else if (this.isAbsoluteURL(itemScope.getItemId())) {
            subject = RDFUtils.iri(itemScope.getItemId());
        } else {
            subject = RDFUtils.getBNode(Integer.toString(itemScope.hashCode()));
        }
        mappings.put(itemScope, subject);
        // An empty string marks an untyped item for processProperty.
        String itemScopeType = "";
        if (itemScope.getType() != null) {
            itemScopeType = itemScope.getType().toString();
            out.writeTriple(subject, RDF.TYPE, RDFUtils.iri(itemScopeType));
        }
        for (Map.Entry<String, List<ItemProp>> entry : itemScope.getProperties().entrySet()) {
            String propName = entry.getKey();
            for (ItemProp itemProp : entry.getValue()) {
                try {
                    this.processProperty(subject, propName, itemProp, itemScopeType, documentIRI, mappings, out);
                }
                catch (MalformedURLException e) {
                    // Keep the original failure attached so the offending URL can be diagnosed.
                    throw new ExtractionException("Error while processing on subject '" + subject + "' the itemProp: '" + itemProp + "' ", e);
                }
            }
        }
        return subject;
    }

    /**
     * Writes the triple for a single item property.
     *
     * <p>A property name that is not an absolute URL and belongs to an untyped
     * item is dropped in strict mode and resolved against the default
     * namespace otherwise; every other name is resolved against the item type.
     * The object depends on the value type: a nested item becomes its subject
     * resource, plain text a literal in the document language, a link an IRI
     * resolved against the document, and a date a typed literal.
     *
     * @param subject       subject of the triple
     * @param propName      Microdata property name
     * @param itemProp      property whose value is written
     * @param itemScopeType type IRI of the owning item, or {@code ""} when untyped
     * @param documentIRI   IRI of the enclosing document
     * @param mappings      scope-to-subject registry, forwarded for nested items
     * @param out           sink for the triple
     * @throws MalformedURLException if the predicate or a link value is not a valid URL
     * @throws ExtractionException   if processing a nested item fails
     * @throws IllegalStateException if the value type is not one of the handled kinds
     */
    private void processProperty(Resource subject, String propName, ItemProp itemProp, String itemScopeType, IRI documentIRI, Map<ItemScope, Resource> mappings, ExtractionResult out) throws MalformedURLException, ExtractionException {
        boolean unqualified = !this.isAbsoluteURL(propName) && "".equals(itemScopeType);
        if (unqualified && this.isStrict) {
            return;
        }
        String namespace = unqualified ? this.defaultNamespace : itemScopeType;
        IRI predicate = RDFUtils.iri(this.toAbsoluteURL(namespace, propName, '/').toString());
        Object propValue = itemProp.getValue().getContent();
        ItemPropValue.Type propType = itemProp.getValue().getType();
        Value value;
        if (propType == ItemPropValue.Type.Nested) {
            value = this.processType((ItemScope)propValue, documentIRI, out, mappings);
        } else if (propType == ItemPropValue.Type.Plain) {
            value = toLiteral((String)propValue, this.documentLanguage);
        } else if (propType == ItemPropValue.Type.Link) {
            value = RDFUtils.iri(this.toAbsoluteURL(documentIRI.toString(), (String)propValue, '/').toString());
        } else if (propType == ItemPropValue.Type.Date) {
            // NOTE(review): the lexical form comes from formatDateTime but the
            // datatype is xsd:date — confirm the two agree.
            value = RDFUtils.literal(ItemPropValue.formatDateTime((Date)propValue), XMLSchema.DATE);
        } else {
            throw new IllegalStateException("Invalid Type '" + propType + "' for ItemPropValue with name: '" + propName + "'");
        }
        out.writeTriple(subject, predicate, value);
    }

    /**
     * Tells whether a string can be parsed by {@link URL} on its own and has
     * a non-blank protocol.
     *
     * @param urlString the candidate; {@code null} is treated as not absolute
     * @return {@code true} only for a parseable URL with a protocol
     */
    private boolean isAbsoluteURL(String urlString) {
        if (urlString == null) {
            return false;
        }
        try {
            String protocol = new URL(urlString).getProtocol();
            return protocol != null && protocol.trim().length() > 0;
        }
        catch (MalformedURLException ignored) {
            // Not parseable without a base, hence not absolute.
            return false;
        }
    }

    /**
     * Resolves {@code part} against the namespace {@code ns}.
     *
     * <p>An already absolute {@code part} is returned unchanged. Otherwise it
     * is appended to {@code ns}, inserting {@code trailing} unless {@code ns}
     * already ends with {@code '#'} or {@code '/'}.
     *
     * @param ns       base namespace; only consulted when {@code part} is not absolute
     * @param part     absolute URL or local name
     * @param trailing separator inserted between {@code ns} and {@code part} when needed
     * @return the resulting URL
     * @throws MalformedURLException if the result is not a valid URL, or if
     *         {@code part} is not absolute and {@code ns} is {@code null} or empty
     */
    private URL toAbsoluteURL(String ns, String part, char trailing) throws MalformedURLException {
        if (this.isAbsoluteURL(part)) {
            return new URL(part);
        }
        if (ns == null || ns.isEmpty()) {
            // Report the problem through the declared exception instead of
            // failing on ns.charAt below.
            throw new MalformedURLException("Cannot resolve '" + part + "' without a namespace");
        }
        char lastChar = ns.charAt(ns.length() - 1);
        if (lastChar == '#' || lastChar == '/') {
            return new URL(ns + part);
        }
        return new URL(ns + trailing + part);
    }

    /**
     * Reports every parser error to {@code out} as an ERROR-level issue,
     * using the error's JSON form as message and its begin row/column as
     * location.
     */
    private void notifyError(MicrodataParserException[] errors, ExtractionResult out) {
        for (MicrodataParserException mpe : errors) {
            out.notifyIssue(IssueReport.IssueLevel.ERROR, mpe.toJSON(), (long)mpe.getErrorLocationBeginRow(), (long)mpe.getErrorLocationBeginCol());
        }
    }
}

