/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.html;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
import org.apache.any23.rdf.RDFUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

/**
 * Wraps a DOM {@link Node} and offers helpers for reading text and URL values out of it,
 * mostly by class name or XPath, delegating the DOM searches to {@link DomUtils}.
 *
 * <p>NOTE(review): the XPath engine below is a single static instance shared by all
 * {@code HTMLDocument} objects. The {@code javax.xml.xpath.XPath} Javadoc states that XPath
 * objects are not thread-safe, so concurrent calls presumably need external coordination
 * - confirm against callers.
 */
public class HTMLDocument {
    private static final XPath xPathEngine = XPathFactory.newInstance().newXPath();
    private static final Logger log = LoggerFactory.getLogger(HTMLDocument.class);
    private final Node document;
    /** Lazily computed by {@link #getBaseIRI()}; {@code null} until first resolved. */
    private URI baseIRI;
    private final Any23ValueFactoryWrapper valueFactory = new Any23ValueFactoryWrapper((ValueFactory)SimpleValueFactory.getInstance());

    /**
     * Reads the textual value of a node, applying these rules in order:
     * <ol>
     *   <li>a node without an attribute map yields its text content;</li>
     *   <li>if {@code DomUtils.findAllByClassName(node, "value")} finds nodes, their text
     *       contents are concatenated and trimmed;</li>
     *   <li>{@code ABBR} with a {@code title} attribute yields the title;</li>
     *   <li>{@code A} for which {@code DomUtils.hasAttribute(node, "rel", "tag")} holds and
     *       that carries an {@code href} yields {@link #extractRelTag(NamedNodeMap)};</li>
     *   <li>{@code IMG} / {@code AREA} with an {@code alt} attribute yields the alt text;</li>
     *   <li>anything else yields the node's text content.</li>
     * </ol>
     * Node names are compared against the upper-case literals shown above.
     *
     * @param node node to read; must not be {@code null}
     * @return a field whose source is always {@code node}
     */
    public static TextField readTextField(Node node) {
        String name = node.getNodeName();
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return new TextField(node.getTextContent(), node);
        }
        List<Node> values = DomUtils.findAllByClassName(node, "value");
        if (!values.isEmpty()) {
            StringBuilder val = new StringBuilder();
            for (Node n : values) {
                val.append(n.getTextContent());
            }
            return new TextField(val.toString().trim(), node);
        }
        if ("ABBR".equals(name)) {
            Node title = attributes.getNamedItem("title");
            if (title != null) {
                return new TextField(title.getNodeValue(), node);
            }
        } else if ("A".equals(name)) {
            // A rel-tag anchor without an href used to raise a NullPointerException;
            // it now falls through to the plain text content like any other anchor.
            if (DomUtils.hasAttribute(node, "rel", "tag") && attributes.getNamedItem("href") != null) {
                return new TextField(HTMLDocument.extractRelTag(attributes), node);
            }
        } else if ("IMG".equals(name) || "AREA".equals(name)) {
            Node alt = attributes.getNamedItem("alt");
            if (alt != null) {
                return new TextField(alt.getNodeValue(), node);
            }
        }
        return new TextField(node.getTextContent(), node);
    }

    /**
     * Appends the URL-like value of a node to {@code res}.
     *
     * <p>The attribute consulted depends on the node name: {@code href} for {@code A} and
     * {@code AREA}, {@code title} for {@code ABBR}, {@code src} (falling back to the first
     * entry of {@code srcset}) for {@code IMG}, and {@code data} for {@code OBJECT}. For
     * these elements nothing is appended when the attribute is absent, and the attribute
     * node itself is recorded as the field source. A node without an attribute map
     * contributes its text content; any other element contributes the text gathered by
     * {@link #extractHCardTextContent(Node)}.
     *
     * @param res  list receiving the extracted field, if any
     * @param node node to read; must not be {@code null}
     */
    public static void readUrlField(List<TextField> res, Node node) {
        String name = node.getNodeName();
        NamedNodeMap attributes = node.getAttributes();
        if (null == attributes) {
            res.add(new TextField(node.getTextContent(), node));
            return;
        }
        if ("A".equals(name) || "AREA".equals(name)) {
            HTMLDocument.addAttributeValue(res, attributes, "href");
        } else if ("ABBR".equals(name)) {
            HTMLDocument.addAttributeValue(res, attributes, "title");
        } else if ("IMG".equals(name)) {
            if (!HTMLDocument.addAttributeValue(res, attributes, "src")) {
                Node srcset = attributes.getNamedItem("srcset");
                if (srcset != null) {
                    res.add(new TextField(HTMLDocument.firstSrcsetUrl(srcset.getNodeValue()), srcset));
                }
            }
        } else if ("OBJECT".equals(name)) {
            HTMLDocument.addAttributeValue(res, attributes, "data");
        } else {
            res.add(new TextField(HTMLDocument.extractHCardTextContent(node), node));
        }
    }

    /**
     * Appends the value of the named attribute to {@code res}, using the attribute node as
     * the field source.
     *
     * @return {@code true} if the attribute was present and a field was appended
     */
    private static boolean addAttributeValue(List<TextField> res, NamedNodeMap attributes, String attributeName) {
        Node attribute = attributes.getNamedItem(attributeName);
        if (attribute == null) {
            return false;
        }
        res.add(new TextField(attribute.getNodeValue(), attribute));
        return true;
    }

    /**
     * Returns the first non-empty token of a {@code srcset} value split on runs of
     * whitespace and commas, or the empty string if there is none.
     *
     * <p>Skipping empty tokens matters because {@code String.split} yields a leading empty
     * string when the value starts with a separator, and an empty array when the value
     * consists of separators only.
     */
    private static String firstSrcsetUrl(String srcset) {
        for (String token : srcset.split("[\\s,]+")) {
            if (!token.isEmpty()) {
                return token;
            }
        }
        return "";
    }

    /**
     * Collects the text of the nodes with class {@code value} below {@code node}; if there
     * are none, collects all text that is not inside a node with class {@code type}.
     */
    private static String extractHCardTextContent(Node node) {
        StringBuilder sb = new StringBuilder();
        NodeList nodes = node.getChildNodes();
        if (HTMLDocument.extractTextInValue(nodes, sb) == 0) {
            HTMLDocument.extractTextNotInType(nodes, sb);
        }
        return sb.toString();
    }

    /**
     * Recursively appends the trimmed text content of every node with class {@code value}.
     * The search does not descend into a node once it matched.
     *
     * @return the number of matching nodes found
     */
    private static int extractTextInValue(NodeList nodes, StringBuilder b) {
        int count = 0;
        int len = nodes.getLength();
        for (int i = 0; i < len; ++i) {
            Node n = nodes.item(i);
            if (DomUtils.hasClassName(n, "value")) {
                ++count;
                b.append(n.getTextContent().trim());
                continue;
            }
            count += HTMLDocument.extractTextInValue(n.getChildNodes(), b);
        }
        return count;
    }

    /**
     * Recursively appends the trimmed value of every text node, skipping whole subtrees
     * rooted at a node with class {@code type}.
     */
    private static void extractTextNotInType(NodeList nodes, StringBuilder b) {
        int len = nodes.getLength();
        for (int i = 0; i < len; ++i) {
            Node n = nodes.item(i);
            if (n.getNodeType() == Node.TEXT_NODE) {
                b.append(n.getNodeValue().trim());
                continue;
            }
            if (DomUtils.hasClassName(n, "type")) {
                continue;
            }
            HTMLDocument.extractTextNotInType(n.getChildNodes(), b);
        }
    }

    /**
     * Strips the query string and fragment from an {@code href} value and removes one
     * trailing slash, if present.
     *
     * <p>An input whose path part is empty (for example {@code ""}, {@code "#top"} or
     * {@code "?q=1"}) yields the empty string instead of raising an index exception.
     *
     * @param hrefAttributeContent raw attribute value; must not be {@code null}
     * @return the part before the first {@code '#'} or {@code '?'}, without a trailing slash
     */
    public static String extractRelTag(String hrefAttributeContent) {
        // A positive limit keeps empty strings, so the array always has an element 0.
        String path = hrefAttributeContent.split("[#?]", 2)[0];
        if (path.endsWith("/")) {
            path = path.substring(0, path.length() - 1);
        }
        return path;
    }

    /**
     * Applies {@link #extractRelTag(String)} to the {@code href} attribute of the given map.
     *
     * @param attributes attribute map of an element; must not be {@code null}
     * @return the normalized href, or the empty string if there is no {@code href} attribute
     */
    public static String extractRelTag(NamedNodeMap attributes) {
        Node href = attributes.getNamedItem("href");
        if (href == null) {
            return "";
        }
        return HTMLDocument.extractRelTag(href.getNodeValue());
    }

    /**
     * Returns the text content of a node.
     *
     * @param node     node to read
     * @param prettify if {@code true}, the text is trimmed, newlines become spaces and runs
     *                 of spaces are collapsed to one
     * @return the (optionally normalized) text content
     */
    public static String readNodeContent(Node node, boolean prettify) {
        String content = node.getTextContent();
        return prettify ? content.trim().replaceAll("\\n", " ").replaceAll(" +", " ") : content;
    }

    /**
     * Creates a wrapper around the given node.
     *
     * @param document root node for all lookups
     * @throws IllegalArgumentException if {@code document} is {@code null}
     */
    public HTMLDocument(Node document) {
        if (null == document) {
            throw new IllegalArgumentException("node cannot be null when constructing an HTMLDocument");
        }
        this.document = document;
    }

    /**
     * Resolves {@code uri} against the base IRI derived from the owning document's URI.
     *
     * @throws ExtractionException if the base IRI cannot be determined or the value
     *                             factory reports a failure
     */
    public IRI resolveIRI(String uri) throws ExtractionException {
        return this.valueFactory.resolveIRI(uri, this.getBaseIRI());
    }

    /** Delegates to {@code DomUtils.find} on the wrapped node. */
    public String find(String xpath) {
        return DomUtils.find(this.getDocument(), xpath);
    }

    /** Delegates to {@code DomUtils.findNodeById} on the wrapped node. */
    public Node findNodeById(String id) {
        return DomUtils.findNodeById(this.getDocument(), id);
    }

    /** Delegates to {@code DomUtils.findAll} on the wrapped node. */
    public List<Node> findAll(String xpath) {
        return DomUtils.findAll(this.getDocument(), xpath);
    }

    /**
     * Looks up the first object node matching {@code objectTag} / {@code object} and reads
     * a field value from it.
     *
     * <p>If the object node itself has class {@code field}, its text content is returned.
     * Otherwise the XPath {@code .//fieldTag[contains(@class, 'field')]/key} is evaluated
     * relative to the object node. The expression is assembled by plain string
     * concatenation, so the arguments must be trusted XPath fragments.
     *
     * @return the value found, or the empty string if the object node or value is missing
     * @throws RuntimeException if the assembled XPath expression cannot be evaluated
     */
    public String findMicroformattedValue(String objectTag, String object, String fieldTag, String field, String key) {
        Node node = this.findMicroformattedObjectNode(objectTag, object);
        if (null == node) {
            return "";
        }
        if (DomUtils.hasClassName(node, field)) {
            return node.getTextContent();
        }
        try {
            String xpath = ".//" + fieldTag + "[contains(@class, '" + field + "')]/" + key;
            String value = (String)xPathEngine.evaluate(xpath, node, XPathConstants.STRING);
            return null == value ? "" : value;
        }
        catch (XPathExpressionException ex) {
            throw new RuntimeException("Should not happen, XPath expression is built locally", ex);
        }
    }

    /** Returns the wrapped node. */
    public Node getDocument() {
        return this.document;
    }

    /**
     * Returns the first field of {@link #getPluralTextField(String)}, or a field with an
     * empty value and a {@code null} source if there is none.
     */
    public TextField getSingularTextField(String className) {
        TextField[] res = this.getPluralTextField(className);
        if (res.length == 0) {
            return new TextField("", null);
        }
        return res[0];
    }

    /**
     * Applies {@link #readTextField(Node)} to every node that
     * {@code DomUtils.findAllByClassName} returns for {@code className}.
     */
    public TextField[] getPluralTextField(String className) {
        List<TextField> res = new ArrayList<TextField>();
        for (Node node : DomUtils.findAllByClassName(this.getDocument(), className)) {
            res.add(HTMLDocument.readTextField(node));
        }
        return res.toArray(new TextField[0]);
    }

    /**
     * Returns the first field of {@link #getPluralUrlField(String)}, or a field with an
     * empty value and a {@code null} source if there is none.
     */
    public TextField getSingularUrlField(String className) {
        TextField[] res = this.getPluralUrlField(className);
        if (res.length < 1) {
            return new TextField("", null);
        }
        return res[0];
    }

    /**
     * Applies {@link #readUrlField(List, Node)} to every node that
     * {@code DomUtils.findAllByClassName} returns for {@code className}.
     */
    public TextField[] getPluralUrlField(String className) {
        List<TextField> res = new ArrayList<TextField>();
        for (Node node : DomUtils.findAllByClassName(this.getDocument(), className)) {
            HTMLDocument.readUrlField(res, node);
        }
        return res.toArray(new TextField[0]);
    }

    /**
     * Returns the first node reported by {@code DomUtils.findAllByTagAndClassName}, or
     * {@code null} if there is none.
     */
    public Node findMicroformattedObjectNode(String objectTag, String name) {
        List<Node> nodes = DomUtils.findAllByTagAndClassName(this.getDocument(), objectTag, name);
        if (nodes.isEmpty()) {
            return null;
        }
        return nodes.get(0);
    }

    /** Delegates to {@code DomUtils.readAttribute} on the wrapped node. */
    public String readAttribute(String attribute) {
        return DomUtils.readAttribute(this.getDocument(), attribute);
    }

    /** Delegates to {@code DomUtils.findAllByClassName} on the wrapped node. */
    public List<Node> findAllByClassName(String clazz) {
        return DomUtils.findAllByClassName(this.getDocument(), clazz);
    }

    /**
     * Returns the text content of the wrapped node's only child when that child is a
     * {@link Text} node; otherwise returns {@code null}.
     */
    public String getText() {
        NodeList children = this.getDocument().getChildNodes();
        if (children.getLength() == 1 && children.item(0) instanceof Text) {
            return children.item(0).getTextContent();
        }
        return null;
    }

    /**
     * Reads the {@code xml:lang} attribute of the node selected by the XPath {@code /HTML}.
     *
     * @return the attribute's text, or {@code null} if the node or attribute is missing
     * @throws IllegalStateException if the XPath expression cannot be evaluated
     */
    public String getDefaultLanguage() {
        final String xpathLanguageSelector = "/HTML";
        Node html;
        try {
            html = (Node)xPathEngine.evaluate(xpathLanguageSelector, this.document, XPathConstants.NODE);
        }
        catch (XPathExpressionException xpee) {
            // Keep the cause so the underlying XPath failure is not lost.
            throw new IllegalStateException("Unable to evaluate XPath expression " + xpathLanguageSelector, xpee);
        }
        if (html == null) {
            return null;
        }
        Node langAttribute = html.getAttributes().getNamedItem("xml:lang");
        return langAttribute == null ? null : langAttribute.getTextContent();
    }

    /** Delegates to {@code DomUtils.getXPathListForNode} on the wrapped node. */
    public String[] getPathToLocalRoot() {
        return DomUtils.getXPathListForNode(this.document);
    }

    /**
     * Applies {@link #readUrlField(List, Node)} to every node that
     * {@code DomUtils.findAllByAttributeName} returns for the attribute name {@code rel}.
     */
    public TextField[] extractRelTagNodes() {
        List<TextField> result = new ArrayList<TextField>();
        for (Node relTagNode : DomUtils.findAllByAttributeName(this.getDocument(), "rel")) {
            HTMLDocument.readUrlField(result, relTagNode);
        }
        return result.toArray(new TextField[0]);
    }

    /**
     * Returns the base IRI, computing and caching it on first use from the document URI of
     * the wrapped node (or of its owner document when the node is not itself a document).
     *
     * @throws ExtractionException if the node has no owner document, the document URI is
     *                             {@code null}, or the URI cannot be parsed
     */
    private URI getBaseIRI() throws ExtractionException {
        if (this.baseIRI == null) {
            Document doc = this.document instanceof Document ? (Document)this.document : this.document.getOwnerDocument();
            if (doc == null) {
                throw new ExtractionException("Node " + this.document.getNodeName() + " was not associated with a document.");
            }
            String uri = doc.getDocumentURI();
            if (uri == null) {
                throw new ExtractionException("document URI is null, this should not happen");
            }
            try {
                this.baseIRI = new URI(RDFUtils.fixAbsoluteIRI(uri));
            }
            catch (IllegalArgumentException | URISyntaxException ex) {
                throw new ExtractionException("Error in base IRI: " + uri, (Throwable)ex);
            }
        }
        return this.baseIRI;
    }

    /** Immutable pair of an extracted string value and the DOM node it was read from. */
    public static class TextField {
        private final String value;
        private final Node source;

        /**
         * @param value  extracted text
         * @param source node the text came from; may be {@code null} when no node matched
         */
        public TextField(String value, Node source) {
            this.value = value;
            this.source = source;
        }

        /** Returns the extracted text. */
        public String value() {
            return this.value;
        }

        /** Returns the node the text was read from, possibly {@code null}. */
        public Node source() {
            return this.source;
        }
    }
}

