/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.extractor.html;

import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import javax.xml.transform.TransformerException;
import org.apache.any23.extractor.html.DocumentReport;
import org.apache.any23.extractor.html.SpanCloserInputStream;
import org.apache.any23.extractor.html.TagSoupParsingConfiguration;
import org.apache.any23.validator.DefaultValidator;
import org.apache.any23.validator.ValidatorException;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XNIException;
import org.cyberneko.html.parsers.DOMParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public class TagSoupParser {
    /** Key under which parsed elements carry their {@link ElementLocation} as DOM user data. */
    public static final String ELEMENT_LOCATION = "Element-Location";
    /** Parser feature identifier; also the key used to read the location item from an element's augmentations. */
    private static final String AUGMENTATIONS_FEATURE = "http://cyberneko.org/html/features/augmentations";
    /** Logger shared by this class and its nested parsing configuration. */
    private static final Logger logger = LoggerFactory.getLogger(TagSoupParser.class);
    /** Stream the document is read from; handed to the parsing configuration. */
    private final InputStream input;
    /** IRI of the document; passed to the configuration and set as the document URI of the parsed DOM. */
    private final String documentIRI;
    /** Charset name passed to the configuration, or {@code null} when no encoding was supplied. */
    private final String encoding;
    /** Parsing configuration that performs the actual parse. */
    private final TagSoupParsingConfiguration config;
    /** Cached DOM; stays {@code null} until {@link #getDOM()} has completed a parse. */
    private Document result = null;

    /**
     * Creates a parser for the given stream without an explicit character encoding.
     *
     * @param input stream supplying the markup to parse
     * @param documentIRI IRI of the document being parsed
     */
    public TagSoupParser(InputStream input, String documentIRI) {
        // A null encoding bypasses the charset check of the three-argument
        // constructor, so this performs exactly the same field initialisation.
        this(input, documentIRI, null);
    }

    public TagSoupParser(InputStream input, String documentIRI, String encoding) {
        if (encoding != null && !Charset.isSupported(encoding)) {
            throw new UnsupportedCharsetException(String.format("Charset %s is not supported", encoding));
        }
        this.input = input;
        this.documentIRI = documentIRI;
        this.encoding = encoding;
        this.config = TagSoupParsingConfiguration.getDefault();
    }

    /**
     * Returns the DOM of the document, parsing the input on the first call and
     * reusing the cached result on later calls.
     *
     * <p>The document URI of the returned DOM is (re)set to the document IRI on
     * every call, whether or not a parse took place.
     *
     * @return the parsed document
     * @throws IOException if the parsing configuration fails while reading the input
     */
    public Document getDOM() throws IOException {
        if (this.result == null) {
            final long startTime = System.currentTimeMillis();
            try {
                this.result = this.config.parse(this.input, this.documentIRI, this.encoding);
            } finally {
                // Runs in finally so the elapsed time is reported for failed parses too.
                final long elapsed = System.currentTimeMillis() - startTime;
                // Parameterized form: the message text is only assembled when debug is enabled.
                logger.debug("Parsed {} with {}, {}ms", this.documentIRI, this.config.name(), elapsed);
            }
        }
        this.result.setDocumentURI(this.documentIRI);
        return this.result;
    }

    /**
     * Parses the document (if not already parsed) and runs a {@link DefaultValidator} over it.
     *
     * <p>The document IRI is converted to a {@link URI} before any parsing is attempted,
     * so an invalid IRI is reported without touching the input stream.
     *
     * @param applyFix flag forwarded to the validator
     * @return a report pairing the validator's result with the validated document
     * @throws IOException if obtaining the DOM fails
     * @throws ValidatorException if the document IRI is not a valid URI, or if thrown by the validator
     */
    public DocumentReport getValidatedDOM(boolean applyFix) throws IOException, ValidatorException {
        final URI documentUri = this.documentIriAsUri();
        final DefaultValidator validator = new DefaultValidator();
        final Document dom = this.getDOM();
        return new DocumentReport(validator.validate(documentUri, dom, applyFix), dom);
    }

    /** Converts the document IRI to a {@link URI}, wrapping syntax failures in a {@link ValidatorException}. */
    private URI documentIriAsUri() throws ValidatorException {
        try {
            return new URI(this.documentIRI);
        } catch (IllegalArgumentException | URISyntaxException cause) {
            throw new ValidatorException("Error while performing validation, invalid document IRI.", cause);
        }
    }

    /**
     * Returns the NekoHTML-backed parsing configuration.
     *
     * @return the shared {@code NekoHTML} instance
     */
    static TagSoupParsingConfiguration legacyConfig() {
        final TagSoupParsingConfiguration nekoBacked = NekoHTML.instance;
        return nekoBacked;
    }

    /**
     * Immutable begin/end line and column coordinates of an element in the parsed source.
     *
     * <p>Instances are attached to DOM elements as user data under
     * {@link TagSoupParser#ELEMENT_LOCATION}.
     */
    public static class ElementLocation {
        // All coordinates are assigned once in the constructor and never modified.
        private final int beginLineNumber;
        private final int beginColumnNumber;
        private final int endLineNumber;
        private final int endColumnNumber;

        /**
         * @param beginLineNumber line on which the element begins
         * @param beginColumnNumber column at which the element begins
         * @param endLineNumber line on which the element ends
         * @param endColumnNumber column at which the element ends
         */
        private ElementLocation(int beginLineNumber, int beginColumnNumber, int endLineNumber, int endColumnNumber) {
            this.beginLineNumber = beginLineNumber;
            this.beginColumnNumber = beginColumnNumber;
            this.endLineNumber = endLineNumber;
            this.endColumnNumber = endColumnNumber;
        }

        /** @return the line on which the element begins */
        public int getBeginLineNumber() {
            return this.beginLineNumber;
        }

        /** @return the column at which the element begins */
        public int getBeginColumnNumber() {
            return this.beginColumnNumber;
        }

        /** @return the line on which the element ends */
        public int getEndLineNumber() {
            return this.endLineNumber;
        }

        /** @return the column at which the element ends */
        public int getEndColumnNumber() {
            return this.endColumnNumber;
        }
    }

    /**
     * Parsing configuration backed by the NekoHTML {@link DOMParser}.
     *
     * <p>While parsing, every created element whose augmentations carry location
     * information gets an {@link ElementLocation} attached as user data under
     * {@link TagSoupParser#ELEMENT_LOCATION}.
     */
    private static class NekoHTML
    extends TagSoupParsingConfiguration {
        private static final NekoHTML instance = new NekoHTML();

        private NekoHTML() {
        }

        /**
         * Parses the stream into a DOM.
         *
         * @param input stream supplying the markup
         * @param documentIRI not used by this configuration
         * @param encoding default encoding for the parser, or {@code null} to leave it unset
         * @return the parsed document
         * @throws IOException if reading the input fails
         * @throws RuntimeException wrapping any {@link SAXException} or
         *         {@link TransformerException}, or a {@link NullPointerException}
         *         whose top stack frame is in {@code java.io.Reader}
         */
        @Override
        Document parse(InputStream input, String documentIRI, String encoding) throws IOException {
            try {
                return this.parse(input, encoding);
            } catch (SAXException | TransformerException ex) {
                throw new RuntimeException("Should not happen, it's a tag soup parser", ex);
            } catch (NullPointerException ex) {
                // A JVM may hand out exceptions with an empty stack trace; guard the
                // index so the original NPE is not masked by an ArrayIndexOutOfBoundsException.
                final StackTraceElement[] trace = ex.getStackTrace();
                if (trace.length > 0 && "java.io.Reader".equals(trace[0].getClassName())) {
                    throw new RuntimeException("Bug in NekoHTML, try upgrading to newer release!", ex);
                }
                throw ex;
            }
        }

        /**
         * Configures a NekoHTML DOM parser (namespaces off, CDATA delimiters stripped
         * from scripts, augmentations on) and runs it over the input wrapped in a
         * {@link SpanCloserInputStream}.
         */
        private Document parse(InputStream input, String encoding) throws IOException, SAXException, TransformerException {
            DOMParser parser = new DOMParser() {
                // State of the element currently being started; read by createElementNode.
                private QName currentQName;
                private Augmentations currentAugmentations;

                @Override
                protected Element createElementNode(QName qName) {
                    final Element created = super.createElementNode(qName);
                    if (qName.equals(this.currentQName) && this.currentAugmentations != null) {
                        final ElementLocation elementLocation = this.createElementLocation(
                                this.currentAugmentations.getItem(TagSoupParser.AUGMENTATIONS_FEATURE));
                        created.setUserData(TagSoupParser.ELEMENT_LOCATION, elementLocation, null);
                    }
                    return created;
                }

                @Override
                public void startElement(QName qName, XMLAttributes xmlAttributes, Augmentations augmentations) throws XNIException {
                    // Record the state BEFORE delegating: the superclass (Xerces
                    // AbstractDOMParser) creates the element node while handling
                    // startElement, so createElementNode must already see this
                    // element's name and augmentations rather than the previous one's.
                    this.currentQName = qName;
                    this.currentAugmentations = augmentations;
                    super.startElement(qName, xmlAttributes, augmentations);
                }

                /**
                 * Builds a location from the string form of an augmentation item.
                 * Colon-separated fields 0, 1, 3 and 4 are read as begin line, begin
                 * column, end line and end column; field 2 is ignored (presumably a
                 * character offset -- confirm against NekoHTML's HTMLEventInfo).
                 *
                 * @return the location, or {@code null} for a missing item, a
                 *         "synthesized" item, or an unparsable string
                 */
                private ElementLocation createElementLocation(Object obj) {
                    if (obj == null) {
                        return null;
                    }
                    // Declared outside the try so the catch block can report it.
                    String pattern = null;
                    try {
                        pattern = obj.toString();
                        if ("synthesized".equals(pattern)) {
                            return null;
                        }
                        final String[] parts = pattern.split(":");
                        return new ElementLocation(
                                Integer.parseInt(parts[0]),
                                Integer.parseInt(parts[1]),
                                Integer.parseInt(parts[3]),
                                Integer.parseInt(parts[4]));
                    } catch (RuntimeException e) {
                        // Best effort: a malformed item must not abort the parse.
                        logger.warn(String.format("Unexpected string format for given augmentation: [%s]", pattern), e);
                        return null;
                    }
                }
            };
            parser.setFeature("http://xml.org/sax/features/namespaces", false);
            parser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-cdata-delims", true);
            parser.setFeature(TagSoupParser.AUGMENTATIONS_FEATURE, true);
            if (encoding != null) {
                parser.setProperty("http://cyberneko.org/html/properties/default-encoding", encoding);
            }
            parser.parse(new InputSource(new SpanCloserInputStream(input)));
            return parser.getDocument();
        }
    }
}

