/*
 * Decompiled with CFR 0.152.
 */
package org.apache.any23.cli;

import com.beust.jcommander.IStringConverter;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.ParameterException;
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.parser.ParseData;
import java.io.File;
import java.net.URL;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.any23.cli.Rover;
import org.apache.any23.plugin.crawler.CrawlerListener;
import org.apache.any23.plugin.crawler.SiteCrawler;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.StringDocumentSource;

/**
 * Command line tool that crawls a site starting from a single seed URL and
 * runs the inherited {@link Rover} extraction on every HTML page visited.
 *
 * <p>The command is registered with JCommander under the name
 * {@code crawler}. Exactly one input IRI (the seed) is expected.</p>
 */
@Parameters(commandNames={"crawler"}, commandDescription="Any23 Crawler Command Line Tool.")
public class Crawler extends Rover {

    /**
     * Monitor held around every call to {@code performExtraction} made from
     * the crawler listener, so that extractions never overlap.
     */
    private final Object roverLock = new Object();

    /** Pages whose URL matches this pattern are passed to the site crawler as the page filter. */
    @Parameter(names={"-pf", "--pagefilter"}, description="Regex used to filter out page URLs during crawling.", converter=PatterConverter.class)
    private Pattern pageFilter = Pattern.compile(".*(\\.(css|js|bmp|gif|jpe?g|png|tiff?|mid|mp2|mp3|mp4|wav|wma|avi|mov|mpeg|ram|m4v|wmv|rm|smil|pdf|swf|zip|rar|gz|xml|txt))$");

    /** Directory handed to the site crawler; defaults to a uniquely named folder under {@code java.io.tmpdir}. */
    @Parameter(names={"-sf", "--storagefolder"}, description="Folder used to store crawler temporary data.", converter=FileConverter.class)
    private File storageFolder = new File(System.getProperty("java.io.tmpdir"), "crawler-metadata-" + UUID.randomUUID().toString());

    /** Number of crawlers configured on the site crawler. */
    @Parameter(names={"-nc", "--numcrawlers"}, description="Sets the number of crawlers.")
    private int numCrawlers = 10;

    /** Upper bound on crawled pages; unbounded in practice by default. */
    @Parameter(names={"-mp", "--maxpages"}, description="Max number of pages before interrupting crawl.")
    private int maxPages = Integer.MAX_VALUE;

    /** Upper bound on crawl depth; unbounded in practice by default. */
    @Parameter(names={"-md", "--maxdepth"}, description="Max allowed crawler depth.")
    private int maxDepth = Integer.MAX_VALUE;

    // NOTE(review): a default of Integer.MAX_VALUE milliseconds (~24.8 days)
    // looks unintended for a delay, unlike the two limits above. Confirm how
    // SiteCrawler.setPolitenessDelay treats this value before changing it.
    @Parameter(names={"-pd", "--politenessdelay"}, description="Politeness delay in milliseconds.")
    private int politenessDelay = Integer.MAX_VALUE;

    /**
     * Configures the tool, validates the seed and the storage folder, wires
     * the extraction listener and the report shutdown hook, then starts the
     * crawl.
     *
     * @throws IllegalArgumentException if the number of input IRIs is not exactly one
     * @throws IllegalStateException if the storage folder is a regular file
     *         or cannot be created
     * @throws Exception any other failure raised while configuring the tool,
     *         parsing the seed URL, or starting the crawler
     */
    public void run() throws Exception {
        super.configure();

        if (this.inputIRIs.size() != 1) {
            throw new IllegalArgumentException("Expected just one seed.");
        }
        // The cast is kept because the declared element type of inputIRIs is
        // defined in the superclass and is not visible from this file.
        final URL seed = new URL((String) this.inputIRIs.get(0));

        if (this.storageFolder.isFile()) {
            throw new IllegalStateException(String.format("Storage folder %s can not be a file, must be a directory", this.storageFolder));
        }
        if (!this.storageFolder.exists() && !this.storageFolder.mkdirs()) {
            throw new IllegalStateException(String.format("Storage folder %s can not be created, please verify you have enough permissions", this.storageFolder));
        }

        final SiteCrawler siteCrawler = new SiteCrawler(this.storageFolder);
        siteCrawler.setNumOfCrawlers(this.numCrawlers);
        siteCrawler.setMaxPages(this.maxPages);
        siteCrawler.setMaxDepth(this.maxDepth);
        siteCrawler.setPolitenessDelay(this.politenessDelay);

        siteCrawler.addListener(new CrawlerListener() {

            /**
             * Runs the extraction on the HTML of the visited page. Pages
             * whose parse data is not HTML are only logged. Extraction
             * failures are reported on stderr and do not stop the crawl.
             */
            @Override
            public void visitedPage(Page page) {
                final String pageURL = page.getWebURL().getURL();
                System.err.println(String.format("Processing page: [%s]", pageURL));

                final ParseData parseData = page.getParseData();
                if (!(parseData instanceof HtmlParseData)) {
                    return;
                }
                final HtmlParseData htmlParseData = (HtmlParseData) parseData;
                try {
                    // One extraction at a time: the lock is shared by every
                    // invocation of this listener.
                    synchronized (Crawler.this.roverLock) {
                        Crawler.super.performExtraction(new StringDocumentSource(htmlParseData.getHtml(), pageURL));
                    }
                }
                catch (Exception e) {
                    // Deliberately best-effort: a single bad page must not abort the crawl.
                    System.err.println(String.format("Error while processing page [%s], error: %s .", pageURL, e.getMessage()));
                }
            }
        });

        // Print the extraction reports when the JVM shuts down, so they are
        // emitted even if the crawl is interrupted.
        Runtime.getRuntime().addShutdownHook(new Thread() {

            @Override
            public void run() {
                try {
                    System.err.println(Crawler.super.printReports());
                }
                catch (Exception e) {
                    e.printStackTrace(System.err);
                }
            }
        });

        siteCrawler.start(seed, this.pageFilter, true);
    }

    /**
     * JCommander converter turning a command line argument into a compiled
     * {@link Pattern}.
     *
     * <p>The class name (sic, "Patter") is kept unchanged because it is
     * public and referenced from the {@code --pagefilter} parameter.</p>
     */
    public static final class PatterConverter implements IStringConverter<Pattern> {

        /**
         * Compiles {@code value} as a regular expression.
         *
         * @param value the regular expression text supplied on the command line
         * @return the compiled pattern
         * @throws ParameterException if {@code value} is not a valid regular
         *         expression; the underlying {@link PatternSyntaxException}
         *         is attached as the cause
         */
        @Override
        public Pattern convert(String value) {
            try {
                return Pattern.compile(value);
            }
            catch (PatternSyntaxException pse) {
                ParameterException failure = new ParameterException(String.format("Invalid page filter, '%s' must be a regular expression.", value));
                // Attach the syntax error so its description and index are
                // not lost; initCause avoids relying on a (String, Throwable)
                // constructor being available in the JCommander version used.
                failure.initCause(pse);
                throw failure;
            }
        }
    }
}

