/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.apache.tika.batch.FileResource;
import org.apache.tika.batch.FileResourceConsumer;
import org.apache.tika.batch.fs.FSProperties;
import org.apache.tika.eval.EvalFilePaths;
import org.apache.tika.eval.db.ColInfo;
import org.apache.tika.eval.db.Cols;
import org.apache.tika.eval.db.TableInfo;
import org.apache.tika.eval.io.ExtractReaderException;
import org.apache.tika.eval.io.IDBWriter;
import org.apache.tika.eval.langid.Language;
import org.apache.tika.eval.langid.LanguageIDWrapper;
import org.apache.tika.eval.textstats.BasicTokenCountStatsCalculator;
import org.apache.tika.eval.textstats.CommonTokens;
import org.apache.tika.eval.textstats.CompositeTextStatsCalculator;
import org.apache.tika.eval.textstats.ContentLengthCalculator;
import org.apache.tika.eval.textstats.TextStatsCalculator;
import org.apache.tika.eval.textstats.TokenEntropy;
import org.apache.tika.eval.textstats.TokenLengths;
import org.apache.tika.eval.textstats.TopNTokens;
import org.apache.tika.eval.textstats.UnicodeBlockCounter;
import org.apache.tika.eval.tokens.AnalyzerManager;
import org.apache.tika.eval.tokens.CommonTokenCountManager;
import org.apache.tika.eval.tokens.CommonTokenResult;
import org.apache.tika.eval.tokens.TokenCounts;
import org.apache.tika.eval.tokens.TokenIntPair;
import org.apache.tika.eval.util.ContentTagParser;
import org.apache.tika.eval.util.ContentTags;
import org.apache.tika.eval.util.EvalExceptionUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.apache.tika.sax.ToXMLContentHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

public abstract class AbstractProfiler
extends FileResourceConsumer {
    private static final Logger LOG = LoggerFactory.getLogger(AbstractProfiler.class);
    // Suffixes that findFile appends to a source file path when probing for its extract;
    // the empty string means "no extra extension".
    private static final String[] EXTRACT_EXTENSIONS = new String[]{".json", ".txt", ""};
    // Compression suffixes that findFile appends after each extract extension.
    private static final String[] COMPRESSION_EXTENSIONS = new String[]{"", ".bz2", ".gzip", ".zip"};
    // Sentinel for "length unknown"; same value as the -1L returned by the length helpers below.
    static final long NON_EXISTENT_FILE_LENGTH = -1L;
    // Reference-table definitions. NOTE(review): the numeric ColInfo arguments 4 and 12 look like
    // java.sql.Types.INTEGER and java.sql.Types.VARCHAR -- confirm against ColInfo.
    public static TableInfo REF_EXTRACT_EXCEPTION_TYPES = new TableInfo("ref_extract_exception_types", new ColInfo(Cols.EXTRACT_EXCEPTION_ID, 4), new ColInfo(Cols.EXTRACT_EXCEPTION_DESCRIPTION, 12, 128));
    public static TableInfo REF_PARSE_ERROR_TYPES = new TableInfo("ref_parse_error_types", new ColInfo(Cols.PARSE_ERROR_ID, 4), new ColInfo(Cols.PARSE_ERROR_DESCRIPTION, 12, 128));
    public static TableInfo REF_PARSE_EXCEPTION_TYPES = new TableInfo("ref_parse_exception_types", new ColInfo(Cols.PARSE_EXCEPTION_ID, 4), new ColInfo(Cols.PARSE_EXCEPTION_DESCRIPTION, 12, 128));
    // String forms of the booleans ("true" / "false") used for boolean-valued columns.
    public static final String TRUE = Boolean.toString(true);
    public static final String FALSE = Boolean.toString(false);
    private static final String ZERO = "0";
    // Shared counter; not used inside this class -- presumably subclasses draw row ids from it (TODO confirm).
    protected static final AtomicInteger ID = new AtomicInteger();
    private static final String UNKNOWN_EXTENSION = "unk";
    // Metadata key read by writeProfileData to fill the MD5 column.
    private static final String DIGEST_KEY = "X-TIKA:digest:MD5";
    // Upper-case tag name -> column that stores the count for that tag (see initTags).
    private static final Map<String, Cols> UC_TAGS_OF_INTEREST = AbstractProfiler.initTags();
    // Set via loadCommonTokens; stays null until that method has been called.
    private static CommonTokenCountManager COMMON_TOKEN_COUNT_MANAGER;
    // Extension + compression suffix of the last extract located by findFile; tried first on the next lookup.
    private String lastExtractExtension = null;
    AnalyzerManager analyzerManager;
    public static TableInfo MIME_TABLE;
    // Strips the extract/compression suffix from an extract file name (used by getPathsFromExtractCrawl).
    private static Pattern FILE_NAME_CLEANER;
    static final int FILE_PATH_MAX_LEN = 1024;
    // Content longer than this is truncated by calcTextStats before statistics are computed.
    int maxContentLength = 10000000;
    // Passed to LanguageIDWrapper.setMaxTextLength and to the UnicodeBlockCounter constructor.
    int maxContentLengthForLangId = 50000;
    // Passed to AnalyzerManager.newInstance when the analyzers are built.
    int maxTokens = 200000;
    // Patterns searched for in a stack trace by getExceptionStrings to classify the exception.
    private static final Pattern ACCESS_PERMISSION_EXCEPTION;
    private static final Pattern ENCRYPTION_EXCEPTION;
    // Language identifier instance handed to the calculator built in the constructor.
    private static LanguageIDWrapper LANG_ID;
    CompositeTextStatsCalculator compositeTextStatsCalculator;
    // Destination for every row produced by the write* methods.
    protected IDBWriter writer;

    /**
     * Builds the lookup from upper-case markup tag names to the columns that hold
     * the per-document count of each tag.
     *
     * @return an unmodifiable map keyed by upper-case tag name
     */
    private static Map<String, Cols> initTags() {
        Map<String, Cols> columnByTag = new HashMap<>();
        columnByTag.put("A", Cols.TAGS_A);
        columnByTag.put("B", Cols.TAGS_B);
        columnByTag.put("DIV", Cols.TAGS_DIV);
        columnByTag.put("I", Cols.TAGS_I);
        columnByTag.put("IMG", Cols.TAGS_IMG);
        columnByTag.put("LI", Cols.TAGS_LI);
        columnByTag.put("OL", Cols.TAGS_OL);
        columnByTag.put("P", Cols.TAGS_P);
        columnByTag.put("TABLE", Cols.TAGS_TABLE);
        columnByTag.put("TD", Cols.TAGS_TD);
        columnByTag.put("TITLE", Cols.TAGS_TITLE);
        columnByTag.put("TR", Cols.TAGS_TR);
        columnByTag.put("U", Cols.TAGS_U);
        columnByTag.put("UL", Cols.TAGS_UL);
        return Collections.unmodifiableMap(columnByTag);
    }

    /**
     * Replaces the class-wide common-token manager with a freshly constructed one.
     *
     * @param p               location handed to the {@link CommonTokenCountManager} constructor
     * @param defaultLangCode default language code handed to the same constructor
     * @throws IOException if constructing the manager fails
     */
    public static void loadCommonTokens(Path p, String defaultLangCode) throws IOException {
        CommonTokenCountManager freshManager = new CommonTokenCountManager(p, defaultLangCode);
        COMMON_TOKEN_COUNT_MANAGER = freshManager;
    }

    /**
     * Creates a profiler that consumes from {@code fileQueue} and emits rows through {@code writer}.
     * Also pushes the current language-id length limit to {@link LanguageIDWrapper} and builds the
     * text-statistics calculator with the default token limit.
     *
     * @param fileQueue queue passed on to the {@link FileResourceConsumer} super constructor
     * @param writer    destination for all rows written by this profiler
     */
    public AbstractProfiler(ArrayBlockingQueue<FileResource> fileQueue, IDBWriter writer) {
        super(fileQueue);
        this.writer = writer;
        LanguageIDWrapper.setMaxTextLength(maxContentLengthForLangId);
        compositeTextStatsCalculator = initAnalyzersAndTokenCounter(maxTokens, LANG_ID);
    }

    /**
     * Replaces {@link #analyzerManager} with one limited to {@code maxTokens} and assembles the
     * composite calculator that runs every text statistic used by this profiler.
     *
     * @param maxTokens token limit handed to {@link AnalyzerManager#newInstance}
     * @param langIder  language identifier handed to the composite calculator
     * @return the newly built composite calculator; the caller decides where to store it
     */
    private CompositeTextStatsCalculator initAnalyzersAndTokenCounter(int maxTokens, LanguageIDWrapper langIder) {
        analyzerManager = AnalyzerManager.newInstance(maxTokens);

        List<TextStatsCalculator> statCalculators = new ArrayList<>();
        statCalculators.add(new CommonTokens(COMMON_TOKEN_COUNT_MANAGER));
        statCalculators.add(new TokenEntropy());
        statCalculators.add(new TokenLengths());
        statCalculators.add(new TopNTokens(10));
        statCalculators.add(new BasicTokenCountStatsCalculator());
        statCalculators.add(new ContentLengthCalculator());
        statCalculators.add(new UnicodeBlockCounter(maxContentLengthForLangId));

        return new CompositeTextStatsCalculator(statCalculators, analyzerManager.getGeneralAnalyzer(), langIder);
    }

    /**
     * Sets the length at which content is truncated before text statistics are calculated.
     *
     * @param maxContentLength new limit; {@code truncateContent} does not truncate for values below zero
     */
    public void setMaxContentLength(int maxContentLength) {
        this.maxContentLength = maxContentLength;
    }

    /**
     * Stores the new language-id length limit and forwards it to {@link LanguageIDWrapper}.
     * The already-built text-statistics calculator is not rebuilt here, so its
     * {@link UnicodeBlockCounter} keeps the limit it was constructed with.
     *
     * @param maxContentLengthForLangId new limit
     */
    public void setMaxContentLengthForLangId(int maxContentLengthForLangId) {
        LanguageIDWrapper.setMaxTextLength(maxContentLengthForLangId);
        this.maxContentLengthForLangId = maxContentLengthForLangId;
    }

    /**
     * Sets the token limit and rebuilds the analyzers and the text-statistics calculator so the
     * new limit actually takes effect.
     *
     * @param maxTokens new token limit handed to the analyzer manager
     */
    public void setMaxTokens(int maxTokens) {
        this.maxTokens = maxTokens;
        // The rebuilt calculator must be stored; otherwise calcTextStats would keep using the
        // calculator (and analyzer) created with the previous limit.
        this.compositeTextStatsCalculator = this.initAnalyzersAndTokenCounter(maxTokens, new LanguageIDWrapper());
    }

    /**
     * Writes one row recording that reading an extract failed.
     *
     * @param extractExceptionTable table to write to
     * @param containerId           id of the container document
     * @param filePath              path stored in the FILE_PATH column
     * @param type                  failure type; its ordinal is stored as the exception id
     * @throws IOException if the writer fails
     */
    protected void writeExtractException(TableInfo extractExceptionTable, String containerId, String filePath, ExtractReaderException.TYPE type) throws IOException {
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.CONTAINER_ID, containerId);
        row.put(Cols.FILE_PATH, filePath);
        String exceptionId = Integer.toString(type.ordinal());
        row.put(Cols.EXTRACT_EXCEPTION_ID, exceptionId);
        writer.writeRow(extractExceptionTable, row);
    }

    /**
     * Writes the profile row (ids, digest, attachment count, timing, file name/extension, length,
     * content flag and mime id) for one document of a container.
     *
     * @param fps            paths of the container; supplies the file name when {@code i == 0}
     * @param i              index of the document in the container's metadata list; 0 is the container itself
     * @param contentTags    extracted content, used only to decide whether the document has content;
     *                       {@code null} is treated as "no content"
     * @param m              metadata of the document
     * @param fileId         id of this document
     * @param containerId    id of the container document
     * @param numAttachments attachment counts indexed like the metadata list; indices beyond its size are skipped
     * @param profileTable   table to write to
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    protected void writeProfileData(EvalFilePaths fps, int i, ContentTags contentTags, Metadata m, String fileId, String containerId, List<Integer> numAttachments, TableInfo profileTable) {
        Map<Cols, String> data = new HashMap<>();
        data.put(Cols.ID, fileId);
        data.put(Cols.CONTAINER_ID, containerId);
        data.put(Cols.MD5, m.get(DIGEST_KEY));
        if (i < numAttachments.size()) {
            data.put(Cols.NUM_ATTACHMENTS, Integer.toString(numAttachments.get(i)));
        }
        // Computed once; the metadata does not change while this row is assembled.
        data.put(Cols.ELAPSED_TIME_MILLIS, this.getTime(m));
        data.put(Cols.NUM_METADATA_VALUES, Integer.toString(this.countMetadataValues(m)));

        Integer nPages = m.getInt(PagedText.N_PAGES);
        if (nPages != null) {
            data.put(Cols.NUM_PAGES, Integer.toString(nPages));
        }

        if (i == 0) {
            // The container: its name comes from the source file path.
            data.put(Cols.IS_EMBEDDED, FALSE);
            data.put(Cols.FILE_NAME, fps.getRelativeSourceFilePath().getFileName().toString());
        } else {
            // An embedded document: its name comes from the embedded resource path.
            data.put(Cols.IS_EMBEDDED, TRUE);
            data.put(Cols.FILE_NAME, AbstractProfiler.getFileName(m.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH)));
        }

        String ext = FilenameUtils.getExtension(data.get(Cols.FILE_NAME));
        ext = ext == null ? "" : ext.toLowerCase(Locale.US);
        data.put(Cols.FILE_EXTENSION, ext);

        long srcFileLen = this.getSourceFileLength(m);
        if (srcFileLen > NON_EXISTENT_FILE_LENGTH) {
            data.put(Cols.LENGTH, Long.toString(srcFileLen));
        } else {
            data.put(Cols.LENGTH, "");
        }

        // Same null tolerance as truncateContent: missing tags simply mean "no content".
        String content = contentTags == null ? null : contentTags.getContent();
        if (content == null || content.trim().length() == 0) {
            data.put(Cols.HAS_CONTENT, FALSE);
        } else {
            data.put(Cols.HAS_CONTENT, TRUE);
        }

        this.getFileTypes(m, data);
        try {
            this.writer.writeRow(profileTable, data);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Extracts the file-name portion of {@code path}.
     * If {@link FilenameUtils#getName} rejects the path, NUL characters are replaced with spaces
     * and the extraction is attempted once more.
     *
     * @param path path to inspect; may be {@code null}
     * @return the file name, or the empty string when {@code path} is {@code null} or both attempts fail
     */
    private static String getFileName(String path) {
        if (path == null) {
            return "";
        }
        try {
            return FilenameUtils.getName(path);
        }
        catch (IllegalArgumentException firstFailure) {
            LOG.warn("{} in {}", firstFailure.getMessage(), path);
        }
        // Second attempt on a copy with NUL characters replaced.
        String sanitized = path.replaceAll("\u0000", " ");
        try {
            return FilenameUtils.getName(sanitized);
        }
        catch (IllegalArgumentException secondFailure) {
            LOG.warn("Again: {} in {}", secondFailure.getMessage(), sanitized);
            return "";
        }
    }

    /**
     * Writes an exception row for the document when its metadata carries a stack trace;
     * does nothing otherwise.
     *
     * @param fileId         id of the document
     * @param m              metadata inspected for container/embedded exception traces
     * @param exceptionTable table to write to
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    protected void writeExceptionData(String fileId, Metadata m, TableInfo exceptionTable) {
        Map<Cols, String> row = new HashMap<>();
        getExceptionStrings(m, row);
        if (row.isEmpty()) {
            // No exception recorded for this document.
            return;
        }
        row.put(Cols.ID, fileId);
        try {
            writer.writeRow(exceptionTable, row);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs all configured text statistics over the (possibly truncated) content.
     * Null or whitespace-only content is analysed as the empty string.
     *
     * @param contentTags content holder
     * @return whatever the composite calculator returns for the content
     */
    protected Map<Class, Object> calcTextStats(ContentTags contentTags) {
        // truncateContent needs a map to record its truncation flag; the flag is not used here.
        Map<Cols, String> scratch = new HashMap<>();
        String text = truncateContent(contentTags, maxContentLength, scratch);
        boolean blank = text == null || text.trim().length() == 0;
        return compositeTextStatsCalculator.calculate(blank ? "" : text);
    }

    /**
     * Converts the computed text statistics into one row of the contents table and writes it.
     * Statistics that are missing from {@code textStats} are left out of the row, except the
     * content length, which defaults to "0".
     *
     * @param fileId        id of the document
     * @param textStats     statistics keyed by calculator class
     * @param contentsTable table to write to
     * @throws IOException      declared for callers; a writer failure inside this method is
     *                          rethrown wrapped in a {@link RuntimeException}
     */
    protected void writeContentData(String fileId, Map<Class, Object> textStats, TableInfo contentsTable) throws IOException {
        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.ID, fileId);

        String contentLength = ZERO;
        if (textStats.containsKey(ContentLengthCalculator.class)) {
            contentLength = Integer.toString((Integer) textStats.get(ContentLengthCalculator.class));
        }
        row.put(Cols.CONTENT_LENGTH, contentLength);

        langid(textStats, row);
        writeTokenCounts(textStats, row);

        CommonTokenResult common = (CommonTokenResult) textStats.get(CommonTokens.class);
        if (common != null) {
            row.put(Cols.COMMON_TOKENS_LANG, common.getLangCode());
            row.put(Cols.NUM_UNIQUE_COMMON_TOKENS, Integer.toString(common.getUniqueCommonTokens()));
            row.put(Cols.NUM_COMMON_TOKENS, Integer.toString(common.getCommonTokens()));
            row.put(Cols.NUM_UNIQUE_ALPHABETIC_TOKENS, Integer.toString(common.getUniqueAlphabeticTokens()));
            row.put(Cols.NUM_ALPHABETIC_TOKENS, Integer.toString(common.getAlphabeticTokens()));
        }

        TokenCounts counts = (TokenCounts) textStats.get(BasicTokenCountStatsCalculator.class);
        if (counts != null) {
            row.put(Cols.NUM_UNIQUE_TOKENS, Integer.toString(counts.getTotalUniqueTokens()));
            row.put(Cols.NUM_TOKENS, Integer.toString(counts.getTotalTokens()));
        }

        Object entropy = textStats.get(TokenEntropy.class);
        if (entropy != null) {
            row.put(Cols.TOKEN_ENTROPY_RATE, Double.toString((Double) entropy));
        }

        SummaryStatistics lengths = (SummaryStatistics) textStats.get(TokenLengths.class);
        if (lengths != null) {
            row.put(Cols.TOKEN_LENGTH_SUM, Integer.toString((int) lengths.getSum()));
            row.put(Cols.TOKEN_LENGTH_MEAN, Double.toString(lengths.getMean()));
            row.put(Cols.TOKEN_LENGTH_STD_DEV, Double.toString(lengths.getStandardDeviation()));
        }

        unicodeBlocks(textStats, row);

        try {
            writer.writeRow(contentsTable, row);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Writes the per-tag counts for a document. Nothing is written when no tags were counted and
     * tag parsing did not fail. Tags of interest that were not seen are written as "0".
     *
     * @param fileId      id of the document
     * @param contentTags tag counts and the parse-exception flag
     * @param tagsTable   table to write to
     * @throws RuntimeException wrapping the {@link IOException} if the writer fails
     */
    void writeTagData(String fileId, ContentTags contentTags, TableInfo tagsTable) {
        Map<String, Integer> tagCounts = contentTags.getTags();
        if (tagCounts.isEmpty() && !contentTags.getParseException()) {
            return;
        }

        Map<Cols, String> row = new HashMap<>();
        row.put(Cols.ID, fileId);
        for (Map.Entry<String, Cols> tagAndColumn : UC_TAGS_OF_INTEREST.entrySet()) {
            Integer seen = tagCounts.get(tagAndColumn.getKey());
            row.put(tagAndColumn.getValue(), seen == null ? ZERO : Integer.toString(seen));
        }
        row.put(Cols.TAGS_PARSE_EXCEPTION, contentTags.getParseException() ? TRUE : FALSE);

        try {
            writer.writeRow(tagsTable, row);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the parse time recorded in the metadata.
     *
     * @param m metadata to read
     * @return the recorded value as stored, or "-1" when none is present
     */
    String getTime(Metadata m) {
        String recorded = m.get(AbstractRecursiveParserWrapperHandler.PARSE_TIME_MILLIS);
        return recorded != null ? recorded : "-1";
    }

    /**
     * Counts every value stored in the metadata, summed over all names.
     *
     * @param m metadata to count; may be {@code null}
     * @return total number of values, or 0 for {@code null} metadata
     */
    int countMetadataValues(Metadata m) {
        int total = 0;
        if (m != null) {
            for (String name : m.names()) {
                total += m.getValues(name).length;
            }
        }
        return total;
    }

    /**
     * Classifies the exception trace stored in the metadata (container trace first, embedded
     * trace as fallback) and records it in {@code data}. Leaves {@code data} untouched when
     * there is no trace. The full and normalized traces are only stored for the RUNTIME type.
     *
     * @param metadata metadata to inspect
     * @param data     row being assembled; receives the exception id and, for runtime
     *                 exceptions, the original and normalized stack traces
     */
    void getExceptionStrings(Metadata metadata, Map<Cols, String> data) {
        String trace = metadata.get(RecursiveParserWrapperHandler.CONTAINER_EXCEPTION);
        if (trace == null) {
            trace = metadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_EXCEPTION);
        }
        if (trace == null) {
            return;
        }

        EXCEPTION_TYPE kind;
        if (ACCESS_PERMISSION_EXCEPTION.matcher(trace).find()) {
            kind = EXCEPTION_TYPE.ACCESS_PERMISSION;
        } else if (ENCRYPTION_EXCEPTION.matcher(trace).find()) {
            kind = EXCEPTION_TYPE.ENCRYPTION;
        } else {
            kind = EXCEPTION_TYPE.RUNTIME;
        }
        data.put(Cols.PARSE_EXCEPTION_ID, Integer.toString(kind.ordinal()));

        if (kind == EXCEPTION_TYPE.RUNTIME) {
            data.put(Cols.ORIG_STACK_TRACE, trace);
            data.put(Cols.SORT_STACK_TRACE, EvalExceptionUtils.normalize(trace));
        }
    }

    /**
     * Returns the content of {@code contentTags}, cut to {@code maxLength} characters when it is
     * longer, and records in {@code data} whether truncation happened.
     * The flag is written as the literal "FALSE"/"TRUE", not via the lower-case TRUE/FALSE constants.
     *
     * @param contentTags content holder; {@code null} yields the empty string
     * @param maxLength   maximum number of characters to keep; values below zero disable truncation
     * @param data        row being assembled; receives the truncation flag
     * @return the (possibly truncated) content; {@code null} when the holder has no content
     */
    protected static String truncateContent(ContentTags contentTags, int maxLength, Map<Cols, String> data) {
        data.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "FALSE");
        if (contentTags == null) {
            return "";
        }
        String c = contentTags.getContent();
        // Callers already treat null content as legal, so it must not blow up on c.length().
        if (c != null && maxLength > -1 && c.length() > maxLength) {
            c = c.substring(0, maxLength);
            data.put(Cols.CONTENT_TRUNCATED_AT_MAX_LEN, "TRUE");
        }
        return c;
    }

    /**
     * Extracts content and tag counts from the metadata of one document.
     *
     * @param evalFilePaths paths of the extract the metadata came from
     * @param metadata      metadata holding the content; may be {@code null}
     * @return the parsed content, or {@link ContentTags#EMPTY_CONTENT_TAGS} for {@code null} metadata
     */
    protected static ContentTags getContent(EvalFilePaths evalFilePaths, Metadata metadata) {
        return metadata == null
                ? ContentTags.EMPTY_CONTENT_TAGS
                : parseContentAndTags(evalFilePaths, metadata);
    }

    /**
     * Stores the most frequent Unicode blocks (at most 20, highest count first) as a single
     * "name: count | name: count" string. Nothing is stored when the statistic is absent,
     * matching how the other optional statistics are handled in writeContentData.
     *
     * @param tokenStats statistics keyed by calculator class
     * @param data       row being assembled
     */
    void unicodeBlocks(Map<Class, Object> tokenStats, Map<Cols, String> data) {
        @SuppressWarnings("unchecked") // the value stored under UnicodeBlockCounter is a block-name -> count map
        Map<String, MutableInt> blocks = (Map<String, MutableInt>) tokenStats.get(UnicodeBlockCounter.class);
        if (blocks == null) {
            return;
        }

        List<Pair<String, Integer>> pairs = new ArrayList<>(blocks.size());
        for (Map.Entry<String, MutableInt> e : blocks.entrySet()) {
            pairs.add(Pair.of(e.getKey(), e.getValue().intValue()));
        }
        // Descending by count; the sort is stable, so ties keep the map's iteration order.
        Collections.sort(pairs, new Comparator<Pair<String, Integer>>() {
            @Override
            public int compare(Pair<String, Integer> o1, Pair<String, Integer> o2) {
                return o2.getValue().compareTo(o1.getValue());
            }
        });

        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 20 && i < pairs.size(); ++i) {
            if (i > 0) {
                sb.append(" | ");
            }
            Pair<String, Integer> pair = pairs.get(i);
            sb.append(pair.getKey()).append(": ").append(pair.getValue());
        }
        data.put(Cols.UNICODE_CHAR_BLOCKS, sb.toString());
    }

    /**
     * Stores the best and second-best language guesses with their confidences.
     * Nothing is stored when the statistic is absent or empty.
     *
     * @param stats statistics keyed by calculator class
     * @param data  row being assembled
     */
    void langid(Map<Class, Object> stats, Map<Cols, String> data) {
        @SuppressWarnings("unchecked") // the value stored under LanguageIDWrapper is a list of Language results
        List<Language> probabilities = (List<Language>) stats.get(LanguageIDWrapper.class);
        if (probabilities == null) {
            return;
        }
        if (probabilities.size() > 0) {
            Language best = probabilities.get(0);
            data.put(Cols.LANG_ID_1, best.getLanguage());
            data.put(Cols.LANG_ID_PROB_1, Double.toString(best.getConfidence()));
        }
        if (probabilities.size() > 1) {
            Language runnerUp = probabilities.get(1);
            data.put(Cols.LANG_ID_2, runnerUp.getLanguage());
            data.put(Cols.LANG_ID_PROB_2, Double.toString(runnerUp.getConfidence()));
        }
    }

    /**
     * Looks up the mime id for the document's "Content-Type" and stores it in {@code output}.
     * Does nothing when the metadata is {@code null} or carries no content type.
     *
     * @param metadata metadata to read; may be {@code null}
     * @param output   row being assembled
     */
    void getFileTypes(Metadata metadata, Map<Cols, String> output) {
        String contentType = metadata == null ? null : metadata.get("Content-Type");
        if (contentType != null) {
            int mimeId = writer.getMimeId(contentType);
            output.put(Cols.MIME_ID, Integer.toString(mimeId));
        }
    }

    /**
     * Stores the top-N tokens as a single "token: count | token: count" string.
     * Nothing is stored when the statistic is absent, matching how the other optional
     * statistics are handled in writeContentData.
     *
     * @param textStats statistics keyed by calculator class
     * @param data      row being assembled
     */
    void writeTokenCounts(Map<Class, Object> textStats, Map<Cols, String> data) {
        TokenIntPair[] tokenIntPairs = (TokenIntPair[]) textStats.get(TopNTokens.class);
        if (tokenIntPairs == null) {
            return;
        }
        StringBuilder sb = new StringBuilder();
        for (TokenIntPair t : tokenIntPairs) {
            if (sb.length() > 0) {
                sb.append(" | ");
            }
            // Every entry appends at least ": ", so a non-empty builder means "not the first entry".
            sb.append(t.getToken()).append(": ").append(t.getValue());
        }
        data.put(Cols.TOP_N_TOKENS, sb.toString());
    }

    /**
     * Closes the underlying database writer.
     *
     * @throws IOException if closing the writer fails
     */
    public void closeWriter() throws IOException {
        writer.close();
    }

    /**
     * Derives the source-file path and the extract file when the crawl ran over the extracts
     * directory. The source path is the extract's relative path with the extract/compression
     * suffix stripped; if the extract is not a regular file at the recorded location, the
     * usual suffix probing is used instead.
     *
     * @param metadata metadata carrying the relative path of the crawled extract
     * @param extracts root directory of the extracts
     * @return the relative source path together with the extract file (possibly {@code null})
     */
    protected EvalFilePaths getPathsFromExtractCrawl(Metadata metadata, Path extracts) {
        String relExtractPath = metadata.get(FSProperties.FS_REL_PATH);
        String relSourcePath = FILE_NAME_CLEANER.matcher(relExtractPath).replaceAll("");
        Path relativeSourceFilePath = Paths.get(relSourcePath);

        Path extractFile = extracts.resolve(relExtractPath);
        if (!Files.isRegularFile(extractFile)) {
            extractFile = findFile(extracts, relativeSourceFilePath);
        }
        return new EvalFilePaths(relativeSourceFilePath, extractFile);
    }

    /**
     * Derives the extract file and the source-file length when the crawl ran over the source
     * directory. A length that cannot be read is logged and reported as -1.
     *
     * @param metadata metadata carrying the relative path of the crawled source file
     * @param srcDir   root directory of the source files
     * @param extracts root directory of the extracts
     * @return the relative source path, the extract file (possibly {@code null}) and the source length
     */
    protected EvalFilePaths getPathsFromSrcCrawl(Metadata metadata, Path srcDir, Path extracts) {
        Path relativeSourceFilePath = Paths.get(metadata.get(FSProperties.FS_REL_PATH));
        Path extractFile = findFile(extracts, relativeSourceFilePath);
        Path sourceFile = srcDir.resolve(relativeSourceFilePath);

        long sourceLength;
        try {
            sourceLength = Files.size(sourceFile);
        }
        catch (IOException e) {
            LOG.warn("Couldn't get length for: {}", sourceFile.toAbsolutePath());
            sourceLength = -1L;
        }
        return new EvalFilePaths(relativeSourceFilePath, extractFile, sourceLength);
    }

    /**
     * Locates the extract belonging to a source file by appending the known extract and
     * compression suffixes to its relative path. The suffix that matched last time is tried
     * first and is updated whenever the full probe finds a file.
     *
     * @param extractRootDir         root directory of the extracts
     * @param relativeSourceFilePath source file path relative to the source root
     * @return the first candidate that is a regular file, or {@code null} when none exists
     */
    private Path findFile(Path extractRootDir, Path relativeSourceFilePath) {
        String basePath = relativeSourceFilePath.toString();

        if (lastExtractExtension != null) {
            Path remembered = extractRootDir.resolve(basePath + lastExtractExtension);
            if (Files.isRegularFile(remembered)) {
                return remembered;
            }
        }

        for (String extractSuffix : EXTRACT_EXTENSIONS) {
            for (String compressionSuffix : COMPRESSION_EXTENSIONS) {
                String suffix = extractSuffix + compressionSuffix;
                Path probe = extractRootDir.resolve(basePath + suffix);
                if (Files.isRegularFile(probe)) {
                    lastExtractExtension = suffix;
                    return probe;
                }
            }
        }
        return null;
    }

    /**
     * Returns the source file length, preferring the value carried by {@code fps} and falling
     * back to the metadata list when that value is not set.
     *
     * @param fps          paths object that may already know the length
     * @param metadataList metadata of the container and its embedded documents
     * @return the length, or -1 when it cannot be determined
     */
    protected long getSourceFileLength(EvalFilePaths fps, List<Metadata> metadataList) {
        long known = fps.getSourceFileLength();
        return known > NON_EXISTENT_FILE_LENGTH ? known : getSourceFileLength(metadataList);
    }

    /**
     * Reads the source file length from the first metadata object of the list.
     *
     * @param metadataList metadata of the container and its embedded documents; may be {@code null}
     * @return the length, or -1 for a {@code null}/empty list or an unusable value
     */
    long getSourceFileLength(List<Metadata> metadataList) {
        boolean nothingToRead = metadataList == null || metadataList.size() < 1;
        return nothingToRead ? NON_EXISTENT_FILE_LENGTH : getSourceFileLength(metadataList.get(0));
    }

    /**
     * Parses the "Content-Length" value of the metadata.
     *
     * @param m metadata to read
     * @return the parsed length, or -1 when the value is missing or not a valid long
     */
    long getSourceFileLength(Metadata m) {
        String rawLength = m.get("Content-Length");
        if (rawLength != null) {
            try {
                return Long.parseLong(rawLength);
            }
            catch (NumberFormatException ignored) {
                // not a number: fall through to the "unknown" value
            }
        }
        return NON_EXISTENT_FILE_LENGTH;
    }

    /**
     * Returns the size of {@code p} in bytes.
     *
     * @param p file to measure; may be {@code null}
     * @return the size, or -1 when {@code p} is {@code null}, is not a regular file, or its size
     *         cannot be read (the failure is logged, not thrown)
     */
    protected long getFileLength(Path p) {
        if (p != null && Files.isRegularFile(p)) {
            try {
                return Files.size(p);
            }
            catch (IOException e) {
                // Still best effort, but no longer silent: same message as getPathsFromSrcCrawl.
                LOG.warn("Couldn't get length for: {}", p.toAbsolutePath(), e);
            }
        }
        return NON_EXISTENT_FILE_LENGTH;
    }

    /**
     * Computes, for every document in the list, how many embedded documents sit below it.
     * Index 0 (the container) gets {@code list.size() - 1}; every other index gets the number
     * of documents whose embedded resource path has this document's path as a proper prefix
     * of path segments, or 0 when there are none.
     *
     * @param list metadata of the container followed by its embedded documents; may be {@code null}
     * @return one count per entry of {@code list}; empty for a {@code null} or empty list
     */
    static List<Integer> countAttachments(List<Metadata> list) {
        List<Integer> result = new ArrayList<>();
        if (list == null || list.isEmpty()) {
            return result;
        }
        result.add(list.size() - 1);

        // First pass: every ancestor path of an embedded document gains one descendant.
        Map<String, Integer> descendantsByPath = new HashMap<>();
        for (int doc = 1; doc < list.size(); doc++) {
            String path = list.get(doc).get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH);
            if (path == null) {
                continue;
            }
            String[] segments = path.split("/");
            StringBuilder ancestor = new StringBuilder();
            // Segment 0 is skipped and the last segment is the document itself.
            for (int last = 1; last < segments.length - 1; last++) {
                ancestor.setLength(0);
                join("/", ancestor, segments, 1, last);
                String ancestorPath = ancestor.toString();
                Integer soFar = descendantsByPath.get(ancestorPath);
                descendantsByPath.put(ancestorPath, soFar == null ? 1 : soFar + 1);
            }
        }

        // Second pass: look each embedded document up by its own path.
        for (int doc = 1; doc < list.size(); doc++) {
            String ownPath = list.get(doc).get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH);
            Integer descendants = descendantsByPath.get(ownPath);
            result.add(descendants == null ? 0 : descendants);
        }
        return result;
    }

    /**
     * Appends {@code parts[start..end]} (both inclusive) to {@code sb}, writing the delimiter
     * before every part, including the first.
     *
     * @param delimiter text written in front of each part
     * @param sb        builder to append to
     * @param parts     source segments
     * @param start     index of the first segment to append
     * @param end       index of the last segment to append
     */
    private static void join(String delimiter, StringBuilder sb, String[] parts, int start, int end) {
        int idx = start;
        while (idx <= end) {
            sb.append(delimiter).append(parts[idx]);
            idx++;
        }
    }

    /**
     * Turns the content string stored in the metadata into {@link ContentTags}.
     * Extracts named "*.html" are parsed as HTML; extracts named "*.xhtml", or content produced
     * by the XML content handler, are parsed as XML with an HTML fallback. Any remaining parse
     * failure falls back to the raw string with the parse-exception flag set. Everything else
     * is treated as plain text.
     *
     * @param evalFilePaths paths of the extract; its file name selects the parser
     * @param metadata      metadata holding the content and the content-handler name
     * @return the parsed content, or {@link ContentTags#EMPTY_CONTENT_TAGS} when there is no content
     */
    private static ContentTags parseContentAndTags(EvalFilePaths evalFilePaths, Metadata metadata) {
        String content = metadata.get(RecursiveParserWrapperHandler.TIKA_CONTENT);
        if (content == null || content.length() == 0) {
            return ContentTags.EMPTY_CONTENT_TAGS;
        }
        String handlerClass = metadata.get(RecursiveParserWrapperHandler.TIKA_CONTENT_HANDLER);
        String extractName = evalFilePaths.getExtractFile().getFileName().toString().toLowerCase(Locale.ENGLISH);

        if (extractName.endsWith(".html")) {
            try {
                return ContentTagParser.parseHTML(content, UC_TAGS_OF_INTEREST.keySet());
            }
            catch (IOException | SAXException e) {
                LOG.warn("Problem parsing html in {}; backing off to treat string as text", evalFilePaths.getExtractFile().toAbsolutePath().toString(), e);
                return new ContentTags(content, true);
            }
        }

        boolean xmlHandler = handlerClass != null && handlerClass.equals(ToXMLContentHandler.class.getSimpleName());
        if (!extractName.endsWith(".xhtml") && !xmlHandler) {
            return new ContentTags(content);
        }

        try {
            return ContentTagParser.parseXML(content, UC_TAGS_OF_INTEREST.keySet());
        }
        catch (IOException | TikaException | SAXException xmlFailure) {
            LOG.warn("Problem parsing xhtml in {}; backing off to html parser", evalFilePaths.getExtractFile().toAbsolutePath().toString(), xmlFailure);
        }
        try {
            ContentTags viaHtml = ContentTagParser.parseHTML(content, UC_TAGS_OF_INTEREST.keySet());
            viaHtml.setParseException(true);
            return viaHtml;
        }
        catch (IOException | SAXException htmlFailure) {
            LOG.warn("Problem parsing html in {}; backing off to treat string as text", evalFilePaths.getExtractFile().toAbsolutePath().toString(), htmlFailure);
            return new ContentTags(content, true);
        }
    }

    // Initializes the static table definition, the patterns and the shared language identifier.
    static {
        MIME_TABLE = new TableInfo("mimes", new ColInfo(Cols.MIME_ID, 4, "PRIMARY KEY"), new ColInfo(Cols.MIME_STRING, 12, 256), new ColInfo(Cols.FILE_EXTENSION, 12, 12));
        // ".gzip" is accepted as well because findFile probes for that suffix (COMPRESSION_EXTENSIONS);
        // ".gz" stays for backward compatibility.
        FILE_NAME_CLEANER = Pattern.compile("\\.(json|txt)(\\.(bz2|gz|gzip|zip))?$");
        ACCESS_PERMISSION_EXCEPTION = Pattern.compile("org\\.apache\\.tika\\.exception\\.AccessPermissionException");
        // Every dot is escaped so only the literal class name matches.
        ENCRYPTION_EXCEPTION = Pattern.compile("org\\.apache\\.tika\\.exception\\.EncryptedDocumentException");
        LANG_ID = new LanguageIDWrapper();
    }

    /**
     * Kinds of parse error.
     * NOTE(review): presumably the ordinals are stored as ids in the parse-error reference table,
     * as is done for EXCEPTION_TYPE -- confirm before reordering or inserting constants.
     */
    public static enum PARSE_ERROR_TYPE {
        OOM,
        TIMEOUT;

    }

    /**
     * Kinds of parse exception. getExceptionStrings writes {@code ordinal()} of these constants
     * into the PARSE_EXCEPTION_ID column, so the declaration order is part of the stored data:
     * do not reorder, and only append new constants at the end.
     */
    public static enum EXCEPTION_TYPE {
        RUNTIME,
        ENCRYPTION,
        ACCESS_PERMISSION,
        UNSUPPORTED_VERSION;

    }
}

