/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.eval.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.compressors.z.ZCompressorInputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.eval.io.ExtractReaderException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Loads an "extract" file (the serialized output of a previous parse) from disk
 * and turns it into a list of {@link Metadata} objects.
 *
 * <p>The file name must end in {@code .json} or {@code .txt}, optionally followed
 * by a compression suffix ({@code .bz2}, {@code .gz}, {@code .gzip} or {@code .zip}).
 * JSON extracts are parsed with {@link JsonMetadataList}; text extracts are wrapped
 * in a single {@link Metadata} object whose content field holds the whole file.
 */
public class ExtractReader {
    private static final Logger LOG = LoggerFactory.getLogger(ExtractReader.class);

    /** Sentinel for the min/max extract length meaning "do not enforce this bound". */
    public static final long IGNORE_LENGTH = -1L;

    /**
     * Splits an extract file name into: original file name (group 1), extract
     * format (group 2) and optional compression suffix (group 3). Compiled once
     * rather than on every {@link #parseSuffixes(String)} call.
     */
    private static final Pattern FILE_SUFFIX_PATTERN =
            Pattern.compile("^(.*?)\\.(json|txt)(?:\\.(bz2|gz(?:ip)?|zip))?$");

    private final TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
    private final ALTER_METADATA_LIST alterMetadataList;
    private final long minExtractLength;
    private final long maxExtractLength;

    /**
     * Creates a reader that leaves the metadata list untouched and enforces no
     * length bounds.
     */
    public ExtractReader() {
        this(ALTER_METADATA_LIST.AS_IS, IGNORE_LENGTH, IGNORE_LENGTH);
    }

    /**
     * Creates a reader with the given list-alteration mode and no length bounds.
     *
     * @param alterMetadataList how a multi-entry JSON metadata list should be reduced
     */
    public ExtractReader(ALTER_METADATA_LIST alterMetadataList) {
        this(alterMetadataList, IGNORE_LENGTH, IGNORE_LENGTH);
    }

    /**
     * Creates a reader with the given list-alteration mode and length bounds.
     *
     * @param alterMetadataList how a multi-entry JSON metadata list should be reduced
     * @param minExtractLength  minimum on-disk size in bytes of an acceptable extract
     *                          file; values of {@link #IGNORE_LENGTH} or below disable
     *                          the check
     * @param maxExtractLength  maximum on-disk size in bytes of an acceptable extract
     *                          file; values of {@link #IGNORE_LENGTH} or below disable
     *                          the check
     * @throws IllegalArgumentException if a maximum is set and the minimum is not
     *                                  strictly smaller than it
     */
    public ExtractReader(ALTER_METADATA_LIST alterMetadataList, long minExtractLength, long maxExtractLength) {
        // Validate before assigning so a rejected instance never holds bad bounds.
        if (maxExtractLength > IGNORE_LENGTH && minExtractLength >= maxExtractLength) {
            throw new IllegalArgumentException("minExtractLength(" + minExtractLength + ") must be < maxExtractLength(" + maxExtractLength + ")");
        }
        this.alterMetadataList = alterMetadataList;
        this.minExtractLength = minExtractLength;
        this.maxExtractLength = maxExtractLength;
    }

    /**
     * Reads the given extract file and returns its metadata list.
     *
     * @param extractFile path to the extract file
     * @return the metadata list read from the file, reduced according to the
     *         configured {@link ALTER_METADATA_LIST} mode for JSON extracts;
     *         {@code null} if the compression suffix is recognised by the name
     *         parser but has no decoder here (defensive; kept for compatibility)
     * @throws ExtractReaderException if the file is missing or not a regular file,
     *                                has an unrecognised suffix, is empty, falls
     *                                outside the configured length bounds, cannot be
     *                                read, or cannot be parsed
     */
    public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
        if (extractFile == null || !Files.isRegularFile(extractFile)) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
        }
        FileSuffixes fileSuffixes = ExtractReader.parseSuffixes(extractFile.getFileName().toString());
        if (fileSuffixes.txtOrJson == null) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
        }
        this.checkLength(extractFile);

        InputStream is;
        try {
            is = ExtractReader.openExtractStream(extractFile, fileSuffixes.compression);
        } catch (IOException e) {
            // The exception type thrown below carries only a category, so keep
            // the underlying cause available in the log.
            LOG.debug("Couldn't open extract file: {}", extractFile, e);
            throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
        }
        if (is == null) {
            LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
            return null;
        }

        BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
        try {
            if ("json".equals(fileSuffixes.txtOrJson)) {
                List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
                this.applyAlteration(metadataList);
                return metadataList;
            }
            return this.generateListFromTextFile(reader, fileSuffixes);
        } catch (IOException e) {
            LOG.debug("Couldn't read extract file: {}", extractFile, e);
            throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
        } catch (TikaException e) {
            LOG.debug("Couldn't parse extract file: {}", extractFile, e);
            throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
        } finally {
            // Best-effort close: a failure while closing must not mask the result.
            IOUtils.closeQuietly(reader);
            IOUtils.closeQuietly(is);
        }
    }

    /**
     * Verifies that the on-disk size of the extract file is non-zero and within
     * the configured bounds.
     */
    private void checkLength(Path extractFile) throws ExtractReaderException {
        long length;
        try {
            length = Files.size(extractFile);
        } catch (IOException e) {
            LOG.debug("Couldn't read size of extract file: {}", extractFile, e);
            throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
        }
        if (length == 0L) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
        }
        if (this.minExtractLength > IGNORE_LENGTH && length < this.minExtractLength) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
        }
        if (this.maxExtractLength > IGNORE_LENGTH && length > this.maxExtractLength) {
            throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
        }
    }

    /**
     * Opens the extract file and wraps it in the decompressor matching the given
     * suffix. The raw file stream is closed if the decompressor cannot be created
     * or the suffix is unsupported, so no handle leaks on those paths.
     *
     * @return the (possibly decompressing) stream, or {@code null} if the
     *         compression suffix has no decoder
     */
    private static InputStream openExtractStream(Path extractFile, String compression) throws IOException {
        InputStream raw = Files.newInputStream(extractFile);
        if (compression == null) {
            return raw;
        }
        boolean wrapped = false;
        try {
            InputStream decoded;
            if (compression.equals("bz2")) {
                decoded = new BZip2CompressorInputStream(raw);
            } else if (compression.equals("gz") || compression.equals("gzip")) {
                decoded = new GzipCompressorInputStream(raw);
            } else if (compression.equals("zip")) {
                // NOTE(review): ZCompressorInputStream decodes Unix "compress" (.Z)
                // data, not ZIP archives -- confirm that ".zip" extracts are really
                // expected to be in that format. Behavior intentionally unchanged.
                decoded = new ZCompressorInputStream(raw);
            } else {
                return null;
            }
            wrapped = true;
            return decoded;
        } finally {
            if (!wrapped) {
                // Reached on a decoder constructor failure or an unsupported suffix.
                IOUtils.closeQuietly(raw);
            }
        }
    }

    /**
     * Reduces a multi-entry metadata list in place according to the configured
     * mode. Lists that are {@code null} or have at most one entry are left alone,
     * as is every list in {@link ALTER_METADATA_LIST#AS_IS} mode.
     */
    private void applyAlteration(List<Metadata> metadataList) {
        // Null guard in case the JSON parser yields no list at all.
        if (metadataList == null || metadataList.size() <= 1) {
            return;
        }
        // Enum identity comparison is null-safe: a null mode behaves like AS_IS.
        if (this.alterMetadataList == ALTER_METADATA_LIST.FIRST_ONLY) {
            ExtractReader.truncateToFirst(metadataList);
        } else if (this.alterMetadataList == ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST) {
            StringBuilder sb = new StringBuilder();
            for (Metadata m : metadataList) {
                String c = m.get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);
                if (c == null) {
                    continue;
                }
                sb.append(c);
                sb.append(" ");
            }
            metadataList.get(0).set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, sb.toString());
            ExtractReader.truncateToFirst(metadataList);
        }
    }

    /** Removes every entry after the first, working from the tail. */
    private static void truncateToFirst(List<Metadata> metadataList) {
        while (metadataList.size() > 1) {
            metadataList.remove(metadataList.size() - 1);
        }
    }

    /**
     * Builds a single-entry metadata list from a plain-text extract: the full text
     * becomes the content field, the original file name (extract suffixes removed)
     * becomes {@code resourceName}, and a {@code Content-Type} is set when the mime
     * repository returns one for that metadata.
     */
    private List<Metadata> generateListFromTextFile(Reader reader, FileSuffixes fileSuffixes) throws IOException {
        List<Metadata> metadataList = new ArrayList<Metadata>();
        String content = IOUtils.toString(reader);
        Metadata m = new Metadata();
        m.set(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT, content);
        m.set("resourceName", fileSuffixes.originalFileName);
        // No stream is supplied, so detection can only draw on the metadata.
        MediaType mimeType = this.tikaConfig.getMimeRepository().detect(null, m);
        if (mimeType != null) {
            m.set("Content-Type", mimeType.toString());
        }
        metadataList.add(m);
        return metadataList;
    }

    /**
     * Parses an extract file name into its components.
     *
     * @param fName file name (no directory part expected); may be {@code null}
     * @return a {@link FileSuffixes} whose fields are all {@code null} when the
     *         name is {@code null} or does not match the expected pattern;
     *         {@code compression} is also {@code null} when there is no
     *         compression suffix
     */
    protected static FileSuffixes parseSuffixes(String fName) {
        FileSuffixes fileSuffixes = new FileSuffixes();
        if (fName == null) {
            return fileSuffixes;
        }
        Matcher m = FILE_SUFFIX_PATTERN.matcher(fName);
        if (m.find()) {
            fileSuffixes.originalFileName = m.group(1);
            fileSuffixes.txtOrJson = m.group(2);
            fileSuffixes.compression = m.group(3);
        }
        return fileSuffixes;
    }

    /** Components of an extract file name as split by {@link #parseSuffixes(String)}. */
    private static class FileSuffixes {
        /** Compression suffix without the dot, or {@code null} if none matched. */
        String compression;
        /** Extract format, {@code "txt"} or {@code "json"}, or {@code null} if none matched. */
        String txtOrJson;
        /** File name with the format and compression suffixes removed. */
        String originalFileName;

        private FileSuffixes() {
        }
    }

    /** How a JSON metadata list with more than one entry is reduced after loading. */
    public enum ALTER_METADATA_LIST {
        /** Return the list exactly as parsed. */
        AS_IS,
        /** Keep only the first entry and drop the rest. */
        FIRST_ONLY,
        /**
         * Join the content of every entry (space-separated) into the first
         * entry, then drop the rest.
         */
        CONCATENATE_CONTENT_INTO_FIRST;

    }
}

