package org.apache.hudi.common.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.MetadataNotFoundException;
import org.apache.hudi.org.apache.avro.Schema;
import org.apache.hudi.org.apache.avro.generic.GenericRecord;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;

/* loaded from: input_file:org/apache/hudi/common/util/ParquetUtils.class */
public class ParquetUtils {

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/hudi/common/util/ParquetUtils$RecordKeysFilterFunction.class */
    /**
     * Function that reports whether a record key is a member of a fixed set of candidate keys.
     */
    public static class RecordKeysFilterFunction implements Function<String, Boolean> {
        /** Keys for which {@link #apply(String)} answers {@code true}. */
        private final Set<String> candidateKeys;

        /**
         * @param candidateKeys keys to accept; the set is kept by reference, it is not copied
         */
        RecordKeysFilterFunction(Set<String> candidateKeys) {
            this.candidateKeys = candidateKeys;
        }

        /**
         * @param recordKey key to look up
         * @return {@code true} when {@code recordKey} is contained in the candidate set
         */
        @Override
        public Boolean apply(String recordKey) {
            return candidateKeys.contains(recordKey);
        }
    }

    /**
     * Reads the record keys stored in the given Parquet file without applying any key filter.
     *
     * <p>Delegates to {@link #filterParquetRowKeys(Configuration, Path, Set)} with an empty
     * candidate set.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @return the set of record keys found in the file
     */
    public static Set<String> readRowKeysFromParquet(Configuration configuration, Path path) {
        final Set<String> noCandidates = new HashSet<>();
        return filterParquetRowKeys(configuration, path, noCandidates);
    }

    /**
     * Reads record keys from the given Parquet file, restricted to the supplied candidates,
     * projecting the file with the schema returned by
     * {@link HoodieAvroUtils#getRecordKeySchema()}.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @param set           candidate record keys handed on to the schema-aware overload
     * @return the record keys produced by the schema-aware overload
     */
    public static Set<String> filterParquetRowKeys(Configuration configuration, Path path, Set<String> set) {
        final Schema recordKeySchema = HoodieAvroUtils.getRecordKeySchema();
        return filterParquetRowKeys(configuration, path, set, recordKeySchema);
    }

    /**
     * Reads the record-key column of a Parquet file and collects the keys, optionally keeping
     * only those contained in {@code set}.
     *
     * <p>The reader is opened in a try-with-resources statement so it is closed on every exit
     * path, including when a read fails part-way through the file.
     *
     * @param configuration base Hadoop configuration; it is copied, and the read schema and
     *                      projection are applied to the copy only
     * @param path          location of the Parquet file
     * @param set           candidate keys; when {@code null} or empty no filtering is applied
     *                      and every key read is returned
     * @param schema        Avro schema used both as read schema and as requested projection
     * @return the record keys read from the file that passed the filter
     * @throws HoodieIOException if opening, reading or closing the Parquet file fails
     */
    private static Set<String> filterParquetRowKeys(Configuration configuration, Path path, Set<String> set, Schema schema) {
        // Only build a filter when there is something to filter on.
        Option<RecordKeysFilterFunction> keyFilter = Option.empty();
        if (set != null && !set.isEmpty()) {
            keyFilter = Option.of(new RecordKeysFilterFunction(set));
        }

        // All settings below go onto a copy; the caller's configuration object is left untouched.
        Configuration readConf = new Configuration(configuration);
        readConf.addResource(FSUtils.getFs(path.toString(), readConf).getConf());
        AvroReadSupport.setAvroReadSchema(readConf, schema);
        AvroReadSupport.setRequestedProjection(readConf, schema);

        Set<String> rowKeys = new HashSet<>();
        try (ParquetReader<Object> reader = AvroParquetReader.<Object>builder(path).withConf(readConf).build()) {
            // read() yields null once the file is exhausted.
            for (Object row = reader.read(); row != null; row = reader.read()) {
                if (!(row instanceof GenericRecord)) {
                    continue;
                }
                String recordKey = ((GenericRecord) row).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                if (!keyFilter.isPresent() || keyFilter.get().apply(recordKey)) {
                    rowKeys.add(recordKey);
                }
            }
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read row keys from Parquet " + path, e);
        }
        return rowKeys;
    }

    /**
     * Reads the record key and partition path of every record in a Parquet file.
     *
     * <p>The reader advances on every iteration, so a row that is not a {@link GenericRecord}
     * is skipped instead of being re-examined forever, and the reader is closed on every exit
     * path via try-with-resources.
     *
     * @param configuration base Hadoop configuration; it is copied, and the read schema and
     *                      projection are applied to the copy only
     * @param path          location of the Parquet file
     * @return one {@link HoodieKey} per {@link GenericRecord} read, in file order; an empty
     *         list when the file does not exist
     * @throws HoodieIOException if checking for, opening, reading or closing the file fails
     */
    public static List<HoodieKey> fetchRecordKeyPartitionPathFromParquet(Configuration configuration, Path path) {
        List<HoodieKey> hoodieKeys = new ArrayList<>();
        try {
            if (!path.getFileSystem(configuration).exists(path)) {
                return hoodieKeys;
            }

            Configuration readConf = new Configuration(configuration);
            readConf.addResource(FSUtils.getFs(path.toString(), readConf).getConf());
            // Project only the key / partition-path columns.
            Schema keySchema = HoodieAvroUtils.getRecordKeyPartitionPathSchema();
            AvroReadSupport.setAvroReadSchema(readConf, keySchema);
            AvroReadSupport.setRequestedProjection(readConf, keySchema);

            try (ParquetReader<Object> reader = AvroParquetReader.<Object>builder(path).withConf(readConf).build()) {
                // Advance unconditionally in the loop header; read() yields null at end of file.
                for (Object row = reader.read(); row != null; row = reader.read()) {
                    if (row instanceof GenericRecord) {
                        GenericRecord keyRecord = (GenericRecord) row;
                        String recordKey = keyRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString();
                        String partitionPath = keyRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString();
                        hoodieKeys.add(new HoodieKey(recordKey, partitionPath));
                    }
                }
            }
            return hoodieKeys;
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read from Parquet file " + path, e);
        }
    }

    /**
     * Reads the footer metadata of a Parquet file.
     *
     * @param configuration Hadoop configuration used to resolve the file system for {@code path}
     * @param path          location of the Parquet file
     * @return the footer metadata returned by {@link ParquetFileReader}
     * @throws HoodieIOException if reading the footer fails with an {@link IOException}
     */
    public static ParquetMetadata readMetadata(Configuration configuration, Path path) {
        final ParquetMetadata footer;
        try {
            // The footer is read with the configuration of the file system resolved for the path.
            Configuration fsConf = FSUtils.getFs(path.toString(), configuration).getConf();
            footer = ParquetFileReader.readFooter(fsConf, path);
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read footer for parquet " + path, e);
        }
        return footer;
    }

    /**
     * Returns the Parquet schema recorded in the footer of the given file.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @return the schema taken from the file metadata of the footer
     */
    public static MessageType readSchema(Configuration configuration, Path path) {
        ParquetMetadata footer = readMetadata(configuration, path);
        return footer.getFileMetaData().getSchema();
    }

    /**
     * Looks up the requested entries in the key/value metadata of a Parquet footer.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param required      when {@code true}, a missing entry raises an exception; when
     *                      {@code false}, missing entries are simply left out of the result
     * @param path          location of the Parquet file
     * @param footerNames   names of the footer entries to fetch
     * @return a map holding every requested entry that is present in the footer
     * @throws MetadataNotFoundException if {@code required} is set and an entry is missing
     */
    private static Map<String, String> readParquetFooter(Configuration configuration, boolean required, Path path, String... footerNames) {
        Map<String, String> found = new HashMap<>();
        Map<String, String> fileKeyValues = readMetadata(configuration, path).getFileMetaData().getKeyValueMetaData();
        for (String footerName : footerNames) {
            if (!fileKeyValues.containsKey(footerName)) {
                if (required) {
                    throw new MetadataNotFoundException("Could not find index in Parquet footer. Looked for key " + footerName + " in " + path);
                }
                continue;
            }
            found.put(footerName, fileKeyValues.get(footerName));
        }
        return found;
    }

    /**
     * Reads the Parquet schema of the given file and converts it to an Avro schema.
     *
     * @param configuration Hadoop configuration used for the converter and to access the file
     * @param path          location of the Parquet file
     * @return the Avro schema produced by {@link AvroSchemaConverter}
     */
    public static Schema readAvroSchema(Configuration configuration, Path path) {
        AvroSchemaConverter converter = new AvroSchemaConverter(configuration);
        MessageType parquetSchema = readSchema(configuration, path);
        return converter.convert(parquetSchema);
    }

    /**
     * Rebuilds the bloom filter serialized in the footer of a Parquet file.
     *
     * <p>The serialized filter is taken from the current footer key and, when that is absent,
     * from the old footer key. When the footer carries no filter type code,
     * {@link BloomFilterTypeCode#SIMPLE} is used.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @return the deserialized bloom filter, or {@code null} when neither footer key holds one
     */
    public static BloomFilter readBloomFilterFromParquetMetadata(Configuration configuration, Path path) {
        Map<String, String> footer = readParquetFooter(
                configuration,
                false,
                path,
                HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY,
                HoodieAvroWriteSupport.OLD_HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY,
                HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE);

        String serializedFilter = footer.get(HoodieAvroWriteSupport.HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY);
        if (serializedFilter == null) {
            // Fall back to the old footer key.
            serializedFilter = footer.get(HoodieAvroWriteSupport.OLD_HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY);
        }
        if (serializedFilter == null) {
            return null;
        }

        final String typeCode;
        if (footer.containsKey(HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE)) {
            typeCode = footer.get(HoodieAvroWriteSupport.HOODIE_BLOOM_FILTER_TYPE_CODE);
        } else {
            typeCode = BloomFilterTypeCode.SIMPLE.name();
        }
        return BloomFilterFactory.fromString(serializedFilter, typeCode);
    }

    /**
     * Reads the minimum and maximum record key stored in the footer of a Parquet file.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @return a two-element array: index 0 holds the minimum key, index 1 the maximum key
     * @throws HoodieException if the footer lookup did not yield exactly two entries
     */
    public static String[] readMinMaxRecordKeys(Configuration configuration, Path path) {
        final String minKeyName = HoodieAvroWriteSupport.HOODIE_MIN_RECORD_KEY_FOOTER;
        final String maxKeyName = HoodieAvroWriteSupport.HOODIE_MAX_RECORD_KEY_FOOTER;

        // Both entries are requested as required footer entries.
        Map<String, String> footer = readParquetFooter(configuration, true, path, minKeyName, maxKeyName);
        if (footer.size() == 2) {
            return new String[] {footer.get(minKeyName), footer.get(maxKeyName)};
        }
        throw new HoodieException(String.format("Could not read min/max record key out of footer correctly from %s. read) : %s", path, footer));
    }

    /**
     * Reads every Avro record stored in the given Parquet file.
     *
     * @param configuration Hadoop configuration handed to the Parquet reader
     * @param path          location of the Parquet file
     * @return the records read that are {@link GenericRecord} instances, in file order
     * @throws HoodieIOException if opening or reading the file fails with an {@link IOException}
     */
    public static List<GenericRecord> readAvroRecords(Configuration configuration, Path path) {
        List<GenericRecord> records = new ArrayList<>();
        ParquetReader<Object> reader = null;
        try {
            reader = AvroParquetReader.<Object>builder(path).withConf(configuration).build();
            Object row = reader.read();
            while (row != null) {
                if (row instanceof GenericRecord) {
                    records.add((GenericRecord) row);
                }
                row = reader.read();
            }
            return records;
        } catch (IOException e) {
            throw new HoodieIOException("Failed to read avro records from Parquet " + path, e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Closing is best-effort: a failure here is intentionally not reported.
                }
            }
        }
    }

    /**
     * Computes the number of rows in a Parquet file from its footer.
     *
     * @param configuration Hadoop configuration used to access the file
     * @param path          location of the Parquet file
     * @return the sum of the row counts of all blocks listed in the footer
     */
    public static long getRowCount(Configuration configuration, Path path) {
        long totalRows = 0L;
        for (BlockMetaData block : readMetadata(configuration, path).getBlocks()) {
            totalRows += block.getRowCount();
        }
        return totalRows;
    }
}
