package org.apache.hudi.org.apache.parquet.hadoop.util;

import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.org.apache.parquet.column.ColumnDescriptor;
import org.apache.hudi.org.apache.parquet.column.ColumnReader;
import org.apache.hudi.org.apache.parquet.column.ColumnWriteStore;
import org.apache.hudi.org.apache.parquet.column.ColumnWriter;
import org.apache.hudi.org.apache.parquet.column.ParquetProperties;
import org.apache.hudi.org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.hudi.org.apache.parquet.column.page.PageReadStore;
import org.apache.hudi.org.apache.parquet.hadoop.CodecFactory;
import org.apache.hudi.org.apache.parquet.hadoop.ColumnChunkPageWriteStore;
import org.apache.hudi.org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.hudi.org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.hudi.org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.hudi.org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.hudi.org.apache.parquet.hadoop.util.CompressionConverter;
import org.apache.hudi.org.apache.parquet.io.api.Converter;
import org.apache.hudi.org.apache.parquet.io.api.GroupConverter;
import org.apache.hudi.org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.hudi.org.apache.parquet.schema.GroupType;
import org.apache.hudi.org.apache.parquet.schema.MessageType;
import org.apache.hudi.org.apache.parquet.schema.Type;

/* loaded from: input_file:org/apache/hudi/org/apache/parquet/hadoop/util/ColumnMasker.class */
public class ColumnMasker {

    /* loaded from: input_file:org/apache/hudi/org/apache/parquet/hadoop/util/ColumnMasker$DummyConverter.class */
    private static final class DummyConverter extends PrimitiveConverter {
        private DummyConverter() {
        }

        @Override // org.apache.hudi.org.apache.parquet.io.api.Converter
        public GroupConverter asGroupConverter() {
            return new DummyGroupConverter();
        }
    }

    /* loaded from: input_file:org/apache/hudi/org/apache/parquet/hadoop/util/ColumnMasker$DummyGroupConverter.class */
    private static final class DummyGroupConverter extends GroupConverter {
        private DummyGroupConverter() {
        }

        @Override // org.apache.hudi.org.apache.parquet.io.api.GroupConverter
        public void start() {
        }

        @Override // org.apache.hudi.org.apache.parquet.io.api.GroupConverter
        public void end() {
        }

        @Override // org.apache.hudi.org.apache.parquet.io.api.GroupConverter
        public Converter getConverter(int i) {
            return new DummyConverter();
        }
    }

    /* loaded from: input_file:org/apache/hudi/org/apache/parquet/hadoop/util/ColumnMasker$MaskMode.class */
    public enum MaskMode {
        NULLIFY("nullify"),
        HASH("hash"),
        REDACT("redact");

        private String mode;

        MaskMode(String str) {
            this.mode = str;
        }

        public String getMode() {
            return this.mode;
        }

        public static MaskMode fromString(String str) {
            for (MaskMode maskMode : values()) {
                if (maskMode.mode.equalsIgnoreCase(str)) {
                    return maskMode;
                }
            }
            return null;
        }
    }

    public void processBlocks(CompressionConverter.TransParquetFileReader transParquetFileReader, ParquetFileWriter parquetFileWriter, ParquetMetadata parquetMetadata, MessageType messageType, List<String> list, MaskMode maskMode) throws IOException {
        Set<ColumnPath> convertToColumnPaths = convertToColumnPaths(list);
        int i = 0;
        PageReadStore readNextRowGroup = transParquetFileReader.readNextRowGroup();
        while (readNextRowGroup != null) {
            parquetFileWriter.startBlock(readNextRowGroup.getRowCount());
            List<ColumnChunkMetaData> columns = parquetMetadata.getBlocks().get(i).getColumns();
            Map map = (Map) messageType.getColumns().stream().collect(Collectors.toMap(columnDescriptor -> {
                return ColumnPath.get(columnDescriptor.getPath());
            }, columnDescriptor2 -> {
                return columnDescriptor2;
            }));
            ColumnReadStoreImpl columnReadStoreImpl = new ColumnReadStoreImpl(readNextRowGroup, new DummyGroupConverter(), messageType, parquetMetadata.getFileMetaData().getCreatedBy());
            for (int i2 = 0; i2 < columns.size(); i2++) {
                ColumnChunkMetaData columnChunkMetaData = columns.get(i2);
                processChunk((ColumnDescriptor) map.get(columnChunkMetaData.getPath()), columnChunkMetaData, columnReadStoreImpl, transParquetFileReader, parquetFileWriter, messageType, convertToColumnPaths, maskMode);
            }
            parquetFileWriter.endBlock();
            readNextRowGroup = transParquetFileReader.readNextRowGroup();
            i++;
        }
    }

    private void processChunk(ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData, ColumnReadStoreImpl columnReadStoreImpl, CompressionConverter.TransParquetFileReader transParquetFileReader, ParquetFileWriter parquetFileWriter, MessageType messageType, Set<ColumnPath> set, MaskMode maskMode) throws IOException {
        transParquetFileReader.setStreamPosition(columnChunkMetaData.getStartingPos());
        if (!set.contains(columnChunkMetaData.getPath())) {
            parquetFileWriter.appendColumnChunk(columnDescriptor, transParquetFileReader.getStream(), columnChunkMetaData, transParquetFileReader.readBloomFilter(columnChunkMetaData), transParquetFileReader.readColumnIndex(columnChunkMetaData), transParquetFileReader.readOffsetIndex(columnChunkMetaData));
            return;
        }
        if (!maskMode.equals(MaskMode.NULLIFY)) {
            throw new UnsupportedOperationException("Only nullify is supported for now");
        }
        if (columnDescriptor.getPrimitiveType().getRepetition().equals(Type.Repetition.REQUIRED)) {
            throw new IOException("Required column [" + columnDescriptor.getPrimitiveType().getName() + "] cannot be nullified");
        }
        nullifyColumn(columnDescriptor, columnChunkMetaData, columnReadStoreImpl, parquetFileWriter, messageType);
    }

    private void nullifyColumn(ColumnDescriptor columnDescriptor, ColumnChunkMetaData columnChunkMetaData, ColumnReadStoreImpl columnReadStoreImpl, ParquetFileWriter parquetFileWriter, MessageType messageType) throws IOException {
        long valueCount = columnChunkMetaData.getValueCount();
        int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
        ColumnReader columnReader = columnReadStoreImpl.getColumnReader(columnDescriptor);
        ParquetProperties build = ParquetProperties.builder().withWriterVersion(columnChunkMetaData.getEncodingStats().usesV2Pages() ? ParquetProperties.WriterVersion.PARQUET_2_0 : ParquetProperties.WriterVersion.PARQUET_1_0).build();
        CodecFactory.BytesCompressor compressor = new CodecFactory(new Configuration(), build.getPageSizeThreshold()).getCompressor(columnChunkMetaData.getCodec());
        MessageType newSchema = newSchema(messageType, columnDescriptor);
        ColumnChunkPageWriteStore columnChunkPageWriteStore = new ColumnChunkPageWriteStore(compressor, newSchema, build.getAllocator(), build.getColumnIndexTruncateLength());
        ColumnWriteStore newColumnWriteStore = build.newColumnWriteStore(newSchema, columnChunkPageWriteStore);
        ColumnWriter columnWriter = newColumnWriteStore.getColumnWriter(columnDescriptor);
        for (int i = 0; i < valueCount; i++) {
            int currentRepetitionLevel = columnReader.getCurrentRepetitionLevel();
            int currentDefinitionLevel = columnReader.getCurrentDefinitionLevel();
            if (currentDefinitionLevel != maxDefinitionLevel) {
                columnWriter.writeNull(currentRepetitionLevel, currentDefinitionLevel);
            } else {
                if (currentDefinitionLevel == 0) {
                    throw new IOException("definition level is detected to be 0 for column " + columnChunkMetaData.getPath().toDotString() + " to be nullified");
                }
                if (currentRepetitionLevel == 0) {
                    columnWriter.writeNull(currentRepetitionLevel, currentDefinitionLevel - 1);
                }
            }
            newColumnWriteStore.endRecord();
        }
        newColumnWriteStore.flush();
        columnChunkPageWriteStore.flushToFileWriter(parquetFileWriter);
        newColumnWriteStore.close();
        columnWriter.close();
    }

    private MessageType newSchema(MessageType messageType, ColumnDescriptor columnDescriptor) {
        Type extractField;
        String[] path = columnDescriptor.getPath();
        Type type = messageType.getType(path);
        if (path.length == 1) {
            return new MessageType(messageType.getName(), type);
        }
        for (Type type2 : messageType.getFields()) {
            if (!type2.isPrimitive() && (extractField = extractField(type2.asGroupType(), type)) != null) {
                return new MessageType(messageType.getName(), extractField);
            }
        }
        throw new RuntimeException("No field is found");
    }

    private Type extractField(GroupType groupType, Type type) {
        if (type.equals((Object) groupType)) {
            return type;
        }
        for (Type type2 : groupType.asGroupType().getFields()) {
            if (!type2.isPrimitive()) {
                Type extractField = extractField(type2.asGroupType(), type);
                if (extractField != null) {
                    return extractField;
                }
            } else if (type2.equals((Object) type)) {
                return new GroupType(groupType.getRepetition(), groupType.getName(), type);
            }
        }
        return null;
    }

    public static Set<ColumnPath> convertToColumnPaths(List<String> list) {
        HashSet hashSet = new HashSet();
        Iterator<String> it = list.iterator();
        while (it.hasNext()) {
            hashSet.add(ColumnPath.fromDotString(it.next()));
        }
        return hashSet;
    }
}
