package org.apache.hudi.table.action.commit;

import java.lang.invoke.SerializedLambda;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileGroupId;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.util.NumericUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.WorkloadProfile;
import org.apache.hudi.table.WorkloadStat;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

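/**
 * Spark {@link Partitioner} that routes each incoming record to a numbered bucket.
 *
 * <p>Records that already carry a file location are sent to the update bucket registered for that
 * file id; records without a location are spread over the insert buckets of their partition path
 * according to the buckets' cumulative weights.
 *
 * <p>Decompiled from: input_file:org/apache/hudi/table/action/commit/UpsertPartitioner.class
 */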
public class UpsertPartitioner<T extends HoodieRecordPayload<T>> extends Partitioner {
    private static final Logger LOG = LogManager.getLogger(UpsertPartitioner.class);

    /** Workload statistics; consulted at partition time for the per-partition insert count. */
    private WorkloadProfile profile;

    protected final HoodieTable table;

    protected final HoodieWriteConfig config;

    /** Small files considered as insert targets, accumulated across all partitions with inserts. */
    protected List<SmallFile> smallFiles = new ArrayList<>();

    /** Number of buckets allocated so far; doubles as the next bucket number to hand out. */
    private int totalBuckets = 0;

    /** File id -> bucket number of the update bucket bound to that file. */
    private HashMap<String, Integer> updateLocationToBucket = new HashMap<>();

    /** Partition path -> insert buckets of that partition paired with their cumulative weights. */
    private HashMap<String, List<InsertBucketCumulativeWeightPair>> partitionPathToInsertBucketInfos = new HashMap<>();

    /** Bucket number -> description (type, file id or file id prefix, partition path) of the bucket. */
    private HashMap<Integer, BucketInfo> bucketInfoMap = new HashMap<>();

    public UpsertPartitioner(WorkloadProfile workloadProfile, HoodieEngineContext hoodieEngineContext, HoodieTable hoodieTable, HoodieWriteConfig hoodieWriteConfig) {
        this.profile = workloadProfile;
        this.table = hoodieTable;
        this.config = hoodieWriteConfig;
        assignUpdates(workloadProfile);
        assignInserts(workloadProfile, hoodieEngineContext);
        LOG.info("Total Buckets :" + this.totalBuckets + ", buckets info => " + this.bucketInfoMap + ", \nPartition to insert buckets => " + this.partitionPathToInsertBucketInfos + ", \nUpdateLocations mapped to buckets =>" + this.updateLocationToBucket);
    }

    /**
     * Allocates one update bucket for every file id that receives updates, in every partition of
     * the workload.
     *
     * @param profile workload statistics holding, per partition path, the update locations
     */
    private void assignUpdates(WorkloadProfile profile) {
        for (Map.Entry<String, WorkloadStat> partitionStat : profile.getPartitionPathStatMap().entrySet()) {
            String partitionPath = partitionStat.getKey();
            // Only the keys (file ids) of the update-location map matter here; the counts are unused.
            for (String fileId : partitionStat.getValue().getUpdateLocationToCount().keySet()) {
                addUpdateBucket(partitionPath, fileId);
            }
        }
    }

    /**
     * Registers a new UPDATE bucket for the given file and returns its number.
     *
     * @param partitionPath partition path the file belongs to
     * @param fileId        id of the file the bucket writes to
     * @return the bucket number that was assigned
     */
    private int addUpdateBucket(String partitionPath, String fileId) {
        final int bucketNumber = this.totalBuckets;
        this.updateLocationToBucket.put(fileId, bucketNumber);
        this.bucketInfoMap.put(bucketNumber, new BucketInfo(BucketType.UPDATE, fileId, partitionPath));
        this.totalBuckets = bucketNumber + 1;
        return bucketNumber;
    }

    /**
     * Groups the file groups that are in pending clustering by partition path.
     *
     * @return partition path -> ids of the files in that partition with clustering pending
     */
    private Map<String, Set<String>> getPartitionPathToPendingClusteringFileGroupsId() {
        return this.table.getFileSystemView()
                .getFileGroupsInPendingClustering()
                // Only the file group id (the pair's key) is needed for the grouping.
                .map(fileGroupAndInstant -> fileGroupAndInstant.getKey())
                .collect(Collectors.groupingBy(
                        HoodieFileGroupId::getPartitionPath,
                        Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toSet())));
    }

    /**
     * Drops the small files whose file id is in pending clustering.
     *
     * @param pendingClusteringFileIds file ids to exclude
     * @param candidates               small files to filter
     * @return {@code candidates} itself when there is nothing to exclude, otherwise a new list
     *         without the excluded files
     */
    private List<SmallFile> filterSmallFilesInClustering(Set<String> pendingClusteringFileIds, List<SmallFile> candidates) {
        if (pendingClusteringFileIds.isEmpty()) {
            return candidates;
        }
        return candidates.stream()
                .filter(candidate -> !pendingClusteringFileIds.contains(candidate.location.getFileId()))
                .collect(Collectors.toList());
    }

    /**
     * Plans, for every partition that has inserts, which buckets receive them and with what weight.
     *
     * <p>Inserts are first packed into existing small files (skipping files in pending clustering)
     * up to the configured maximum file size; whatever remains is split over brand-new INSERT
     * buckets. The resulting buckets are stored per partition path together with their cumulative
     * weights, which is what {@code getPartition} searches.
     *
     * @param profile workload statistics of the incoming records
     * @param context engine context used to look up the small files of each partition
     */
    private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) {
        Set<String> partitionPaths = profile.getPartitionPaths();
        long averageRecordSize = averageBytesPerRecord(
                this.table.getMetaClient().getActiveTimeline().getCommitTimeline().filterCompletedInstants(), this.config);
        LOG.info("AvgRecordSize => " + averageRecordSize);

        Map<String, List<SmallFile>> partitionSmallFilesMap = getSmallFilesForPartitions(new ArrayList<>(partitionPaths), context);
        Map<String, Set<String>> pendingClusteringFileIds = getPartitionPathToPendingClusteringFileGroupsId();

        for (String partitionPath : partitionPaths) {
            WorkloadStat stat = profile.getWorkloadStat(partitionPath);
            if (stat.getNumInserts() <= 0) {
                continue;
            }

            List<SmallFile> partitionSmallFiles = filterSmallFilesInClustering(
                    pendingClusteringFileIds.getOrDefault(partitionPath, Collections.emptySet()),
                    partitionSmallFilesMap.get(partitionPath));
            this.smallFiles.addAll(partitionSmallFiles);
            LOG.info("For partitionPath : " + partitionPath + " Small Files => " + partitionSmallFiles);

            long unassignedInserts = stat.getNumInserts();
            // Parallel lists: bucketNumbers.get(i) receives recordsPerBucket.get(i) inserts.
            List<Integer> bucketNumbers = new ArrayList<>();
            List<Long> recordsPerBucket = new ArrayList<>();

            // Step 1: top up existing small files until they reach the maximum file size.
            for (SmallFile smallFile : partitionSmallFiles) {
                long recordsToAppend = Math.min(
                        (this.config.getParquetMaxFileSize() - smallFile.sizeBytes) / averageRecordSize, unassignedInserts);
                if (recordsToAppend <= 0) {
                    continue;
                }
                String fileId = smallFile.location.getFileId();
                int bucket;
                if (this.updateLocationToBucket.containsKey(fileId)) {
                    // The file already receives updates: reuse its update bucket for the inserts.
                    bucket = this.updateLocationToBucket.get(fileId);
                    LOG.info("Assigning " + recordsToAppend + " inserts to existing update bucket " + bucket);
                } else {
                    bucket = addUpdateBucket(partitionPath, fileId);
                    LOG.info("Assigning " + recordsToAppend + " inserts to new update bucket " + bucket);
                }
                bucketNumbers.add(bucket);
                recordsPerBucket.add(recordsToAppend);
                unassignedInserts -= recordsToAppend;
                if (unassignedInserts <= 0) {
                    break;
                }
            }

            // Step 2: whatever did not fit into small files goes to new INSERT buckets.
            if (unassignedInserts > 0) {
                long insertRecordsPerBucket = this.config.getCopyOnWriteInsertSplitSize();
                if (this.config.shouldAutoTuneInsertSplits()) {
                    insertRecordsPerBucket = this.config.getParquetMaxFileSize() / averageRecordSize;
                }
                int insertBuckets = (int) Math.ceil((1.0d * unassignedInserts) / insertRecordsPerBucket);
                LOG.info("After small file assignment: unassignedInserts => " + unassignedInserts + ", totalInsertBuckets => " + insertBuckets + ", recordsPerBucket => " + insertRecordsPerBucket);
                for (int b = 0; b < insertBuckets; b++) {
                    bucketNumbers.add(this.totalBuckets);
                    if (b < insertBuckets - 1) {
                        recordsPerBucket.add(insertRecordsPerBucket);
                    } else {
                        // The last bucket takes the remainder.
                        recordsPerBucket.add(unassignedInserts - ((insertBuckets - 1) * insertRecordsPerBucket));
                    }
                    this.bucketInfoMap.put(this.totalBuckets, new BucketInfo(BucketType.INSERT, FSUtils.createNewFileIdPfx(), partitionPath));
                    this.totalBuckets++;
                }
            }

            // Step 3: turn record counts into weights and running (cumulative) weights.
            List<InsertBucketCumulativeWeightPair> insertBucketInfos = new ArrayList<>();
            double cumulativeWeight = 0.0d;
            for (int i = 0; i < bucketNumbers.size(); i++) {
                InsertBucket insertBucket = new InsertBucket();
                insertBucket.bucketNumber = bucketNumbers.get(i);
                insertBucket.weight = (1.0d * recordsPerBucket.get(i)) / stat.getNumInserts();
                cumulativeWeight += insertBucket.weight;
                insertBucketInfos.add(new InsertBucketCumulativeWeightPair(insertBucket, cumulativeWeight));
            }
            LOG.info("Total insert buckets for partition path " + partitionPath + " => " + insertBucketInfos);
            this.partitionPathToInsertBucketInfos.put(partitionPath, insertBucketInfos);
        }
    }

    /**
     * Looks up the small files of every given partition with a Spark job that uses one task per
     * partition path.
     *
     * @param partitionPaths partition paths to inspect; may be {@code null} or empty
     * @param context        engine context; the Spark context is obtained from it
     * @return partition path -> small files of that partition; an empty map when no partition
     *         paths were given
     */
    private Map<String, List<SmallFile>> getSmallFilesForPartitions(List<String> partitionPaths, HoodieEngineContext context) {
        JavaSparkContext sparkContext = HoodieSparkEngineContext.getSparkContext(context);
        if (partitionPaths == null || partitionPaths.isEmpty()) {
            return new HashMap<>();
        }
        context.setJobStatus(getClass().getSimpleName(), "Getting small files from partitions");
        // The explicit type witness is required: an implicitly typed lambda alone would make the
        // compiler infer Object for the key and value types of the resulting pair RDD.
        return sparkContext.parallelize(partitionPaths, partitionPaths.size())
                .<String, List<SmallFile>>mapToPair(partitionPath -> new Tuple2<>(partitionPath, getSmallFiles(partitionPath)))
                .collectAsMap();
    }

    /**
     * Collects the base files of a partition that are smaller than the configured small-file
     * limit, as of the last completed commit.
     *
     * @param partitionPath partition path to inspect
     * @return the small files found; empty when there is no completed commit yet
     */
    protected List<SmallFile> getSmallFiles(String partitionPath) {
        List<SmallFile> smallFileLocations = new ArrayList<>();
        HoodieTimeline completedCommits = this.table.getMetaClient().getCommitsTimeline().filterCompletedInstants();
        if (completedCommits.empty()) {
            return smallFileLocations;
        }

        HoodieInstant latestCommit = completedCommits.lastInstant().get();
        List<HoodieBaseFile> latestBaseFiles = this.table.getBaseFileOnlyView()
                .getLatestBaseFilesBeforeOrOn(partitionPath, latestCommit.getTimestamp())
                .collect(Collectors.toList());

        for (HoodieBaseFile baseFile : latestBaseFiles) {
            if (baseFile.getFileSize() >= this.config.getParquetSmallFileLimit()) {
                continue;
            }
            // Commit time and file id are both derived from the base file's name.
            String baseFileName = baseFile.getFileName();
            SmallFile candidate = new SmallFile();
            candidate.location = new HoodieRecordLocation(FSUtils.getCommitTime(baseFileName), FSUtils.getFileId(baseFileName));
            candidate.sizeBytes = baseFile.getFileSize();
            smallFileLocations.add(candidate);
        }
        return smallFileLocations;
    }

    /**
     * Returns the description of a bucket.
     *
     * @param bucketNumber number of the bucket
     * @return the bucket's info, or {@code null} when no bucket has that number
     */
    public BucketInfo getBucketInfo(int bucketNumber) {
        BucketInfo bucketInfo = this.bucketInfoMap.get(bucketNumber);
        return bucketInfo;
    }

    /**
     * Returns the insert buckets planned for a partition, each paired with its cumulative weight.
     *
     * @param partitionPath partition path to look up
     * @return the partition's insert buckets, or {@code null} when none were planned for it
     */
    public List<InsertBucketCumulativeWeightPair> getInsertBuckets(String partitionPath) {
        List<InsertBucketCumulativeWeightPair> insertBuckets = this.partitionPathToInsertBucketInfos.get(partitionPath);
        return insertBuckets;
    }

    /**
     * Returns the number of Spark partitions, which equals the number of buckets allocated.
     *
     * @return total number of update and insert buckets
     */
    @Override
    public int numPartitions() {
        return this.totalBuckets;
    }

    /**
     * Maps a record to its bucket number.
     *
     * <p>A record with a known location goes to the update bucket registered for its file id. A
     * record without a location is hashed (MD5 of the record key) to a position in {@code [0, 1)}
     * and placed into the insert bucket of its partition whose cumulative weight covers that
     * position.
     *
     * @param key a {@code Tuple2} of the record's {@link HoodieKey} and its optional
     *            {@link HoodieRecordLocation}
     * @return the bucket number the record is routed to
     */
    @Override
    public int getPartition(Object key) {
        @SuppressWarnings("unchecked")
        Tuple2<HoodieKey, Option<HoodieRecordLocation>> keyLocation = (Tuple2<HoodieKey, Option<HoodieRecordLocation>>) key;

        if (keyLocation._2().isPresent()) {
            return this.updateLocationToBucket.get(keyLocation._2().get().getFileId());
        }

        String partitionPath = keyLocation._1().getPartitionPath();
        List<InsertBucketCumulativeWeightPair> targetBuckets = this.partitionPathToInsertBucketInfos.get(partitionPath);

        // The modulus and the divisor are the same value, so the position falls in [0, 1);
        // max(1, ...) guards against a zero insert count.
        long totalInserts = Math.max(1L, this.profile.getWorkloadStat(partitionPath).getNumInserts());
        long hashOfKey = NumericUtils.getMessageDigestHash("MD5", keyLocation._1().getRecordKey());
        double position = (1.0d * Math.floorMod(hashOfKey, totalInserts)) / totalInserts;

        int index = Collections.binarySearch(targetBuckets, new InsertBucketCumulativeWeightPair(new InsertBucket(), position));
        if (index >= 0) {
            return targetBuckets.get(index).getKey().bucketNumber;
        }

        // No exact match: binarySearch returned -(insertionPoint) - 1.
        int insertionPoint = -index - 1;
        if (insertionPoint < targetBuckets.size()) {
            return targetBuckets.get(insertionPoint).getKey().bucketNumber;
        }

        // Past the last cumulative weight: fall back to the first bucket.
        return targetBuckets.get(0).getKey().bucketNumber;
    }

    /**
     * Estimates the average size of a record in bytes from the commit history.
     *
     * <p>Walks the given timeline from the newest instant to the oldest and uses the first commit
     * that wrote more bytes than {@code recordSizeEstimationThreshold * parquetSmallFileLimit} and
     * at least one record; the estimate is that commit's bytes written divided by its records
     * written, rounded up. This is best effort: when the timeline is empty, no commit qualifies, or
     * anything fails while reading the metadata (the failure is logged), the configured
     * copy-on-write record size estimate is returned instead.
     *
     * @param commitTimeline    timeline whose instants' commit metadata is inspected
     * @param hoodieWriteConfig write configuration supplying the default estimate and the threshold
     * @return estimated bytes per record
     */
    protected static long averageBytesPerRecord(HoodieTimeline commitTimeline, HoodieWriteConfig hoodieWriteConfig) {
        long avgSize = hoodieWriteConfig.getCopyOnWriteRecordSizeEstimate();
        long fileSizeThreshold =
                (long) (hoodieWriteConfig.getRecordSizeEstimationThreshold() * hoodieWriteConfig.getParquetSmallFileLimit());
        try {
            if (!commitTimeline.empty()) {
                Iterator<HoodieInstant> instants = commitTimeline.getReverseOrderedInstants().iterator();
                while (instants.hasNext()) {
                    HoodieInstant instant = instants.next();
                    HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes(
                            commitTimeline.getInstantDetails(instant).get(), HoodieCommitMetadata.class);
                    long totalBytesWritten = commitMetadata.fetchTotalBytesWritten();
                    long totalRecordsWritten = commitMetadata.fetchTotalRecordsWritten();
                    if (totalBytesWritten > fileSizeThreshold && totalRecordsWritten > 0) {
                        avgSize = (long) Math.ceil((1.0d * totalBytesWritten) / totalRecordsWritten);
                        break;
                    }
                }
            }
        } catch (Throwable t) {
            // Deliberately fail-safe: a broken commit file must not fail the write, so fall back
            // to the configured estimate.
            LOG.error("Error trying to compute average bytes/record ", t);
        }
        return avgSize;
    }

    // NOTE: the decompiler emitted a hand-written copy of the synthetic $deserializeLambda$ method
    // here. That method is generated by the Java compiler for every class containing serializable
    // lambdas (here: the PairFunction passed to mapToPair in getSmallFilesForPartitions), and the
    // decompiled copy was not valid Java (an int assigned to a boolean, a switch over a boolean,
    // and a lambda returned without a target type). It is intentionally omitted so that the
    // compiler can generate the real one.
}
