package org.apache.hudi.utilities.sources.helpers;

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.config.ConfigProperty;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.ConfigUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.fs.HadoopFSUtils;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.utilities.config.CloudSourceConfig;
import org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.InputBatch;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.MapPartitionsFunction;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.class */
public class CloudObjectsSelectorCommon {
    private static final Logger LOG = LoggerFactory.getLogger(CloudObjectsSelectorCommon.class);
    public static final String S3_OBJECT_KEY = "s3.object.key";
    public static final String S3_OBJECT_SIZE = "s3.object.size";
    public static final String S3_BUCKET_NAME = "s3.bucket.name";
    public static final String GCS_OBJECT_KEY = "name";
    public static final String GCS_OBJECT_SIZE = "size";
    private static final String SPACE_DELIMTER = " ";
    private static final String GCS_PREFIX = "gs://";
    private final TypedProperties properties;

    /* loaded from: input_file:org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon$Type.class */
    /**
     * Kind of cloud object store whose events are being processed. The static helpers in
     * this class branch on this value to pick column names and the URL scheme prefix.
     */
    public enum Type {
        /** Events use the {@code s3.bucket.name} / {@code s3.object.key} / {@code s3.object.size} columns. */
        S3,
        /** Events use the {@code bucket} / {@code name} / {@code size} columns and the {@code gs://} prefix. */
        GCS
    }

    /**
     * Creates a selector bound to the given configuration.
     *
     * @param typedProperties properties later consulted by {@code loadAsDataset} for the Spark
     *                        datasource options, the comma-separated-path flag and the
     *                        path-based partition fields; stored by reference, not copied
     */
    public CloudObjectsSelectorCommon(TypedProperties typedProperties) {
        this.properties = typedProperties;
    }

    /**
     * Builds the per-partition function that turns cloud event rows into {@link CloudObjectMetadata}.
     *
     * <p>Each incoming row is read positionally: column 0 and column 1 are combined with
     * {@code str} into the object URL (see {@code getUrlForFile}), and column 2 carries the object
     * size. Rows for which no URL is produced (the existence check failed) are dropped.
     *
     * @param str                  URL scheme prefix prepended to the bucket name, e.g. {@code gs://}
     * @param storageConfiguration storage configuration handed to the existence check
     * @param z                    when {@code true}, only objects that currently exist are emitted
     * @return a function mapping an iterator of rows to an iterator of object metadata
     * @throws HoodieIOException (from the returned function) if the size column is null or is
     *                           not a {@code String}, {@code Integer} or {@code Long}
     */
    public static MapPartitionsFunction<Row, CloudObjectMetadata> getCloudObjectMetadataPerPartition(String str, StorageConfiguration<Configuration> storageConfiguration, boolean z) {
        return rows -> {
            List<CloudObjectMetadata> objectsInPartition = new ArrayList<>();
            rows.forEachRemaining(row ->
                getUrlForFile(row, str, storageConfiguration, z).ifPresent(url -> {
                    LOG.info("Adding file: " + url);
                    objectsInPartition.add(new CloudObjectMetadata(url, toObjectSize(row.get(2))));
                }));
            return objectsInPartition.iterator();
        };
    }

    /**
     * Converts the raw size column of a cloud event into a {@code long}.
     *
     * @param rawSize value of the size column; may arrive as a numeric string, an Integer or a Long
     * @return the size as a long
     * @throws HoodieIOException if {@code rawSize} is null or of any other type
     */
    private static long toObjectSize(Object rawSize) {
        if (rawSize instanceof String) {
            return Long.parseLong((String) rawSize);
        }
        if (rawSize instanceof Integer) {
            return ((Integer) rawSize).longValue();
        }
        if (rawSize instanceof Long) {
            return (Long) rawSize;
        }
        // A null size would otherwise blow up on getClass() and mask the real problem.
        throw new HoodieIOException("unexpected object size's type in Cloud storage events: "
            + (rawSize == null ? "null" : rawSize.getClass()));
    }

    /**
     * Builds the URL-decoded location of the object described by an event row.
     *
     * <p>The raw location is {@code str + row[0] + "/" + row[1]}; it is decoded with UTF-8
     * {@link URLDecoder} rules before being returned.
     *
     * @param row                  event row; column 0 is used as the bucket and column 1 as the object key
     * @param str                  URL scheme prefix prepended to the bucket
     * @param storageConfiguration storage configuration used for the optional existence check
     * @param z                    when {@code true}, the decoded path is checked for existence and
     *                             an empty option is returned if it does not exist
     * @return the decoded URL, or empty when the existence check is enabled and fails
     * @throws HoodieException if decoding or the existence check throws
     */
    private static Option<String> getUrlForFile(Row row, String str, StorageConfiguration<Configuration> storageConfiguration, boolean z) {
        // The configuration copy is only consumed by the existence check, so skip the
        // per-row copy entirely when that check is disabled.
        final Configuration hadoopConf = z ? storageConfiguration.unwrapCopy() : null;
        final String bucket = row.getString(0);
        final String rawPath = str + bucket + "/" + row.getString(1);
        try {
            final String decodedPath = URLDecoder.decode(rawPath, StandardCharsets.UTF_8.name());
            if (z && !checkIfFileExists(str, bucket, decodedPath, hadoopConf)) {
                return Option.empty();
            }
            return Option.of(decodedPath);
        } catch (Exception e) {
            final String message = String.format("Failed to generate path to cloud file %s", rawPath);
            LOG.warn(message, e);
            throw new HoodieException(message, e);
        }
    }

    /**
     * Tells whether {@code str3} exists on the file system resolved for {@code str + str2}.
     *
     * @param str           URL scheme prefix
     * @param str2          bucket name appended to the prefix to resolve the file system
     * @param str3          full path of the object to probe
     * @param configuration Hadoop configuration used to obtain the file system
     * @return {@code true} if the path exists
     * @throws HoodieIOException if the lookup fails with an {@link IOException}
     */
    private static boolean checkIfFileExists(String str, String str2, String str3, Configuration configuration) {
        final String fileSystemUri = str + str2;
        try {
            return HadoopFSUtils.getFs(fileSystemUri, configuration).exists(new Path(str3));
        } catch (IOException ioe) {
            final String errorMessage = String.format("Error while checking path exists for %s ", str3);
            LOG.error(errorMessage, ioe);
            throw new HoodieIOException(errorMessage, ioe);
        }
    }

    /**
     * Assembles the SQL-style predicate used to select relevant objects from cloud events.
     *
     * <p>The predicate always starts with {@code <size column> > 0} and is extended, in order, with:
     * a {@code like '<prefix>%'} clause for the select prefix, a {@code not like '<prefix>%'} clause
     * for the ignore prefix, a {@code not like '%<substr>%'} clause for the ignore substring, and a
     * {@code like '%<extension>'} clause for the data file extension (falling back to the file
     * format when no extension is configured). For {@link Type#S3} the three path settings fall
     * back to their S3-specific configuration keys when the generic ones are unset.
     *
     * @param type            cloud store kind; selects the key/size column names
     * @param typedProperties configuration holding the prefix/substring/extension settings
     * @return the filter expression
     */
    public static String generateFilter(Type type, TypedProperties typedProperties) {
        final String fileFormat = CloudDataFetcher.getFileFormat(typedProperties);
        final Option<String> genericSelectPrefix = getPropVal(typedProperties, CloudSourceConfig.SELECT_RELATIVE_PATH_PREFIX);
        final Option<String> genericIgnorePrefix = getPropVal(typedProperties, CloudSourceConfig.IGNORE_RELATIVE_PATH_PREFIX);
        final Option<String> genericIgnoreSubstr = getPropVal(typedProperties, CloudSourceConfig.IGNORE_RELATIVE_PATH_SUBSTR);

        final boolean isS3 = type.equals(Type.S3);
        final String keyColumn = isS3 ? S3_OBJECT_KEY : GCS_OBJECT_KEY;
        final String sizeColumn = isS3 ? S3_OBJECT_SIZE : GCS_OBJECT_SIZE;

        // S3 sources additionally honour their own configuration keys as a fallback.
        final Option<String> selectPrefix = isS3
            ? genericSelectPrefix.or(() -> getPropVal(typedProperties, S3EventsHoodieIncrSourceConfig.S3_KEY_PREFIX))
            : genericSelectPrefix;
        final Option<String> ignorePrefix = isS3
            ? genericIgnorePrefix.or(() -> getPropVal(typedProperties, S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_PREFIX))
            : genericIgnorePrefix;
        final Option<String> ignoreSubstr = isS3
            ? genericIgnoreSubstr.or(() -> getPropVal(typedProperties, S3EventsHoodieIncrSourceConfig.S3_IGNORE_KEY_SUBSTRING))
            : genericIgnoreSubstr;

        final StringBuilder filter = new StringBuilder(String.format("%s > 0", sizeColumn));
        selectPrefix.ifPresent(prefix ->
            filter.append(SPACE_DELIMTER).append(String.format("and %s like '%s%%'", keyColumn, prefix)));
        ignorePrefix.ifPresent(prefix ->
            filter.append(SPACE_DELIMTER).append(String.format("and %s not like '%s%%'", keyColumn, prefix)));
        ignoreSubstr.ifPresent(substr ->
            filter.append(SPACE_DELIMTER).append(String.format("and %s not like '%%%s%%'", keyColumn, substr)));

        getPropVal(typedProperties, CloudSourceConfig.CLOUD_DATAFILE_EXTENSION)
            .or(() -> Option.of(fileFormat))
            .ifPresent(extension ->
                filter.append(SPACE_DELIMTER).append(String.format("and %s like '%%%s'", keyColumn, extension)));
        return filter.toString();
    }

    /**
     * Collects the distinct objects referenced by a dataset of cloud events.
     *
     * <p>The bucket, key and size columns for the given {@code type} are selected, de-duplicated,
     * converted per partition via {@link #getCloudObjectMetadataPerPartition} and collected to the
     * driver. For GCS the URL prefix is {@code gs://}; for S3 it is the configured file-system
     * prefix, lower-cased, followed by {@code ://}.
     *
     * @param type             cloud store kind
     * @param javaSparkContext context whose Hadoop configuration is copied for the executors
     * @param dataset          cloud event rows
     * @param z                when {@code true}, objects that no longer exist are skipped
     * @param typedProperties  configuration; read only for S3 to obtain the file-system prefix
     * @return metadata of every distinct object
     * @throws UnsupportedOperationException if {@code type} is neither GCS nor S3
     */
    public static List<CloudObjectMetadata> getObjectMetadata(Type type, JavaSparkContext javaSparkContext, Dataset<Row> dataset, boolean z, TypedProperties typedProperties) {
        StorageConfiguration<Configuration> storageConf = HadoopFSUtils.getStorageConfWithCopy(javaSparkContext.hadoopConfiguration());
        if (type == Type.GCS) {
            return dataset.select("bucket", GCS_OBJECT_KEY, GCS_OBJECT_SIZE)
                .distinct()
                .mapPartitions(getCloudObjectMetadataPerPartition(GCS_PREFIX, storageConf, z), Encoders.kryo(CloudObjectMetadata.class))
                .collectAsList();
        }
        if (type != Type.S3) {
            throw new UnsupportedOperationException("Invalid cloud type " + type);
        }
        // URI schemes are ASCII; Locale.ROOT keeps the result independent of the JVM default
        // locale (e.g. the Turkish dotless-i mapping).
        String s3UrlPrefix = ConfigUtils.getStringWithAltKeys(typedProperties, S3EventsHoodieIncrSourceConfig.S3_FS_PREFIX, true)
            .toLowerCase(Locale.ROOT) + "://";
        return dataset.select(S3_BUCKET_NAME, S3_OBJECT_KEY, S3_OBJECT_SIZE)
            .distinct()
            .mapPartitions(getCloudObjectMetadataPerPartition(s3UrlPrefix, storageConf, z), Encoders.kryo(CloudObjectMetadata.class))
            .collectAsList();
    }

    /**
     * Reads the given cloud objects into a single dataset.
     *
     * <p>Steps, in order: return empty for a null/empty object list; create a reader for format
     * {@code str}; apply the source schema from the schema provider when one is present and is not
     * {@code InputBatch.NULL_SCHEMA}; apply Spark datasource options parsed from a JSON object in
     * the configuration (generic key first, S3-specific key as fallback); load all paths (either
     * joined with commas or as an array, depending on configuration); add one column per
     * configured path-based partition field, extracted from the input file name as the text
     * between {@code <field>=} and the next {@code /}; finally repartition or coalesce to
     * {@code i} partitions.
     *
     * @param sparkSession session used to create the reader
     * @param list         objects to read; may be null or empty
     * @param str          Spark datasource format name
     * @param option       optional schema provider supplying the source schema
     * @param i            required number of partitions of the result
     * @return the loaded dataset, or empty when there is nothing to read
     * @throws HoodieException if the configured datasource options are not parseable as JSON
     */
    public Option<Dataset<Row>> loadAsDataset(SparkSession sparkSession, List<CloudObjectMetadata> list, String str, Option<SchemaProvider> option, int i) {
        // Guard against null here: the emptiness check below explicitly tolerates a null list,
        // so debug logging must not dereference it first.
        if (list != null && LOG.isDebugEnabled()) {
            LOG.debug("Extracted distinct files " + list.size() + " and some samples "
                + list.stream().map(CloudObjectMetadata::getPath).limit(10L).collect(Collectors.toList()));
        }
        if (CollectionUtils.isNullOrEmpty(list)) {
            return Option.empty();
        }

        DataFrameReader reader = sparkSession.read().format(str);
        String datasourceOptions = ConfigUtils.getStringWithAltKeys(this.properties, CloudSourceConfig.SPARK_DATASOURCE_OPTIONS, true);
        if (option.isPresent()) {
            Schema sourceSchema = option.get().getSourceSchema();
            if (sourceSchema != null && !sourceSchema.equals(InputBatch.NULL_SCHEMA)) {
                reader = reader.schema(AvroConversionUtils.convertAvroSchemaToStructType(sourceSchema));
            }
        }
        if (StringUtils.isNullOrEmpty(datasourceOptions)) {
            datasourceOptions = ConfigUtils.getStringWithAltKeys(this.properties, S3EventsHoodieIncrSourceConfig.SPARK_DATASOURCE_OPTIONS, true);
        }
        if (StringUtils.nonEmpty(datasourceOptions)) {
            try {
                // Jackson can only hand back a raw Map for Map.class; the options are expected
                // to be a flat JSON object of string keys to string values.
                @SuppressWarnings("unchecked")
                Map<String, String> sparkOptions = new ObjectMapper().readValue(datasourceOptions, Map.class);
                LOG.info(String.format("sparkOptions loaded: %s", sparkOptions));
                reader = reader.options(sparkOptions);
            } catch (IOException e) {
                throw new HoodieException(String.format("Failed to parse sparkOptions: %s", datasourceOptions), e);
            }
        }

        List<String> paths = list.stream().map(CloudObjectMetadata::getPath).collect(Collectors.toList());
        boolean commaSeparatedPaths = this.properties.getBoolean(CloudSourceConfig.SPARK_DATASOURCE_READER_COMMA_SEPARATED_PATH_FORMAT.key(), false);
        Dataset<Row> dataset = commaSeparatedPaths
            ? reader.load(String.join(",", paths))
            : reader.load(paths.toArray(new String[0]));

        if (ConfigUtils.containsConfigProperty(this.properties, (ConfigProperty<?>) CloudSourceConfig.PATH_BASED_PARTITION_FIELDS)) {
            for (String partitionField : ConfigUtils.getStringWithAltKeys(this.properties, CloudSourceConfig.PATH_BASED_PARTITION_FIELDS).split(",")) {
                String partitionKeyPrefix = String.format("%s=", partitionField);
                LOG.info(String.format("Adding column %s to dataset", partitionField));
                // ".../<field>=<value>/..." -> take what follows "<field>=" up to the next "/".
                dataset = dataset.withColumn(partitionField,
                    functions.split(functions.split(functions.input_file_name(), partitionKeyPrefix).getItem(1), "/").getItem(0));
            }
        }
        return Option.of(coalesceOrRepartition(dataset, i));
    }

    /**
     * Brings {@code dataset} to exactly {@code i} partitions.
     *
     * @param dataset dataset to adjust
     * @param i       required number of partitions
     * @return {@code dataset.repartition(i)} when the dataset currently has fewer than {@code i}
     *         partitions, otherwise {@code dataset.coalesce(i)}
     */
    private static Dataset<Row> coalesceOrRepartition(Dataset dataset, int i) {
        final int currentPartitions = dataset.rdd().getNumPartitions();
        LOG.info(String.format("existing number of partitions=%d, required number of partitions=%d", currentPartitions, i));
        if (currentPartitions < i) {
            return dataset.repartition(i);
        }
        return dataset.coalesce(i);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Looks up a string configuration value and wraps it in an {@link Option}.
     *
     * @param typedProperties properties to read from
     * @param configProperty  configuration key (resolved together with its alternative keys)
     * @return the value when it is neither null nor empty, otherwise an empty option
     */
    public static Option<String> getPropVal(TypedProperties typedProperties, ConfigProperty<String> configProperty) {
        final String value = ConfigUtils.getStringWithAltKeys(typedProperties, configProperty, true);
        if (StringUtils.isNullOrEmpty(value)) {
            return Option.empty();
        }
        return Option.of(value);
    }

    /**
     * Decompiled form of the compiler-generated hook used to rebuild serializable lambdas of this
     * class from their {@link SerializedLambda} form.
     *
     * <p>It recognises a single lambda, the {@code MapPartitionsFunction} returned by
     * {@code getCloudObjectMetadataPerPartition}, validates the serialized descriptor against the
     * expected interface/implementation signatures, and recreates the lambda from its three
     * captured arguments. The lambda body below duplicates the one in
     * {@code getCloudObjectMetadataPerPartition}.
     *
     * <p>NOTE(review): this method is marked synthetic and is decompiler output rather than
     * hand-written code. Constructs such as {@code boolean z = -1} and {@code switch (z)} with
     * {@code case false} are the decompiler's rendering of an int selector and are not valid Java
     * source; presumably the method should be dropped when rebuilding from source, since javac
     * generates its own {@code $deserializeLambda$} — confirm before recompiling.
     *
     * @param serializedLambda serialized description of the lambda to restore
     * @return the restored lambda
     * @throws IllegalArgumentException if the descriptor does not match the known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // Selector for the second switch; -1 means "no known lambda matched".
        boolean z = -1;
        // First switch: match the implementation method name by hash, then confirm with equals.
        switch (implMethodName.hashCode()) {
            case 400602188:
                if (implMethodName.equals("lambda$getCloudObjectMetadataPerPartition$c528e10$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second switch: rebuild the matched lambda after verifying every part of its descriptor.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapPartitionsFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/util/Iterator;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;Lorg/apache/hudi/storage/StorageConfiguration;ZLjava/util/Iterator;)Ljava/util/Iterator;")) {
                    // Captured arguments, in order: URL scheme prefix, storage configuration, existence-check flag.
                    String str = (String) serializedLambda.getCapturedArg(0);
                    StorageConfiguration storageConfiguration = (StorageConfiguration) serializedLambda.getCapturedArg(1);
                    boolean booleanValue = ((Boolean) serializedLambda.getCapturedArg(2)).booleanValue();
                    return it -> {
                        List arrayList = new ArrayList();
                        it.forEachRemaining(row -> {
                            getUrlForFile(row, str, storageConfiguration, booleanValue).ifPresent(str2 -> {
                                long longValue;
                                LOG.info("Adding file: " + str2);
                                // Column 2 holds the object size; it is accepted as String, Integer or Long.
                                Object obj = row.get(2);
                                if (obj instanceof String) {
                                    longValue = Long.parseLong((String) obj);
                                } else if (obj instanceof Integer) {
                                    longValue = ((Integer) obj).longValue();
                                } else {
                                    if (!(obj instanceof Long)) {
                                        throw new HoodieIOException("unexpected object size's type in Cloud storage events: " + obj.getClass());
                                    }
                                    longValue = ((Long) obj).longValue();
                                }
                                arrayList.add(new CloudObjectMetadata(str2, longValue));
                            });
                        });
                        return arrayList.iterator();
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
