package org.apache.hudi.utilities.sources;

import java.util.Arrays;
import java.util.List;
import org.apache.http.cookie.ClientCookie;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.DFSPathSelector;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.avro.SchemaConverters;
import org.apache.spark.sql.types.StructType;

/**
 * Row-based source that reads CSV files from a DFS location through Spark's
 * {@code csv} data source.
 *
 * <p>Files to read are chosen by a {@link DFSPathSelector}. Any property named
 * {@code hoodie.deltastreamer.csv.<key>}, where {@code <key>} is one of
 * {@link #CSV_CONFIG_KEYS}, is forwarded to the Spark {@link DataFrameReader}
 * as the option {@code <key>}.
 *
 * <p>When a {@link SchemaProvider} is supplied, its source schema is converted
 * to a Spark {@link StructType} and applied to the reader; otherwise the reader
 * is asked to infer the schema.
 */
public class CsvDFSSource extends RowSource {
    private static final long serialVersionUID = 1L;

    /** Prefix of the properties that carry CSV reader options. */
    protected static final String CSV_SRC_CONFIG_PREFIX = "hoodie.deltastreamer.csv.";

    /** CSV reader option names that are looked up under {@link #CSV_SRC_CONFIG_PREFIX}. */
    protected static final List<String> CSV_CONFIG_KEYS = Arrays.asList(
            "sep", "encoding", "quote", "escape", "charToEscapeQuoteEscaping", "comment",
            "header", "enforceSchema", "inferSchema", "samplingRatio", "ignoreLeadingWhiteSpace",
            "ignoreTrailingWhiteSpace", "nullValue", "emptyValue", "nanValue", "positiveInf",
            "negativeInf", "dateFormat", "timestampFormat", "maxColumns", "maxCharsPerColumn",
            "mode", "columnNameOfCorruptRecord", "multiLine");

    private final transient DFSPathSelector pathSelector;

    /** Spark schema derived from the schema provider, or {@code null} when no provider was given. */
    private final StructType sourceSchema;

    /**
     * Creates the source and, if a schema provider is available, converts its
     * source schema to a Spark SQL schema up front.
     *
     * @param props          configuration, including optional {@code hoodie.deltastreamer.csv.*} entries
     * @param sparkContext   Spark context; its Hadoop configuration is handed to the path selector
     * @param sparkSession   Spark session used to build the CSV reader
     * @param schemaProvider provider of the source schema; may be {@code null}
     */
    public CsvDFSSource(TypedProperties props, JavaSparkContext sparkContext, SparkSession sparkSession,
            SchemaProvider schemaProvider) {
        super(props, sparkContext, sparkSession, schemaProvider);
        this.pathSelector = DFSPathSelector.createSourceSelector(props, sparkContext.hadoopConfiguration());
        // dataType() is declared as the general DataType, so the narrowing to StructType must be explicit.
        this.sourceSchema = schemaProvider == null
                ? null
                : (StructType) SchemaConverters.toSqlType(schemaProvider.getSourceSchema()).dataType();
    }

    /**
     * Asks the path selector for the next set of files and loads them as a dataset.
     *
     * @param lastCheckpoint value forwarded unchanged to the path selector
     * @param sourceLimit    value forwarded unchanged to the path selector
     * @return the loaded dataset (empty when the selector returned no paths) paired with
     *         the string the selector returned alongside the paths
     */
    @Override
    protected Pair<Option<Dataset<Row>>, String> fetchNextBatch(Option<String> lastCheckpoint, long sourceLimit) {
        Pair<Option<String>, String> selection =
                this.pathSelector.getNextFilePathsAndMaxModificationTime(this.sparkContext, lastCheckpoint, sourceLimit);
        Option<Dataset<Row>> rows = fromFiles(selection.getLeft());
        return Pair.of(rows, selection.getRight());
    }

    /**
     * Builds a CSV reader from the configured options and loads the given paths.
     *
     * @param pathStr comma-separated list of paths to load, if any
     * @return the loaded dataset, or an empty option when {@code pathStr} is absent
     */
    private Option<Dataset<Row>> fromFiles(Option<String> pathStr) {
        if (!pathStr.isPresent()) {
            return Option.empty();
        }

        DataFrameReader reader = this.sparkSession.read().format("csv");

        // Forward every explicitly configured CSV option to the Spark reader.
        for (String optionKey : CSV_CONFIG_KEYS) {
            String value = this.props.getString(CSV_SRC_CONFIG_PREFIX + optionKey, null);
            if (value != null) {
                reader.option(optionKey, value);
            }
        }

        boolean hasSourceSchema = this.sourceSchema != null;
        if (hasSourceSchema) {
            reader.schema(this.sourceSchema);
        }
        // Set after the user-supplied options, so this replaces any configured "inferSchema"
        // value: inference is enabled only when no source schema is available.
        reader.option("inferSchema", Boolean.toString(!hasSourceSchema));

        return Option.of(reader.load(pathStr.get().split(",")));
    }
}
