package org.kitesdk.data.spi.filesystem;

import au.com.bytecode.opencsv.CSVParser;
import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Set;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileConstants;
import org.kitesdk.data.DatasetException;
import org.kitesdk.data.spi.Compatibility;
import org.kitesdk.shaded.com.google.common.annotations.VisibleForTesting;
import org.kitesdk.shaded.com.google.common.base.CharMatcher;
import org.kitesdk.shaded.com.google.common.base.Preconditions;
import org.kitesdk.shaded.com.google.common.collect.ImmutableSet;
import org.kitesdk.shaded.com.google.common.collect.Lists;

/* loaded from: input_file:org/kitesdk/data/spi/filesystem/CSVUtil.class */
/**
 * Static helpers for working with CSV data: building opencsv parsers, readers and
 * writers from a {@link CSVProperties} instance, and inferring an Avro {@link Schema}
 * by sampling the first rows of a CSV stream.
 */
public class CSVUtil {
    /** Maximum number of data rows sampled when inferring a schema. */
    private static final int DEFAULT_INFER_LINES = 25;

    /** Maximum number of characters of a sample value embedded in a field's doc string. */
    private static final int MAX_SAMPLE_LENGTH = 50;

    /** One or more decimal digits (no sign). */
    private static final Pattern LONG = Pattern.compile("\\d+");

    /**
     * Decimal number with an optional {@code d}/{@code D} suffix. At least one digit is
     * required on one side of the point, so a bare {@code "."} is not treated as a number.
     */
    private static final Pattern DOUBLE = Pattern.compile("(?:\\d+\\.\\d*|\\.\\d+)[dD]?");

    /**
     * Decimal number with an optional {@code f}/{@code F} suffix. Because {@link #DOUBLE} is
     * tried first, this only decides the type for values that carry the float suffix.
     */
    private static final Pattern FLOAT = Pattern.compile("(?:\\d+\\.\\d*|\\.\\d+)[fF]?");

    private static final Set<String> NO_REQUIRED_FIELDS = ImmutableSet.of();

    /** Matches every character outside the printable ASCII range {@code ' '..'~'}. */
    private static final CharMatcher NON_PRINTABLE = CharMatcher.inRange(' ', '~').negate();

    /**
     * Creates a line parser configured from the given properties.
     *
     * <p>Only the first character of the delimiter, quote and escape strings is used.
     *
     * @param cSVProperties CSV settings (delimiter, quote, escape)
     * @return a new parser
     */
    public static CSVParser newParser(CSVProperties cSVProperties) {
        char separator = cSVProperties.delimiter.charAt(0);
        char quote = cSVProperties.quote.charAt(0);
        char escape = cSVProperties.escape.charAt(0);
        // Trailing flags per the opencsv constructor: strictQuotes=false, ignoreLeadingWhiteSpace=true.
        return new CSVParser(separator, quote, escape, false, true);
    }

    /**
     * Creates a CSV reader over the stream, decoding it with the configured charset and
     * skipping {@code cSVProperties.linesToSkip} leading lines.
     *
     * @param inputStream   stream to read CSV text from
     * @param cSVProperties CSV settings (charset, delimiter, quote, escape, linesToSkip)
     * @return a new reader wrapping {@code inputStream}
     */
    public static CSVReader newReader(InputStream inputStream, CSVProperties cSVProperties) {
        InputStreamReader decoded = new InputStreamReader(inputStream, Charset.forName(cSVProperties.charset));
        char separator = cSVProperties.delimiter.charAt(0);
        char quote = cSVProperties.quote.charAt(0);
        char escape = cSVProperties.escape.charAt(0);
        // Trailing flags per the opencsv constructor: strictQuotes=false, ignoreLeadingWhiteSpace=true.
        return new CSVReader(decoded, separator, quote, escape, cSVProperties.linesToSkip, false, true);
    }

    /**
     * Creates a CSV writer over the stream, encoding output with the configured charset.
     *
     * @param outputStream  stream to write CSV text to
     * @param cSVProperties CSV settings (charset, delimiter, quote, escape)
     * @return a new writer wrapping {@code outputStream}
     */
    public static CSVWriter newWriter(OutputStream outputStream, CSVProperties cSVProperties) {
        OutputStreamWriter encoded = new OutputStreamWriter(outputStream, Charset.forName(cSVProperties.charset));
        char separator = cSVProperties.delimiter.charAt(0);
        char quote = cSVProperties.quote.charAt(0);
        char escape = cSVProperties.escape.charAt(0);
        return new CSVWriter(encoded, separator, quote, escape);
    }

    /**
     * Infers a record schema in which every field is nullable.
     *
     * @param str           name of the generated record schema
     * @param inputStream   CSV content to sample
     * @param cSVProperties CSV settings
     * @return the inferred schema
     * @throws IOException if reading the CSV content fails
     */
    public static Schema inferNullableSchema(String str, InputStream inputStream, CSVProperties cSVProperties) throws IOException {
        return inferSchemaInternal(str, inputStream, cSVProperties, NO_REQUIRED_FIELDS, true);
    }

    /**
     * Infers a record schema in which every field is nullable except the named required fields.
     *
     * @param str           name of the generated record schema
     * @param inputStream   CSV content to sample
     * @param cSVProperties CSV settings
     * @param set           names of fields that must not be nullable
     * @return the inferred schema
     * @throws IOException      if reading the CSV content fails
     * @throws DatasetException if a header is invalid or a required field has a missing value
     */
    public static Schema inferNullableSchema(String str, InputStream inputStream, CSVProperties cSVProperties, Set<String> set) throws IOException {
        return inferSchemaInternal(str, inputStream, cSVProperties, set, true);
    }

    /**
     * Infers a record schema in which a field is nullable only if the sampled rows contain a
     * missing value for it.
     *
     * @param str           name of the generated record schema
     * @param inputStream   CSV content to sample
     * @param cSVProperties CSV settings
     * @return the inferred schema
     * @throws IOException if reading the CSV content fails
     */
    public static Schema inferSchema(String str, InputStream inputStream, CSVProperties cSVProperties) throws IOException {
        return inferSchemaInternal(str, inputStream, cSVProperties, NO_REQUIRED_FIELDS, false);
    }

    /**
     * Infers a record schema in which a field is nullable only if the sampled rows contain a
     * missing value for it; the named required fields are never nullable.
     *
     * @param str           name of the generated record schema
     * @param inputStream   CSV content to sample
     * @param cSVProperties CSV settings
     * @param set           names of fields that must not be nullable
     * @return the inferred schema
     * @throws IOException      if reading the CSV content fails
     * @throws DatasetException if a header is invalid or a required field has a missing value
     */
    public static Schema inferSchema(String str, InputStream inputStream, CSVProperties cSVProperties, Set<String> set) throws IOException {
        return inferSchemaInternal(str, inputStream, cSVProperties, set, false);
    }

    /**
     * Shared implementation of the public inference methods.
     *
     * <p>Field names come from the first line of the stream ({@code useHeader}), from the
     * configured {@code header} string, or are generated as {@code field_N}. Up to
     * {@link #DEFAULT_INFER_LINES} data rows are then sampled; each column takes the type of
     * its first non-empty value and falls back to a nullable string if none is seen.
     *
     * @param name           name of the generated record schema
     * @param incoming       CSV content to sample
     * @param props          CSV settings
     * @param requiredFields names of fields that must not be nullable
     * @param makeNullable   whether every non-required field is made nullable
     * @return the inferred schema
     * @throws IOException          if reading the CSV content fails
     * @throws NullPointerException if there is no data row to sample
     * @throws DatasetException     if a header is invalid or a required field has a missing value
     */
    private static Schema inferSchemaInternal(String name, InputStream incoming, CSVProperties props, Set<String> requiredFields, boolean makeNullable) throws IOException {
        // The reader is left open: closing it would also close the caller-supplied stream.
        CSVReader reader = newReader(incoming, props);

        String[] header;
        String[] line;
        if (props.useHeader) {
            header = reader.readNext();
            line = reader.readNext();
            Preconditions.checkNotNull(line, "No content to infer schema");
        } else if (props.header != null) {
            header = newParser(props).parseLine(props.header);
            line = reader.readNext();
            Preconditions.checkNotNull(line, "No content to infer schema");
        } else {
            line = reader.readNext();
            Preconditions.checkNotNull(line, "No content to infer schema");
            header = new String[line.length];
            for (int i = 0; i < line.length; i++) {
                header[i] = "field_" + String.valueOf(i);
            }
        }

        Schema.Type[] types = new Schema.Type[header.length];
        String[] samples = new String[header.length];
        boolean[] sawNull = new boolean[header.length];
        boolean[] sawEmpty = new boolean[header.length];

        for (int processed = 0; processed < DEFAULT_INFER_LINES && line != null; processed++) {
            for (int i = 0; i < header.length; i++) {
                if (i >= line.length) {
                    // A short row has no value for this column.
                    sawNull[i] = true;
                    continue;
                }
                String value = line[i];
                if (types[i] == null) {
                    // The first non-empty value fixes the column type and becomes its sample.
                    types[i] = inferFieldType(value);
                    if (types[i] != null) {
                        samples[i] = value;
                    }
                }
                if (value == null) {
                    sawNull[i] = true;
                } else if (value.isEmpty()) {
                    sawEmpty[i] = true;
                }
            }
            line = reader.readNext();
        }

        SchemaBuilder.FieldAssembler<Schema> fields =
                SchemaBuilder.record(name).doc("Schema generated by Kite").fields();

        for (int i = 0; i < header.length; i++) {
            String fieldName = validatedFieldName(header[i], i);
            // An empty string is a legitimate value for a string column, but counts as
            // missing for every other type (including a column with no inferred type).
            boolean foundNull = sawNull[i] || (sawEmpty[i] && types[i] != Schema.Type.STRING);
            String doc = "Type inferred from '" + sample(samples[i]) + "'";

            if (requiredFields.contains(fieldName)) {
                if (foundNull) {
                    throw new DatasetException("Found null value for required field: " + fieldName + " (" + types[i] + ")");
                }
                fields = fields.name(fieldName).doc(doc).type(schema(types[i], false)).noDefault();
            } else {
                boolean nullable = makeNullable || foundNull;
                SchemaBuilder.GenericDefault<Schema> field =
                        fields.name(fieldName).doc(doc).type(schema(types[i], nullable));
                fields = nullable ? field.withDefault(null) : field.noDefault();
            }
        }
        return fields.endRecord();
    }

    /**
     * Trims a raw header cell and checks that it is usable as a field name.
     *
     * @param rawName header cell as read, possibly {@code null}
     * @param index   zero-based column index, used in error messages
     * @return the trimmed field name
     * @throws DatasetException if the header is null, blank, or rejected by
     *                          {@code Compatibility.isAvroCompatibleName}
     */
    private static String validatedFieldName(String rawName, int index) {
        if (rawName == null) {
            throw new DatasetException("Bad header for field " + index + ": null");
        }
        String fieldName = rawName.trim();
        // NOTE(review): CSVProperties.DEFAULT_QUOTE supplies the closing quote of the messages
        // below; presumably it is a double quote - confirm against CSVProperties.
        if (fieldName.isEmpty()) {
            throw new DatasetException("Bad header for field " + index + ": \"" + fieldName + CSVProperties.DEFAULT_QUOTE);
        }
        if (!Compatibility.isAvroCompatibleName(fieldName)) {
            throw new DatasetException("Bad header for field, should start with a character or _ and can contain only alphanumerics and _ " + index + ": \"" + fieldName + CSVProperties.DEFAULT_QUOTE);
        }
        return fieldName;
    }

    /**
     * Produces a short, printable excerpt of a value for use in a field's doc string.
     *
     * <p>At most the first {@link #MAX_SAMPLE_LENGTH} characters are kept, and every character
     * outside printable ASCII is replaced with {@code '.'}.
     *
     * @param str value to excerpt, may be {@code null}
     * @return the excerpt, or {@code DataFileConstants.NULL_CODEC} when {@code str} is null
     */
    @VisibleForTesting
    static String sample(@Nullable String str) {
        if (str == null) {
            // NOTE(review): this Avro constant is presumably the text "null" - confirm.
            return DataFileConstants.NULL_CODEC;
        }
        CharSequence head = str.subSequence(0, Math.min(MAX_SAMPLE_LENGTH, str.length()));
        return NON_PRINTABLE.replaceFrom(head, '.');
    }

    /**
     * Builds the Avro schema for a single field.
     *
     * @param type         inferred primitive type, or {@code null} if no value was seen
     * @param makeNullable whether to wrap the type in a union with null
     * @return the primitive schema, or a {@code [null, type]} union when nullable; an unknown
     *         type always becomes a nullable string
     */
    private static Schema schema(Schema.Type type, boolean makeNullable) {
        Schema base = Schema.create(type == null ? Schema.Type.STRING : type);
        if (makeNullable || type == null) {
            return Schema.createUnion(Lists.newArrayList(Schema.create(Schema.Type.NULL), base));
        }
        return base;
    }

    /**
     * Guesses the Avro type of a single CSV value.
     *
     * @param value raw cell text
     * @return {@code null} for a null or empty value; otherwise LONG, DOUBLE, FLOAT or STRING,
     *         tested in that order
     */
    private static Schema.Type inferFieldType(String value) {
        if (value == null || value.isEmpty()) {
            return null;
        }
        if (LONG.matcher(value).matches()) {
            return Schema.Type.LONG;
        }
        if (DOUBLE.matcher(value).matches()) {
            return Schema.Type.DOUBLE;
        }
        if (FLOAT.matcher(value).matches()) {
            return Schema.Type.FLOAT;
        }
        return Schema.Type.STRING;
    }
}
