package org.apache.carbondata.spark.util;

import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier;
import org.apache.carbondata.core.datastore.filesystem.CarbonFile;
import org.apache.carbondata.core.datastore.impl.FileFactory;
import org.apache.carbondata.core.locks.CarbonLockFactory;
import org.apache.carbondata.core.locks.ICarbonLock;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
import org.apache.carbondata.core.metadata.ColumnIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.encoder.Encoding;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
import org.apache.carbondata.core.statusmanager.SegmentStatus;
import org.apache.carbondata.core.util.CarbonProperties;
import org.apache.carbondata.core.util.CarbonUtil;
import org.apache.carbondata.core.util.DataTypeUtil;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.processing.exception.DataLoadingException;
import org.apache.carbondata.processing.loading.csvinput.CSVInputFormat;
import org.apache.carbondata.processing.loading.csvinput.StringArrayWritable;
import org.apache.carbondata.processing.loading.exception.NoRetryException;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.spark.CarbonSparkFactory$;
import org.apache.carbondata.spark.DictionaryDetail;
import org.apache.carbondata.spark.rdd.ArrayParser;
import org.apache.carbondata.spark.rdd.CarbonAllDictionaryCombineRDD;
import org.apache.carbondata.spark.rdd.CarbonBlockDistinctValuesCombineRDD;
import org.apache.carbondata.spark.rdd.CarbonColumnDictGenerateRDD;
import org.apache.carbondata.spark.rdd.CarbonGlobalDictionaryGenerateRDD;
import org.apache.carbondata.spark.rdd.ColumnDistinctValues;
import org.apache.carbondata.spark.rdd.ColumnPartitioner;
import org.apache.carbondata.spark.rdd.DataFormat;
import org.apache.carbondata.spark.rdd.DictionaryLoadModel;
import org.apache.carbondata.spark.rdd.GenericParser;
import org.apache.carbondata.spark.rdd.PrimitiveParser;
import org.apache.carbondata.spark.rdd.StructParser;
import org.apache.carbondata.spark.tasks.DictionaryWriterTask;
import org.apache.carbondata.spark.tasks.DictionaryWriterTask$;
import org.apache.carbondata.spark.tasks.SortIndexWriterTask;
import org.apache.carbondata.spark.tasks.SortIndexWriterTask$;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.log4j.Logger;
import org.apache.spark.Accumulator;
import org.apache.spark.AccumulatorParam$IntAccumulatorParam$;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkException;
import org.apache.spark.deploy.SparkHadoopUtil$;
import org.apache.spark.rdd.NewHadoopRDD;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.Buffer;
import scala.collection.mutable.HashMap;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering$Int$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;
import scala.sys.package$;

/* compiled from: GlobalDictionaryUtil.scala */
/* loaded from: input_file:org/apache/carbondata/spark/util/GlobalDictionaryUtil$.class */
public final class GlobalDictionaryUtil$ {
    // Singleton slot in the Scala-object style. The decompiler shows a null initialiser;
    // NOTE(review): presumably the constructor (not visible in this chunk) stores the real instance - confirm.
    public static final GlobalDictionaryUtil$ MODULE$ = null;
    // Logger handed out by org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().
    private final Logger org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER;
    // Separator character handed out by DEFAULT_SEPARATOR(); parseRecord splits dictionary records on it.
    private final char DEFAULT_SEPARATOR;
    // Quote character handed out by DEFAULT_QUOTE_CHARACTER().
    private final char DEFAULT_QUOTE_CHARACTER;

    // Creates one instance of this class when the class is initialised.
    static {
        new GlobalDictionaryUtil$();
    }

    /**
     * Returns the logger stored in this utility object.
     *
     * @return the logger field of this instance
     */
    public Logger org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER() {
        return org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER;
    }

    /**
     * Returns the separator character stored in this utility object.
     *
     * @return the value of the {@code DEFAULT_SEPARATOR} field
     */
    public char DEFAULT_SEPARATOR() {
        return DEFAULT_SEPARATOR;
    }

    /**
     * Returns the quote character stored in this utility object.
     *
     * @return the value of the {@code DEFAULT_QUOTE_CHARACTER} field
     */
    public char DEFAULT_QUOTE_CHARACTER() {
        return DEFAULT_QUOTE_CHARACTER;
    }

    /**
     * Builds the pair of (dimensions, column names) used for dictionary generation.
     *
     * <p>The input dimensions are first narrowed by the {@code $anonfun$1} predicate. Every
     * remaining dimension is then passed to the {@code pruneDimensions$1} closure, which is given
     * both name arrays and the two result buffers.
     * NOTE(review): both closures are compiled separately, so the exact selection rule is not
     * visible here.
     *
     * @param carbonDimensionArr candidate dimensions
     * @param strArr first name array handed to the closure (callers in this file pass the CSV header)
     * @param strArr2 second name array handed to the closure (callers in this file pass the input columns)
     * @return tuple of the collected dimensions and the collected column names
     */
    public Tuple2<CarbonDimension[], String[]> pruneDimensions(CarbonDimension[] carbonDimensionArr, String[] strArr, String[] strArr2) {
        ArrayBuffer selectedDimensions = new ArrayBuffer();
        ArrayBuffer selectedColumnNames = new ArrayBuffer();
        CarbonDimension[] candidates = (CarbonDimension[]) Predef$.MODULE$.refArrayOps(carbonDimensionArr).filter(new GlobalDictionaryUtil$$anonfun$1());
        Predef$.MODULE$.refArrayOps(candidates).foreach(new GlobalDictionaryUtil$$anonfun$pruneDimensions$1(strArr, strArr2, selectedDimensions, selectedColumnNames));
        return new Tuple2<>(selectedDimensions.toArray(ClassTag$.MODULE$.apply(CarbonDimension.class)), selectedColumnNames.toArray(ClassTag$.MODULE$.apply(String.class)));
    }

    /**
     * Tells whether a dimension carries {@code encoding} while not carrying {@code encoding2}.
     *
     * <p>For a complex dimension the answer is whether any child satisfies the
     * {@code hasEncoding$1} closure built from the same two encodings.
     *
     * @param carbonDimension dimension to inspect
     * @param encoding encoding that must be present
     * @param encoding2 encoding that must be absent; {@code null} disables this check
     * @return {@code true} when the dimension (or one of its children) matches
     */
    public boolean hasEncoding(CarbonDimension carbonDimension, Encoding encoding, Encoding encoding2) {
        if (!Predef$.MODULE$.Boolean2boolean(carbonDimension.isComplex())) {
            if (!carbonDimension.hasEncoding(encoding)) {
                return false;
            }
            return encoding2 == null || !carbonDimension.hasEncoding(encoding2);
        }
        IterableLike children = (IterableLike) JavaConverters$.MODULE$.asScalaBufferConverter(carbonDimension.getListOfChildDimensions()).asScala();
        return children.exists(new GlobalDictionaryUtil$$anonfun$hasEncoding$1(encoding, encoding2));
    }

    /**
     * Appends {@code carbonDimension} to {@code arrayBuffer} when it matches the requested encodings
     * and its predefined-dictionary state agrees with {@code z}.
     *
     * <p>A primitive dimension is added only if it has {@code encoding}, does not have
     * {@code encoding2} (when that is non-null), and the load model reports a predefined
     * dictionary path for it exactly when {@code z} is {@code true}. For a complex dimension each
     * child is handed to the {@code gatherDimensionByEncoding$1} closure instead.
     *
     * @param carbonLoadModel load model queried for predefined dictionary paths
     * @param carbonDimension dimension to examine
     * @param encoding encoding that must be present
     * @param encoding2 encoding that must be absent; {@code null} disables this check
     * @param arrayBuffer output buffer receiving matching dimensions
     * @param z {@code true} to collect dimensions with a predefined dictionary path,
     *          {@code false} to collect those without one
     */
    public void gatherDimensionByEncoding(CarbonLoadModel carbonLoadModel, CarbonDimension carbonDimension, Encoding encoding, Encoding encoding2, ArrayBuffer<CarbonDimension> arrayBuffer, boolean z) {
        if (Predef$.MODULE$.Boolean2boolean(carbonDimension.isComplex())) {
            Buffer children = (Buffer) JavaConverters$.MODULE$.asScalaBufferConverter(carbonDimension.getListOfChildDimensions()).asScala();
            children.foreach(new GlobalDictionaryUtil$$anonfun$gatherDimensionByEncoding$1(carbonLoadModel, encoding, encoding2, arrayBuffer, z));
            return;
        }
        if (!carbonDimension.hasEncoding(encoding)) {
            return;
        }
        if (encoding2 != null && carbonDimension.hasEncoding(encoding2)) {
            return;
        }
        // The path is looked up exactly once, and only after the encoding checks have passed.
        boolean hasPredefinedPath = carbonLoadModel.getPredefDictFilePath(carbonDimension) != null;
        if (z == hasPredefinedPath) {
            arrayBuffer.$plus$eq(carbonDimension);
        }
    }

    /**
     * Collects the dimensions reachable from {@code carbonDimension} that have
     * {@link Encoding#DICTIONARY} but not {@link Encoding#DIRECT_DICTIONARY}.
     *
     * @param carbonLoadModel load model forwarded to {@code gatherDimensionByEncoding}
     * @param carbonDimension dimension to start from
     * @param z forwarded unchanged to {@code gatherDimensionByEncoding}
     * @return the collected dimensions as an array
     */
    public CarbonDimension[] getPrimDimensionWithDict(CarbonLoadModel carbonLoadModel, CarbonDimension carbonDimension, boolean z) {
        ArrayBuffer<CarbonDimension> collected = new ArrayBuffer<>();
        gatherDimensionByEncoding(carbonLoadModel, carbonDimension, Encoding.DICTIONARY, Encoding.DIRECT_DICTIONARY, collected, z);
        Object asArray = collected.toArray(ClassTag$.MODULE$.apply(CarbonDimension.class));
        return (CarbonDimension[]) asArray;
    }

    /**
     * Visits every child of {@code carbonDimension} by index and hands it to the
     * {@code generateParserForChildrenDimension$1} closure together with the parent parser.
     * NOTE(review): the closure body is compiled separately; presumably it attaches a child parser
     * to {@code genericParser} - confirm.
     *
     * @param carbonDimension parent dimension whose children are visited
     * @param dataFormat data format forwarded to the closure
     * @param hashMap map forwarded to the closure
     * @param genericParser parent parser forwarded to the closure
     */
    public void generateParserForChildrenDimension(CarbonDimension carbonDimension, DataFormat dataFormat, HashMap<String, HashSet<String>> hashMap, GenericParser genericParser) {
        Buffer children = (Buffer) JavaConverters$.MODULE$.asScalaBufferConverter(carbonDimension.getListOfChildDimensions()).asScala();
        GlobalDictionaryUtil$$anonfun$generateParserForChildrenDimension$1 perChild = new GlobalDictionaryUtil$$anonfun$generateParserForChildrenDimension$1(dataFormat, hashMap, genericParser, children);
        children.indices().foreach$mVc$sp(perChild);
    }

    /**
     * Creates the parser matching the data type of an optional dimension.
     *
     * <ul>
     *   <li>{@code None} yields {@code None}.</li>
     *   <li>An array type yields an {@link ArrayParser} after its children have been processed.</li>
     *   <li>A struct type yields a {@link StructParser} after its children have been processed.</li>
     *   <li>Anything else yields a {@link PrimitiveParser} built from the map entry for the column id.</li>
     * </ul>
     *
     * @param option the dimension, if any
     * @param dataFormat data format passed to the complex-type parsers
     * @param hashMap map looked up by column id for primitive dimensions
     * @return the parser wrapped in {@code Some}, or {@code None}
     * @throws MatchError if {@code option} is neither {@code None} nor a {@code Some}
     */
    public Option<GenericParser> generateParserForDimension(Option<CarbonDimension> option, DataFormat dataFormat, HashMap<String, HashSet<String>> hashMap) {
        if (None$.MODULE$.equals(option)) {
            return (Option) None$.MODULE$;
        }
        if (!(option instanceof Some)) {
            throw new MatchError(option);
        }
        CarbonDimension dimension = (CarbonDimension) ((Some) option).x();
        if (DataTypes.isArrayType(dimension.getDataType())) {
            ArrayParser parser = new ArrayParser(dimension, dataFormat);
            generateParserForChildrenDimension(dimension, dataFormat, hashMap, parser);
            return new Some(parser);
        }
        if (DataTypes.isStructType(dimension.getDataType())) {
            StructParser parser = new StructParser(dimension, dataFormat);
            generateParserForChildrenDimension(dimension, dataFormat, hashMap, parser);
            return new Some(parser);
        }
        return new Some(new PrimitiveParser(dimension, hashMap.get(dimension.getColumnId())));
    }

    /**
     * Builds a {@link DataFormat} from an array of delimiter strings.
     *
     * <p>The array is mapped twice: once to a {@code String[]} via the
     * {@code createDataFormat$1} closure and once to a {@code Pattern[]} via the {@code $anonfun$2}
     * closure. Both results are passed to the {@code DataFormat} constructor together with the
     * literal {@code 0}.
     * NOTE(review): the closures are compiled separately; presumably they normalise each delimiter
     * and compile it to a regex - confirm.
     *
     * @param strArr delimiter strings
     * @return the data format, or {@code null} when {@code strArr} is null or empty
     */
    public DataFormat createDataFormat(String[] strArr) {
        if (ArrayUtils.isNotEmpty(strArr)) {
            String[] mappedDelimiters = (String[]) Predef$.MODULE$.refArrayOps(strArr).map(new GlobalDictionaryUtil$$anonfun$createDataFormat$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
            Pattern[] delimiterPatterns = (Pattern[]) Predef$.MODULE$.refArrayOps(strArr).map(new GlobalDictionaryUtil$$anonfun$2(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Pattern.class)));
            return new DataFormat(mappedDelimiters, 0, delimiterPatterns);
        }
        return null;
    }

    /**
     * Assembles the {@link DictionaryLoadModel} describing one dictionary-generation run.
     *
     * <p>Steps visible here:
     * <ol>
     *   <li>The {@code createDictionaryLoadModel$1} closure is run for every index of
     *       {@code carbonDimensionArr} and fills two buffers (one later mapped to
     *       {@code CarbonDimension[]}, one converted to {@code boolean[]}).</li>
     *   <li>Dictionary file paths, existence flags and column identifiers are obtained from the
     *       dictionary detail service.</li>
     *   <li>{@code hadoop.tmp.dir}, {@code carbon.lock.type} and
     *       {@code spark.deploy.zookeeper.url} are read from {@code CarbonProperties}.</li>
     *   <li>The part after the first comma of the serialization null format is extracted.</li>
     *   <li>Load metadata details are read if the model does not have them yet.</li>
     * </ol>
     *
     * @param carbonLoadModel load model supplying table path, delimiters and formats
     * @param carbonTableIdentifier identifier of the target table
     * @param carbonDimensionArr dimensions to generate dictionaries for
     * @param str path passed to the dictionary detail service and stored in the model
     * @param z flag forwarded to the per-dimension closure
     * @return the populated dictionary load model
     */
    public DictionaryLoadModel createDictionaryLoadModel(CarbonLoadModel carbonLoadModel, CarbonTableIdentifier carbonTableIdentifier, CarbonDimension[] carbonDimensionArr, String str, boolean z) {
        ArrayBuffer dimensionBuffer = new ArrayBuffer();
        ArrayBuffer flagBuffer = new ArrayBuffer();
        Predef$.MODULE$.refArrayOps(carbonDimensionArr).indices().foreach$mVc$sp(new GlobalDictionaryUtil$$anonfun$createDictionaryLoadModel$1(carbonLoadModel, carbonDimensionArr, z, dimensionBuffer, flagBuffer));
        CarbonDimension[] primDimensions = (CarbonDimension[]) ((TraversableOnce) dimensionBuffer.map(new GlobalDictionaryUtil$$anonfun$3(), ArrayBuffer$.MODULE$.canBuildFrom())).toArray(ClassTag$.MODULE$.apply(CarbonDimension.class));

        DictionaryDetail detail = CarbonSparkFactory$.MODULE$.getDictionaryDetailService().getDictionaryDetail(str, primDimensions, carbonLoadModel.getTablePath());
        String[] dictFilePaths = detail.dictFilePaths();
        boolean[] dictFileExists = detail.dictFileExists();
        ColumnIdentifier[] columnIdentifiers = detail.columnIdentifiers();

        CarbonProperties properties = CarbonProperties.getInstance();
        String tmpDir = properties.getProperty("hadoop.tmp.dir", System.getProperty("java.io.tmpdir"));
        String lockType = CarbonProperties.getInstance().getProperty("carbon.lock.type", "HDFSLOCK");
        String zookeeperUrl = CarbonProperties.getInstance().getProperty("spark.deploy.zookeeper.url");

        // The null format is stored as "<key>,<value>"; only the value part is kept.
        String nullFormat = carbonLoadModel.getSerializationNullFormat().split(",", 2)[1];

        if (carbonLoadModel.getLoadMetadataDetails() == null) {
            carbonLoadModel.readAndSetLoadMetadataDetails();
        }
        boolean noExistingLoads = carbonLoadModel.getLoadMetadataDetails().size() == 0;

        return new DictionaryLoadModel(
            AbsoluteTableIdentifier.from(carbonLoadModel.getTablePath(), carbonTableIdentifier),
            carbonDimensionArr,
            carbonLoadModel.getTablePath(),
            str,
            dictFilePaths,
            dictFileExists,
            (boolean[]) flagBuffer.toArray(ClassTag$.MODULE$.Boolean()),
            primDimensions,
            carbonLoadModel.getDelimiters(),
            columnIdentifiers,
            noExistingLoads,
            tmpDir,
            lockType,
            zookeeperUrl,
            nullFormat,
            carbonLoadModel.getDefaultTimestampFormat(),
            carbonLoadModel.getDefaultDateFormat());
    }

    /**
     * Produces the row RDD from which dictionary values are collected.
     *
     * <p>When a dataset is supplied, the required columns are selected from it. Otherwise the fact
     * file is read through {@link CSVInputFormat}: the Hadoop configuration is prepared, the
     * position of each required column in the CSV header is resolved, credentials/tokens are
     * obtained for the input path, and each record is converted to a {@link Row} by the
     * {@code $anonfun$6} closure.
     *
     * @param sQLContext SQL context providing the Spark context
     * @param carbonLoadModel load model supplying the fact file path and CSV header
     * @param option optional input dataset
     * @param strArr names of the columns that are needed
     * @param configuration Hadoop configuration; modified in place on the CSV path
     * @return RDD of rows restricted to the required columns
     */
    private RDD<Row> loadInputDataAsDictRdd(SQLContext sQLContext, CarbonLoadModel carbonLoadModel, Option<Dataset<Row>> option, String[] strArr, Configuration configuration) {
        if (!option.isDefined()) {
            CommonUtil$.MODULE$.configureCSVInputFormat(configuration, carbonLoadModel);
            configuration.set("mapreduce.input.fileinputformat.inputdir", carbonLoadModel.getFactFilePath());

            // Header columns (after the $anonfun$4 mapping) are zipped with their index to locate each required column.
            String[] headerColumns = (String[]) Predef$.MODULE$.refArrayOps(carbonLoadModel.getCsvHeaderColumns()).map(new GlobalDictionaryUtil$$anonfun$4(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
            int[] columnPositions = (int[]) Predef$.MODULE$.refArrayOps(strArr).map(new GlobalDictionaryUtil$$anonfun$5(Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(headerColumns).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).toMap(Predef$.MODULE$.$conforms())), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Int()));

            JobConf jobConf = new JobConf(configuration);
            SparkHadoopUtil$.MODULE$.get().addCredentials(jobConf);
            TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), new Path[]{new Path(carbonLoadModel.getFactFilePath())}, jobConf);

            NewHadoopRDD csvRecords = new NewHadoopRDD(sQLContext.sparkContext(), CSVInputFormat.class, NullWritable.class, StringArrayWritable.class, jobConf);
            return csvRecords.setName("global dictionary").map(new GlobalDictionaryUtil$$anonfun$6(columnPositions), ClassTag$.MODULE$.apply(Row.class));
        }
        Dataset dataset = (Dataset) option.get();
        String firstColumn = (String) Predef$.MODULE$.refArrayOps(strArr).head();
        return dataset.select(firstColumn, Predef$.MODULE$.wrapRefArray((Object[]) Predef$.MODULE$.refArrayOps(strArr).tail())).rdd();
    }

    /**
     * Inspects the per-column results of a dictionary-generation job and fails if any is bad.
     *
     * <p>Each result tuple is passed to the {@code checkStatus$1} closure, which shares a boolean
     * flag with this method. If the flag is set afterwards an error is logged and an exception is
     * thrown; otherwise success is logged.
     *
     * @param carbonLoadModel load model (not used by the visible code)
     * @param sQLContext SQL context (not used by the visible code)
     * @param dictionaryLoadModel model supplying the table name handed to the closure
     * @param tuple2Arr collected job results
     * @throws Exception if the closure flagged a failure
     */
    private void checkStatus(CarbonLoadModel carbonLoadModel, SQLContext sQLContext, DictionaryLoadModel dictionaryLoadModel, Tuple2<Object, SegmentStatus>[] tuple2Arr) {
        BooleanRef failureSeen = BooleanRef.create(false);
        String tableName = dictionaryLoadModel.table().getCarbonTableIdentifier().getTableName();
        Predef$.MODULE$.refArrayOps(tuple2Arr).foreach(new GlobalDictionaryUtil$$anonfun$checkStatus$1(dictionaryLoadModel, failureSeen, tableName));
        if (!failureSeen.elem) {
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("generate global dictionary successfully");
            return;
        }
        org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("generate global dictionary files failed");
        throw new Exception("Failed to generate global dictionary files");
    }

    /**
     * Splits the column-dictionary option on commas and hands every entry to the
     * {@code setPredefinedColumnDictPath$1} closure.
     * NOTE(review): the closure is compiled separately; presumably it parses each
     * "column:path" entry and registers it on the load model - confirm.
     *
     * @param carbonLoadModel load model forwarded to the closure
     * @param str comma-separated option value
     * @param carbonTableIdentifier table identifier forwarded to the closure
     * @param carbonDimensionArr table dimensions forwarded to the closure
     */
    private void setPredefinedColumnDictPath(CarbonLoadModel carbonLoadModel, String str, CarbonTableIdentifier carbonTableIdentifier, CarbonDimension[] carbonDimensionArr) {
        String[] entries = str.split(",");
        GlobalDictionaryUtil$$anonfun$setPredefinedColumnDictPath$1 registerEntry = new GlobalDictionaryUtil$$anonfun$setPredefinedColumnDictPath$1(carbonLoadModel, carbonTableIdentifier, carbonDimensionArr);
        Predef$.MODULE$.refArrayOps(entries).foreach(registerEntry);
    }

    /**
     * Registers a predefined dictionary file for the column addressed by a dotted path.
     *
     * <p>The path is resolved one segment at a time. At each level the first segment is appended
     * to the parent name (with a dot unless the parent name is empty), the current dimensions are
     * filtered by the {@code $anonfun$7} closure built from that qualified name, and the first
     * match is taken. A primitive match is registered on the load model and the method returns; a
     * complex match makes the loop descend into its children with the rest of the path. For array
     * types the next path becomes {@code "val"}, or {@code "val."} plus the remainder when the
     * first child is itself complex.
     *
     * @param carbonLoadModel load model on which the dictionary path is registered
     * @param carbonDimensionArr dimensions searched at the top level
     * @param carbonTableIdentifier table identifier, used for the error log only
     * @param str dotted column path
     * @param str2 dictionary file path to register
     * @param str3 qualified name of the parent dimension; empty at the top level
     * @throws DataLoadingException if no dimension matches the qualified name at some level
     */
    public void setPredefineDict(CarbonLoadModel carbonLoadModel, CarbonDimension[] carbonDimensionArr, CarbonTableIdentifier carbonTableIdentifier, String str, String str2, String str3) {
        CarbonDimension[] searchScope = carbonDimensionArr;
        String remainingPath = str;
        String parentName = str3;
        for (;;) {
            String headSegment = remainingPath.split("\\.")[0];
            String suffix;
            if ("".equals(remainingPath)) {
                suffix = remainingPath;
            } else if (parentName.isEmpty()) {
                suffix = headSegment;
            } else {
                suffix = new StringBuilder().append(".").append(headSegment).toString();
            }
            String qualifiedName = new StringBuilder().append(parentName).append(suffix).toString();

            CarbonDimension[] matches = (CarbonDimension[]) Predef$.MODULE$.refArrayOps(searchScope).filter(new GlobalDictionaryUtil$$anonfun$7(qualifiedName));
            if (matches.length == 0) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("Column " + qualifiedName + " is not a key column " + "in " + carbonTableIdentifier.getDatabaseName() + "." + carbonTableIdentifier.getTableName() + "");
                throw new DataLoadingException("Column " + qualifiedName + " is not a key column. " + "Only key column can be part of dictionary " + "and used in COLUMNDICT option.");
            }

            CarbonDimension matched = matches[0];
            if (!Predef$.MODULE$.Boolean2boolean(matched.isComplex())) {
                carbonLoadModel.setPredefDictMap(matched, str2);
                return;
            }

            // Complex column: descend into its children with the rest of the path.
            CarbonDimension[] children = (CarbonDimension[]) ((TraversableOnce) JavaConverters$.MODULE$.asScalaBufferConverter(matched.getListOfChildDimensions()).asScala()).toArray(ClassTag$.MODULE$.apply(CarbonDimension.class));
            String nextPath;
            if (!DataTypes.isArrayType(matched.getDataType())) {
                nextPath = remainingPath.substring(headSegment.length() + 1);
            } else if (Predef$.MODULE$.Boolean2boolean(children[0].isComplex())) {
                nextPath = new StringBuilder().append("val.").append(remainingPath.substring(headSegment.length() + 1)).toString();
            } else {
                nextPath = "val";
            }
            parentName = qualifiedName;
            remainingPath = nextPath;
            searchScope = children;
        }
    }

    // Returns the empty string. By Scala naming convention this is the default-argument accessor
    // for the sixth parameter of setPredefineDict (the parent dimension name).
    public String setPredefineDict$default$6() {
        return "";
    }

    /**
     * Generates global dictionaries from user-supplied per-column dictionary files.
     *
     * <p>The column-dictionary option is first registered on the load model, a dictionary load
     * model is created with the predefined-dictionary flag set, and the distinct values produced
     * by {@link CarbonColumnDictGenerateRDD} are partitioned per column and written by
     * {@link CarbonGlobalDictionaryGenerateRDD}. The collected statuses are then checked.
     *
     * @param str column-dictionary option value
     * @param carbonTableIdentifier identifier of the target table
     * @param carbonDimensionArr table dimensions
     * @param carbonLoadModel load model
     * @param sQLContext SQL context providing the Spark session
     * @param str2 path forwarded to the dictionary load model and the column-dictionary RDD
     */
    public void generatePredefinedColDictionary(String str, CarbonTableIdentifier carbonTableIdentifier, CarbonDimension[] carbonDimensionArr, CarbonLoadModel carbonLoadModel, SQLContext sQLContext, String str2) {
        setPredefinedColumnDictPath(carbonLoadModel, str, carbonTableIdentifier, carbonDimensionArr);
        DictionaryLoadModel dictModel = createDictionaryLoadModel(carbonLoadModel, carbonTableIdentifier, carbonDimensionArr, str2, true);

        CarbonColumnDictGenerateRDD distinctValues = new CarbonColumnDictGenerateRDD(carbonLoadModel, dictModel, sQLContext.sparkSession(), carbonTableIdentifier, carbonDimensionArr, str2);
        // One partition per primitive dimension.
        ColumnPartitioner partitioner = new ColumnPartitioner(dictModel.primDimensions().length);
        CarbonGlobalDictionaryGenerateRDD generator = new CarbonGlobalDictionaryGenerateRDD(sQLContext.sparkSession(), RDD$.MODULE$.rddToPairRDDFunctions(distinctValues, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(ColumnDistinctValues.class), Ordering$Int$.MODULE$).partitionBy(partitioner), dictModel);

        checkStatus(carbonLoadModel, sQLContext, dictModel, (Tuple2[]) generator.collect());
    }

    /**
     * Creates one parser slot per dimension of the dictionary load model.
     *
     * <p>Two passes are made: the {@code createDimensionParsers$2} closure runs once per primitive
     * dimension index with the value-set array and the lookup map, then the
     * {@code createDimensionParsers$1} closure runs once per dimension index with the map and the
     * result array.
     * NOTE(review): the closures are compiled separately; presumably the first registers a value
     * set per primitive column and the second stores the parser for each dimension - confirm.
     *
     * @param dictionaryLoadModel model supplying the dimensions and primitive dimensions
     * @param arrayBuffer buffer forwarded to the first closure
     * @return array with one element per entry of {@code dictionaryLoadModel.dimensions()}
     */
    public GenericParser[] createDimensionParsers(DictionaryLoadModel dictionaryLoadModel, ArrayBuffer<Tuple2<Object, HashSet<String>>> arrayBuffer) {
        int dimensionCount = dictionaryLoadModel.dimensions().length;
        int primDimensionCount = dictionaryLoadModel.primDimensions().length;

        HashSet[] valueSets = new HashSet[primDimensionCount];
        HashMap valueSetLookup = new HashMap();
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), primDimensionCount)
            .foreach(new GlobalDictionaryUtil$$anonfun$createDimensionParsers$2(dictionaryLoadModel, arrayBuffer, valueSets, valueSetLookup));

        GenericParser[] parsers = new GenericParser[dimensionCount];
        RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), dimensionCount)
            .foreach$mVc$sp(new GlobalDictionaryUtil$$anonfun$createDimensionParsers$1(dictionaryLoadModel, valueSetLookup, parsers));
        return parsers;
    }

    /**
     * Parses one line of a dictionary file into a (column name, value) pair.
     *
     * <p>The line is split on the default separator. The first token is read as an index into
     * {@code strArr} to obtain the column name; the second token, when present, is the value.
     * Every malformed line increments {@code accumulator} by one, and the returned pair keeps
     * empty strings for whatever could not be parsed:
     * <ul>
     *   <li>no tokens at all: logged and counted;</li>
     *   <li>one token and no comma in the line: counted without logging;</li>
     *   <li>one token with a comma in the line: the column name is resolved, value stays empty;</li>
     *   <li>an index that is not a number or is out of range: logged and counted.</li>
     * </ul>
     *
     * @param str raw dictionary record
     * @param accumulator counter of bad records
     * @param strArr column names addressed by the index in the record
     * @return tuple of column name and dictionary value
     */
    public Tuple2<String, String> org$apache$carbondata$spark$util$GlobalDictionaryUtil$$parseRecord(String str, Accumulator<Object> accumulator, String[] strArr) {
        String[] tokens = str.split(String.valueOf(DEFAULT_SEPARATOR()));
        String columnName = "";
        String value = "";
        if (tokens.length == 0) {
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error(new StringBuilder().append("Read a bad dictionary record: ").append(str).toString());
            accumulator.$plus$eq(BoxesRunTime.boxToInteger(1));
        } else if (tokens.length == 1) {
            if (!str.contains(",")) {
                accumulator.$plus$eq(BoxesRunTime.boxToInteger(1));
            } else {
                try {
                    columnName = strArr[new StringOps(Predef$.MODULE$.augmentString(tokens[0])).toInt()];
                } catch (Exception unused) {
                    org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error(new StringBuilder().append("Read a bad dictionary record: ").append(str).toString());
                    accumulator.$plus$eq(BoxesRunTime.boxToInteger(1));
                }
            }
        } else {
            try {
                columnName = strArr[new StringOps(Predef$.MODULE$.augmentString(tokens[0])).toInt()];
                value = tokens[1];
            } catch (Exception unused) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error(new StringBuilder().append("Read a bad dictionary record: ").append(str).toString());
                accumulator.$plus$eq(BoxesRunTime.boxToInteger(1));
            }
        }
        return new Tuple2<>(columnName, value);
    }

    /**
     * Reads the dictionary files as text and groups the parsed values by column name.
     *
     * <p>Each line is converted to a tuple by the {@code $anonfun$8} closure (given the header
     * columns and the bad-record accumulator), the tuples are grouped by key, and the groups are
     * filtered by the {@code readAllDictionaryFiles$1} closure built from the required columns.
     *
     * @param sQLContext SQL context providing the Spark context
     * @param strArr header column names forwarded to the line-parsing closure
     * @param strArr2 required column names forwarded to the filter closure
     * @param str path of the dictionary file(s)
     * @param accumulator counter of bad records
     * @return RDD of (column name, values) pairs
     */
    private RDD<Tuple2<String, Iterable<String>>> readAllDictionaryFiles(SQLContext sQLContext, String[] strArr, String[] strArr2, String str, Accumulator<Object> accumulator) {
        try {
            SparkContext sc = sQLContext.sparkContext();
            RDD<String> lines = sc.textFile(str, sc.textFile$default$2());
            return RDD$.MODULE$
                .rddToPairRDDFunctions(
                    lines.map(new GlobalDictionaryUtil$$anonfun$8(strArr, accumulator), ClassTag$.MODULE$.apply(Tuple2.class)),
                    ClassTag$.MODULE$.apply(String.class),
                    ClassTag$.MODULE$.apply(String.class),
                    Ordering$String$.MODULE$)
                .groupByKey()
                .filter(new GlobalDictionaryUtil$$anonfun$readAllDictionaryFiles$1(Predef$.MODULE$.refArrayOps(strArr2).toList()));
        } catch (Exception failure) {
            // Log for diagnosis, then let the caller see the original exception.
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error(new StringBuilder().append("Read dictionary files failed. Caused by: ").append(failure.getMessage()).toString());
            throw failure;
        }
    }

    /**
     * Checks that the dictionary path points at usable input.
     *
     * <p>When the last path component starts with {@code *}, the parent directory must exist and
     * is searched with the {@code validateAllDictionaryPath$1} closure built from the text after
     * the asterisk. Otherwise the file itself must exist and have a size greater than zero.
     *
     * @param str dictionary file path, optionally with a leading-asterisk file name
     * @return {@code true} if usable input was found; {@code false} (after a warning) otherwise
     * @throws DataLoadingException if the file, or the parent directory in the wildcard case,
     *         does not exist
     */
    private boolean validateAllDictionaryPath(String str) {
        FileFactory.FileType fileType = FileFactory.getFileType(str);
        Path dictionaryPath = new Path(str);
        CarbonFile target = FileFactory.getCarbonFile(dictionaryPath.toString(), fileType);
        CarbonFile parentDir = FileFactory.getCarbonFile(dictionaryPath.getParent().toString(), fileType);

        boolean usable;
        if (dictionaryPath.getName().startsWith("*")) {
            String nameSuffix = dictionaryPath.getName().substring(1);
            if (!parentDir.exists()) {
                throw new DataLoadingException("The given dictionary file path is not found : " + str);
            }
            usable = Predef$.MODULE$.refArrayOps(parentDir.listFiles()).exists(new GlobalDictionaryUtil$$anonfun$validateAllDictionaryPath$1(nameSuffix));
        } else {
            if (!target.exists()) {
                throw new DataLoadingException("The given dictionary file path is not found : " + str);
            }
            usable = target.getSize() > 0;
        }
        if (!usable) {
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().warn("No dictionary files found or empty dictionary files! Won't generate new dictionary.");
        }
        return usable;
    }

    /**
     * Generates the global dictionary for a load.
     *
     * <p>If the load model names an all-dictionary path, generation is delegated to
     * {@code generateDictionaryFromDictionaryFiles}. Otherwise:
     * <ol>
     *   <li>predefined per-column dictionaries are generated when a column-dictionary path is set;</li>
     *   <li>the header length is validated against the input columns (skipped for Hive partition
     *       tables);</li>
     *   <li>the dimensions are pruned to those that need a dictionary;</li>
     *   <li>the input is loaded, distinct values are combined per block, partitioned per column and
     *       written, and the resulting statuses are checked.</li>
     * </ol>
     *
     * <p>Failures are logged. A failure caused by a {@link NoRetryException}, or a
     * {@link SparkException}, is rethrown as a new {@code Exception} with a shortened message; the
     * original exception is attached as the cause so the stack trace is not lost. Any other
     * exception is rethrown unchanged.
     *
     * @param sQLContext SQL context providing the Spark session
     * @param carbonLoadModel load model describing the table and the input
     * @param configuration Hadoop configuration used when reading CSV input
     * @param option optional input dataset; when absent the fact file is read
     */
    public void generateGlobalDictionary(SQLContext sQLContext, CarbonLoadModel carbonLoadModel, Configuration configuration, Option<Dataset<Row>> option) {
        try {
            CarbonTable carbonTable = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable();
            CarbonTableIdentifier carbonTableIdentifier = carbonTable.getAbsoluteTableIdentifier().getCarbonTableIdentifier();
            String metadataPath = CarbonTablePath.getMetadataPath(carbonLoadModel.getTablePath());
            CarbonDimension[] dimensions = (CarbonDimension[]) ((TraversableOnce) JavaConverters$.MODULE$.asScalaBufferConverter(carbonTable.getDimensionByTableName(carbonTable.getTableName())).asScala()).toArray(ClassTag$.MODULE$.apply(CarbonDimension.class));
            carbonLoadModel.initPredefDictMap();
            String allDictPath = carbonLoadModel.getAllDictPath();
            if (!StringUtils.isEmpty(allDictPath)) {
                generateDictionaryFromDictionaryFiles(sQLContext, carbonLoadModel, carbonTableIdentifier, metadataPath, dimensions, allDictPath);
                return;
            }
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Generate global dictionary from source data files!");
            String[] headers = (String[]) Predef$.MODULE$.refArrayOps(carbonLoadModel.getCsvHeaderColumns()).map(new GlobalDictionaryUtil$$anonfun$9(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
            String colDictFilePath = carbonLoadModel.getColDictFilePath();
            if (colDictFilePath != null) {
                generatePredefinedColDictionary(colDictFilePath, carbonTableIdentifier, dimensions, carbonLoadModel, sQLContext, metadataPath);
            }
            String[] inputColumns = option.isDefined() ? ((Dataset) option.get()).columns() : headers;
            if (headers.length > inputColumns.length && !carbonTable.isHivePartitionTable()) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("The number of columns in the file header do not match the number of columns in the data file; Either delimiter or fileheader provided is not correct");
                throw new DataLoadingException("The number of columns in the file header do not match the number of columns in the data file; Either delimiter or fileheader provided is not correct");
            }
            Tuple2<CarbonDimension[], String[]> pruned = pruneDimensions(dimensions, headers, inputColumns);
            if (pruned == null) {
                throw new MatchError(pruned);
            }
            CarbonDimension[] requireDimensions = (CarbonDimension[]) pruned._1();
            String[] requireColumnNames = (String[]) pruned._2();
            if (!Predef$.MODULE$.refArrayOps(requireDimensions).nonEmpty()) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("No column found for generating global dictionary in source data files");
                return;
            }
            RDD<Row> dictRdd = loadInputDataAsDictRdd(sQLContext, carbonLoadModel, option, requireColumnNames, configuration);
            DictionaryLoadModel model = createDictionaryLoadModel(carbonLoadModel, carbonTableIdentifier, requireDimensions, metadataPath, false);
            checkStatus(carbonLoadModel, sQLContext, model, (Tuple2[]) new CarbonGlobalDictionaryGenerateRDD(sQLContext.sparkSession(), RDD$.MODULE$.rddToPairRDDFunctions(new CarbonBlockDistinctValuesCombineRDD(sQLContext.sparkSession(), dictRdd, model), ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(ColumnDistinctValues.class), Ordering$Int$.MODULE$).partitionBy(new ColumnPartitioner(model.primDimensions().length)), model).collect());
        } catch (Exception e) {
            Throwable cause = e.getCause();
            if (cause instanceof NoRetryException) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("generate global dictionary failed", cause);
                // Keep the cause attached so the original stack trace survives the re-wrap.
                throw new Exception("generate global dictionary failed, " + cause.getMessage(), cause);
            }
            if (!(e instanceof SparkException)) {
                org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("generate global dictionary failed", e);
                throw e;
            }
            SparkException sparkException = (SparkException) e;
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("generate global dictionary failed", sparkException);
            // trimErrorMessage returns null when it finds nothing to extract; fall back to the raw
            // message instead of reporting the literal text "null".
            String trimmed = trimErrorMessage(sparkException.getMessage());
            String detail = trimmed != null ? trimmed : sparkException.getMessage();
            throw new Exception("generate global dictionary failed, " + detail, sparkException);
        }
    }

    // Returns None. By Scala naming convention this is the default-argument accessor for the
    // fourth parameter of generateGlobalDictionary (the optional input dataset).
    public Option<Dataset<Row>> generateGlobalDictionary$default$4() {
        return None$.MODULE$;
    }

    /**
     * Generates the global dictionary from externally supplied dictionary files.
     *
     * <p>The path is normalised with {@code CarbonUtil.checkAndAppendHDFSUrl} and validated; when
     * validation reports no usable input the method returns without doing anything. Otherwise the
     * dimensions are pruned against the CSV header, the dictionary files are read and combined per
     * column, the dictionaries are written, and the statuses are checked. Finally the bad-record
     * accumulator is inspected.
     *
     * @param sQLContext SQL context providing the Spark context and session
     * @param carbonLoadModel load model supplying the CSV header
     * @param carbonTableIdentifier identifier of the target table
     * @param str path forwarded to the dictionary load model
     * @param carbonDimensionArr table dimensions
     * @param str2 path of the dictionary file(s)
     * @throws DataLoadingException if any dictionary record was counted as malformed
     */
    public void generateDictionaryFromDictionaryFiles(SQLContext sQLContext, CarbonLoadModel carbonLoadModel, CarbonTableIdentifier carbonTableIdentifier, String str, CarbonDimension[] carbonDimensionArr, String str2) {
        org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Generate global dictionary from dictionary files!");
        String dictionaryPath = CarbonUtil.checkAndAppendHDFSUrl(str2);
        if (!validateAllDictionaryPath(dictionaryPath)) {
            return;
        }

        String[] headerColumns = (String[]) Predef$.MODULE$.refArrayOps(carbonLoadModel.getCsvHeaderColumns()).map(new GlobalDictionaryUtil$$anonfun$generateDictionaryFromDictionaryFiles$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        Tuple2<CarbonDimension[], String[]> pruned = pruneDimensions(carbonDimensionArr, headerColumns, headerColumns);
        if (pruned == null) {
            throw new MatchError(pruned);
        }
        CarbonDimension[] requiredDimensions = (CarbonDimension[]) pruned._1();
        String[] requiredColumns = (String[]) pruned._2();

        if (Predef$.MODULE$.refArrayOps(requiredDimensions).nonEmpty()) {
            DictionaryLoadModel dictModel = createDictionaryLoadModel(carbonLoadModel, carbonTableIdentifier, requiredDimensions, str, false);
            Accumulator<Object> badRecords = sQLContext.sparkContext().accumulator(BoxesRunTime.boxToInteger(0), AccumulatorParam$IntAccumulatorParam$.MODULE$);

            RDD<Tuple2<String, Iterable<String>>> dictionaryValues = readAllDictionaryFiles(sQLContext, headerColumns, requiredColumns, dictionaryPath, badRecords);
            CarbonAllDictionaryCombineRDD combined = new CarbonAllDictionaryCombineRDD(sQLContext.sparkSession(), dictionaryValues, dictModel);
            CarbonGlobalDictionaryGenerateRDD generator = new CarbonGlobalDictionaryGenerateRDD(sQLContext.sparkSession(), RDD$.MODULE$.rddToPairRDDFunctions(combined, ClassTag$.MODULE$.Int(), ClassTag$.MODULE$.apply(ColumnDistinctValues.class), Ordering$Int$.MODULE$).partitionBy(new ColumnPartitioner(dictModel.primDimensions().length)), dictModel);
            checkStatus(carbonLoadModel, sQLContext, dictModel, (Tuple2[]) generator.collect());

            // The accumulator is only meaningful after the job above has run.
            if (BoxesRunTime.unboxToInt(badRecords.value()) > 0) {
                throw new DataLoadingException("Data Loading failure, dictionary values are not in correct format!");
            }
        } else {
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("have no column need to generate global dictionary");
        }
    }

    /**
     * Extracts the short, human-readable part of an exception message.
     *
     * <ul>
     *   <li>If the text contains {@code "TextParsingException:"}: returns the text between
     *       {@code "TextParsingException: "} and the first {@code "Hint"}; otherwise, when
     *       {@code "Parser Configuration:"} is present, returns everything before it.</li>
     *   <li>Otherwise, if the text contains {@code "Exception:"}: returns the remainder of the
     *       line that follows the first {@code "Exception: "} marker.</li>
     * </ul>
     *
     * @param str the raw error text; may be {@code null}
     * @return the trimmed message, or {@code null} when {@code str} is {@code null}, contains
     *         none of the markers, or has no text after the marker
     */
    public String trimErrorMessage(String str) {
        if (str == null) {
            return null;
        }
        if (str.contains("TextParsingException:")) {
            String[] aroundHint = str.split("Hint");
            if (aroundHint.length > 1) {
                String[] aroundMarker = aroundHint[0].split("TextParsingException: ");
                if (aroundMarker.length > 1) {
                    return aroundMarker[1];
                }
            }
            String[] aroundConfig = str.split("Parser Configuration:");
            return aroundConfig.length > 1 ? aroundConfig[0] : null;
        }
        if (str.contains("Exception:")) {
            // contains() matches "Exception:" without the trailing space and split() drops
            // trailing empty strings, so the second element is not guaranteed to exist.
            String[] aroundMarker = str.split("Exception: ");
            if (aroundMarker.length < 2) {
                return null;
            }
            String tail = aroundMarker[1];
            // indexOf avoids the empty array that split("\n") yields for a tail made only of
            // newlines; the first line of such a tail is simply empty.
            int lineEnd = tail.indexOf('\n');
            return lineEnd < 0 ? tail : tail.substring(0, lineEnd);
        }
        return null;
    }

    /**
     * Runs the dictionary write, the sort-index write and the dictionary metadata update for a
     * newly added column while holding that column's dictionary lock.
     *
     * <p>The lock is named {@code <columnUniqueId>.lock}. Once it has been acquired it is
     * released in a {@code finally} block, so it is freed on both the success path and the
     * failure path.
     *
     * @param columnSchema schema of the new column; supplies the unique id, name and data type
     * @param absoluteTableIdentifier identifier of the table that owns the column
     * @param str raw value passed to {@code DataTypeUtil.normalizeColumnValueForItsDataType};
     *            presumably the column's default value (per the method name). When the
     *            normalized value is {@code null} the dictionary is written with no values
     */
    public void loadDefaultDictionaryValueForNewColumn(ColumnSchema columnSchema, AbsoluteTableIdentifier absoluteTableIdentifier, String str) {
        ICarbonLock carbonLockObj = CarbonLockFactory.getCarbonLockObj(absoluteTableIdentifier, columnSchema.getColumnUniqueId() + ".lock");
        String columnName = columnSchema.getColumnName();
        // Tracks whether the lock was actually taken so that the finally block releases it only then.
        boolean locked = false;
        try {
            locked = carbonLockObj.lockWithRetries();
            if (!locked) {
                throw package$.MODULE$.error("Dictionary file " + columnName + " is locked for updation. Please try after some time");
            }
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Successfully able to get the dictionary lock for " + columnName);
            ColumnIdentifier columnIdentifier = new ColumnIdentifier(columnSchema.getColumnUniqueId(), (Map) null, columnSchema.getDataType());
            DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
            String normalizeColumnValueForItsDataType = DataTypeUtil.normalizeColumnValueForItsDataType(str, columnSchema);
            HashSet hashSet = new HashSet();
            if (normalizeColumnValueForItsDataType != null) {
                hashSet.$plus$eq(normalizeColumnValueForItsDataType);
            }
            DictionaryWriterTask dictionaryWriterTask = new DictionaryWriterTask(hashSet, null, dictionaryColumnUniqueIdentifier, columnSchema, false, DictionaryWriterTask$.MODULE$.$lessinit$greater$default$6());
            List<String> execute = dictionaryWriterTask.execute();
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Dictionary file writing is successful for new column " + columnName);
            // The sort-index task is only run when the dictionary task returned at least one value.
            if (execute.size() > 0) {
                new SortIndexWriterTask(dictionaryColumnUniqueIdentifier, columnSchema.getDataType(), null, execute, SortIndexWriterTask$.MODULE$.$lessinit$greater$default$5()).execute();
            }
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("SortIndex file writing is successful for new column " + columnName);
            dictionaryWriterTask.updateMetaData();
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Dictionary meta file writing is successful for new column " + columnName);
        } catch (Exception e) {
            org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error(e);
            throw e;
        } finally {
            // Release on every exit path; previously the failure path never unlocked.
            if (carbonLockObj != null && locked) {
                if (carbonLockObj.unlock()) {
                    org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().info("Dictionary " + columnName + " Unlocked Successfully.");
                } else {
                    org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER().error("Unable to unlock Dictionary " + columnName);
                }
            }
        }
    }

    private GlobalDictionaryUtil$() {
        MODULE$ = this;
        this.org$apache$carbondata$spark$util$GlobalDictionaryUtil$$LOGGER = LogServiceFactory.getLogService(getClass().getCanonicalName());
        this.DEFAULT_SEPARATOR = ',';
        this.DEFAULT_QUOTE_CHARACTER = '\"';
    }
}
