package com.huawei.hadoop.hbase.tools.bulkload;

import com.google.common.collect.Lists;
import java.io.Closeable;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.Trash;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
import org.apache.spark.Partitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.serializer.KryoSerializer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.MapType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:com/huawei/hadoop/hbase/tools/bulkload/SparkBulkLoadTool.class */
public class SparkBulkLoadTool implements Serializable {
    private static final long serialVersionUID = -2685766666L;

    // Defaults applied when the corresponding command line option is absent.
    private static final String DEFAULT_COLUMN_FAMILY_STR = "info";
    private static final String DEFAULT_CF_COMPRESSION = "SNAPPY";
    private static final String DEFAULT_ENCODING_OPT = "FAST_DIFF";
    /** Separator used both to split the rowkey-columns option and to join columns in the sampling SQL. */
    private static final String DEFAULT_COLUMNS_SEPARATOR = ",";
    private static final String DEFAULT_ROW_SEPARATOR = "#";
    // NOTE(review): presumably the upper bound of the sampling-multiple option, whose help text
    // states the range 1 to 10 - confirm against the validation code.
    private static final int DEFAULT_MAX_SAMPLING_MULTIPLE = 10;
    /** Format arguments: rowkey column list, user SQL, rowkey column list (ORDER BY). */
    private static final String SAMPLING_SQL_FORMAT = "SELECT %s from ( %s ) ORDER BY %s";

    /** Target table; static, so it is shared by every instance of the tool in this JVM. */
    private static TableName tableName;
    private byte[] columnFamily;
    private String compressionStr;
    private String dataBlockEncodingStr;
    private Path outputPath;
    private String sql;
    private List<String> rowColumns;
    private String rowSeparatorStr;
    private boolean skipStoreRowCols;
    private SparkSession sparkSession;
    private int regionNums = 100;
    private int samplingMultiple = 1;
    private boolean allRowColsAreStr = false;

    private static final Logger LOG = LoggerFactory.getLogger(SparkBulkLoadTool.class);

    // Command line options. OPTIONS must be initialized before the Option constants are added to it.
    private static final Options OPTIONS = new Options();
    private static final Option SQL_OPT = Option.builder("sql").longOpt("export-sql").desc("The sql execute by spark sql to extract data.").hasArg(true).numberOfArgs(1).required(true).type(String.class).build();
    private static final Option TABLE_NAME_OPT = Option.builder("tb").longOpt("table").desc("The target hbase table name.").hasArg(true).numberOfArgs(1).required(true).type(String.class).build();
    private static final Option CF_OPT = Option.builder("cf").longOpt("column-family").desc("The column family of target hbase table.").hasArg(true).numberOfArgs(1).required(false).type(String.class).build();
    private static final Option CF_COMPRESSION_OPT = Option.builder("comp").longOpt("compression").desc("The compression of target table column family when target table not exist.").hasArg(true).numberOfArgs(1).required(false).type(String.class).build();
    // The help text previously repeated the compression description (copy-paste error).
    private static final Option CF_DATABLOCK_ENCODING_OPT = Option.builder("enc").longOpt("block-encoding").desc("The data block encoding of target table column family when target table not exist.").hasArg(true).numberOfArgs(1).required(false).type(String.class).build();
    private static final Option REGION_NUMS_OPT = Option.builder("rn").longOpt("region-nums").desc("The region numbers of target hbase table when target table not exist.").hasArg(true).numberOfArgs(1).required(false).type(Integer.class).build();
    private static final Option ROWKEY_COLUMNS_OPT = Option.builder("rc").longOpt("rowkey-columns").desc("The rowkey column for hbase table, should be separated by commas.").hasArg(true).required(true).type(String.class).build();
    private static final Option ROWKEY_SEPARATOR_OPT = Option.builder("sp").longOpt("rowkey-separator").desc("The rowkey column value separator character for hbase table, default separator is '#' please ensure the separator character not in your data.").hasArg(true).required(false).type(Character.class).build();
    private static final Option SKIP_STORE_ROWKEY_COL_OPT = Option.builder("sr").longOpt("skip-store-rowcol").desc("If this option exist, will skip store rowkey columns in hbase table cell, need parse it from rowkey.").hasArg(false).required(false).build();
    private static final Option SAMPLING_MULTIPLE_OPT = Option.builder("sm").longOpt("sampling-multiple").desc("The sampling multiple, will create more hfiles in one region dir, must in range 1 to 10,The default value is 1.").hasArg(true).numberOfArgs(1).required(false).type(Integer.class).build();
    private static final Option OUTPUT_PATH_OPT = Option.builder("op").longOpt("output-path").desc("The output path of hfiles.").hasArg(true).numberOfArgs(1).required(true).type(String.class).build();

    /** HBase configuration shared by the file system, the HBase connection and the HFile output format. */
    private static final Configuration CONF = HBaseConfiguration.create();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/huawei/hadoop/hbase/tools/bulkload/SparkBulkLoadTool$ByteArrayComparator.class */
    public static class ByteArrayComparator implements Comparator<byte[]>, Serializable {
        private static final long serialVersionUID = -26857666906612L;

        private ByteArrayComparator() {
        }

        @Override // java.util.Comparator
        public int compare(byte[] bArr, byte[] bArr2) {
            return Bytes.BYTES_COMPARATOR.compare(bArr, bArr2);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/huawei/hadoop/hbase/tools/bulkload/SparkBulkLoadTool$KVComparator.class */
    /**
     * Serializable comparator for (row key, key-value) pairs: orders by row key first and
     * falls back to the HBase {@link CellComparator} when the row keys are equal.
     */
    public static class KVComparator implements Comparator<Tuple2<ImmutableBytesWritable, KeyValue>>, Serializable {
        private static final long serialVersionUID = -268576669066L;

        private KVComparator() {
            // Private: instances are created only inside the enclosing class.
        }

        /**
         * @param first  left pair
         * @param second right pair
         * @return the row key comparison when it is non-zero, otherwise the cell comparison
         */
        @Override
        public int compare(Tuple2<ImmutableBytesWritable, KeyValue> first, Tuple2<ImmutableBytesWritable, KeyValue> second) {
            final int byRowKey = first._1.compareTo(second._1);
            if (byRowKey != 0) {
                return byRowKey;
            }
            return CellComparator.getInstance().compare(first._2, second._2);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/huawei/hadoop/hbase/tools/bulkload/SparkBulkLoadTool$RegionPartitioner.class */
    public static class RegionPartitioner extends Partitioner implements Serializable {
        private final byte[][] startKeys;

        public RegionPartitioner(byte[][] bArr) {
            this.startKeys = bArr;
            Arrays.sort(this.startKeys, Bytes.BYTES_COMPARATOR);
        }

        public int numPartitions() {
            return this.startKeys.length;
        }

        public int getPartition(Object obj) {
            byte[] bArr;
            if (this.startKeys.length == 1) {
                return 0;
            }
            if (obj instanceof ImmutableBytesWritable) {
                bArr = ((ImmutableBytesWritable) obj).get();
            } else {
                if (!(obj instanceof byte[])) {
                    throw new IllegalArgumentException("RowKeyObj must be instanceof ImmutableBytesWritable or byte[].");
                }
                bArr = (byte[]) obj;
            }
            int binarySearch = Arrays.binarySearch(this.startKeys, bArr, Bytes.BYTES_COMPARATOR);
            if (binarySearch < 0) {
                binarySearch = ((-1) * binarySearch) - 2;
            }
            return Math.max(binarySearch, 0);
        }
    }

    /** Registers every supported command line option on the shared {@code OPTIONS} holder. */
    private void addOptions() {
        final Option[] supported = {
            SQL_OPT,
            TABLE_NAME_OPT,
            CF_OPT,
            CF_COMPRESSION_OPT,
            CF_DATABLOCK_ENCODING_OPT,
            REGION_NUMS_OPT,
            ROWKEY_COLUMNS_OPT,
            ROWKEY_SEPARATOR_OPT,
            SKIP_STORE_ROWKEY_COL_OPT,
            SAMPLING_MULTIPLE_OPT,
            OUTPUT_PATH_OPT,
        };
        // Registration order is kept identical to the declaration order above.
        for (Option option : supported) {
            OPTIONS.addOption(option);
        }
    }

    /** Prints the usage text for all registered options, using this class name as the command name. */
    private void printHelp() {
        final HelpFormatter formatter = new HelpFormatter();
        final String commandName = getClass().getName();
        formatter.printHelp(commandName, OPTIONS, true);
    }

    /**
     * Validates the parsed command line and copies its values into the tool's fields.
     *
     * <p>Rowkey column names are trimmed and empty entries are dropped, so an argument such as
     * {@code "a, b"} yields the columns {@code a} and {@code b}.
     *
     * @param commandLine parsed command line
     * @throws IOException if the generated output directory already exists and is not empty,
     *                     or the file system cannot be accessed
     * @throws IllegalArgumentException if any option value is missing or invalid
     */
    private void processOptions(CommandLine commandLine) throws IOException {
        this.sql = commandLine.getOptionValue(SQL_OPT);
        if (StringUtils.isEmpty(this.sql)) {
            throw new IllegalArgumentException("Sql is empty.");
        }
        tableName = TableName.valueOf(commandLine.getOptionValue(TABLE_NAME_OPT));
        this.columnFamily = Bytes.toBytesBinary(commandLine.getOptionValue(CF_OPT, DEFAULT_COLUMN_FAMILY_STR));
        // Called for validation only; the return value is not needed.
        ColumnFamilyDescriptorBuilder.isLegalColumnFamilyName(this.columnFamily);

        this.compressionStr = commandLine.getOptionValue(CF_COMPRESSION_OPT, DEFAULT_CF_COMPRESSION);
        if (Arrays.stream(Compression.Algorithm.values()).noneMatch(algorithm -> algorithm.name().equals(this.compressionStr))) {
            throw new IllegalArgumentException("The compression algorithm: " + this.compressionStr + " is not support.");
        }
        this.dataBlockEncodingStr = commandLine.getOptionValue(CF_DATABLOCK_ENCODING_OPT, DEFAULT_ENCODING_OPT);
        if (Arrays.stream(DataBlockEncoding.values()).noneMatch(encoding -> encoding.name().equals(this.dataBlockEncodingStr))) {
            throw new IllegalArgumentException("The data block encoding: " + this.dataBlockEncodingStr + " is not support.");
        }

        String regionNumsArg = commandLine.getOptionValue(REGION_NUMS_OPT);
        if (regionNumsArg != null) {
            this.regionNums = parseIntOption(regionNumsArg, "region-nums");
            if (this.regionNums <= 0) {
                throw new IllegalArgumentException("Region number should be positive.");
            }
        }

        String rowKeyColumnsArg = commandLine.getOptionValue(ROWKEY_COLUMNS_OPT);
        if (StringUtils.isEmpty(rowKeyColumnsArg)) {
            throw new IllegalArgumentException("Rowkey columns is empty.");
        }
        // Trim each name and drop blanks: untrimmed names never match the schema field names
        // they are later compared against.
        this.rowColumns = Arrays.stream(rowKeyColumnsArg.split(DEFAULT_COLUMNS_SEPARATOR))
            .map(String::trim)
            .filter(column -> !column.isEmpty())
            .collect(Collectors.toList());
        if (this.rowColumns.isEmpty()) {
            throw new IllegalArgumentException("Rowkey columns is empty.");
        }

        this.rowSeparatorStr = commandLine.getOptionValue(ROWKEY_SEPARATOR_OPT, DEFAULT_ROW_SEPARATOR);
        if (this.rowSeparatorStr.length() != 1) {
            throw new IllegalArgumentException("RowSeparatorStr just only support one character.");
        }

        this.samplingMultiple = parseIntOption(commandLine.getOptionValue(SAMPLING_MULTIPLE_OPT, "1"), "sampling-multiple");
        if (this.samplingMultiple > DEFAULT_MAX_SAMPLING_MULTIPLE || this.samplingMultiple < 1) {
            throw new IllegalArgumentException("Sampling multiple must in range 1 to 10.");
        }
        this.skipStoreRowCols = commandLine.hasOption(SKIP_STORE_ROWKEY_COL_OPT);

        // HFiles go to a fresh, randomly named child of the requested output path.
        this.outputPath = new Path(commandLine.getOptionValue(OUTPUT_PATH_OPT), UUID.randomUUID().toString());
        FileSystem fileSystem = this.outputPath.getFileSystem(CONF);
        if (fileSystem.exists(this.outputPath)) {
            if (!ArrayUtils.isEmpty(fileSystem.listStatus(this.outputPath))) {
                throw new IOException("Output path is exist.");
            }
            clearOutPath(fileSystem);
        }
        LOG.warn("Target output path is {}.", this.outputPath);
    }

    /**
     * Parses an integer option value, reporting the option name when the value is not a number.
     *
     * @param value      raw option value
     * @param optionName long option name used in the error message
     * @return the parsed integer
     * @throws IllegalArgumentException if {@code value} is not a valid integer
     */
    private static int parseIntOption(String value, String optionName) {
        try {
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Option " + optionName + " must be an integer, but was: " + value, e);
        }
    }

    /**
     * Registers the options, parses the raw arguments and validates them.
     * On any failure the usage text is printed before the exception is rethrown.
     *
     * @param strArr raw command line arguments
     * @throws ParseException if the arguments do not match the registered options
     * @throws IOException    if option processing fails while accessing the file system
     */
    private void parseArgs(String[] strArr) throws ParseException, IOException {
        try {
            addOptions();
            final CommandLine parsed = DefaultParser.builder().build().parse(OPTIONS, strArr);
            processOptions(parsed);
        } catch (ParseException | IOException | IllegalArgumentException failure) {
            printHelp();
            throw failure;
        }
    }

    /**
     * Runs the whole bulk load: parses the arguments, creates the target table (with sampled
     * split points) when it does not exist, exports the SQL result as HFiles and loads them.
     *
     * <p>The output directory is cleared and all resources are closed whether or not the run
     * succeeds.
     *
     * @param strArr raw command line arguments
     * @throws ParseException if the arguments cannot be parsed
     * @throws IOException    if sampling, export, bulk load or resource cleanup fails
     */
    public void runTool(String[] strArr) throws ParseException, IOException {
        parseArgs(strArr);
        FileSystem fileSystem = this.outputPath.getFileSystem(CONF);
        Connection connection = null;
        Admin admin = null;
        RegionLocator regionLocator = null;
        Table table = null;
        JavaSparkContext javaSparkContext = null;
        try {
            connection = ConnectionFactory.createConnection(CONF);
            admin = connection.getAdmin();
            boolean targetTableExists = isTargetTableExist(admin);
            if (targetTableExists) {
                addColumnFamilyIfNotExist(admin);
            }

            SparkConf sparkConf = new SparkConf()
                .setAppName("SparkBulkloadTool_" + tableName.getNameAsString() + "_" + System.currentTimeMillis())
                .set("spark.serializer", KryoSerializer.class.getCanonicalName())
                .registerKryoClasses(new Class[]{ImmutableBytesWritable.class, KeyValue.class});
            this.sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
            javaSparkContext = JavaSparkContext.fromSparkContext(this.sparkSession.sparkContext());

            Broadcast<List<String>> rowColumnsBroadcast = javaSparkContext.broadcast(this.rowColumns);
            long schemaStart = System.currentTimeMillis();
            StructType schema = getSchema();
            this.allRowColsAreStr = allRowColsAreStr(schema);
            Broadcast<String[]> fieldNamesBroadcast = javaSparkContext.broadcast(schema.fieldNames());
            LOG.info("Judge row cols type cost {} ms", System.currentTimeMillis() - schemaStart);
            LOG.info("All row cols are string type: {}", this.allRowColsAreStr);

            // Sampling only happens for a new table; an existing table keeps its own regions.
            List<byte[]> sampledRowKeys = new ArrayList<>();
            if (!targetTableExists) {
                sampledRowKeys = samplingAndGetRowKeys(javaSparkContext, rowColumnsBroadcast);
                createTargetTable(admin, getSplitPoints(sampledRowKeys));
            }

            regionLocator = connection.getRegionLocator(tableName);
            // With a sampling multiple above 1 the sampled keys themselves become partition
            // boundaries; otherwise partitions follow the region start keys.
            byte[][] startKeys = (this.samplingMultiple <= 1 || sampledRowKeys.isEmpty())
                ? regionLocator.getStartKeys()
                : sampledRowKeys.toArray(new byte[0][]);

            initHFileFormatConf(admin);
            exportDataByFieldOrder(javaSparkContext, startKeys, rowColumnsBroadcast, fieldNamesBroadcast);
            rowColumnsBroadcast.destroy();
            fieldNamesBroadcast.destroy();

            table = connection.getTable(tableName);
            doBulkload(admin, table, regionLocator);
        } finally {
            clearOutPath(fileSystem);
            // IOUtils.close skips null entries, so resources never opened are ignored.
            IOUtils.close(new Closeable[]{table, regionLocator, admin, connection, fileSystem, this.sparkSession, javaSparkContext});
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v11, types: [java.util.List] */
    /**
     * Picks region split points from the sampled row keys by taking every {@code step}-th key,
     * where {@code step = samples / (regionNums - 1)} and is never less than 1.
     *
     * @param list sampled row keys, in the order produced by sampling
     * @return at most {@code regionNums - 1} split points; empty when {@code list} is empty
     */
    private List<byte[]> getSplitPoints(List<byte[]> list) {
        // The integer division yields 0 when there are fewer samples than regionNums - 1;
        // a zero step would never advance the loop below, so clamp it to 1.
        final int step = Math.max(1, list.size() / Math.max(this.regionNums - 1, 1));
        List<byte[]> splitPoints = new ArrayList<>();
        for (int index = 0; index < list.size(); index += step) {
            splitPoints.add(list.get(index));
        }
        if (splitPoints.size() >= this.regionNums) {
            // Too many points: drop the first one and keep the next regionNums - 1.
            splitPoints = new ArrayList<>(splitPoints.subList(1, this.regionNums));
        }
        return splitPoints;
    }

    /**
     * Checks whether the target table already exists and logs which kind of import will run.
     *
     * @param admin HBase admin used for the existence check
     * @return {@code true} if the table exists (incremental import), {@code false} otherwise
     *         (full import)
     * @throws IOException if the existence check fails
     */
    private boolean isTargetTableExist(Admin admin) throws IOException {
        if (admin.tableExists(tableName)) {
            LOG.info("Table {} exists, it is a incremental import task.", tableName);
            return true;
        }
        // This branch previously logged "exists" although the table is absent here.
        LOG.info("Table {} does not exist, it is a full import task.", tableName);
        return false;
    }

    /**
     * Adds the configured column family to the target table when the table does not have it yet.
     *
     * @param admin HBase admin used to read the descriptor and alter the table
     * @throws IOException if reading the descriptor or adding the family fails
     */
    private void addColumnFamilyIfNotExist(Admin admin) throws IOException {
        final boolean familyPresent = admin.getDescriptor(tableName).hasColumnFamily(this.columnFamily);
        if (!familyPresent) {
            admin.addColumnFamily(tableName, generateColumnFamilyDesc());
            LOG.info("ColumnFamily {} not exists, added it.", Bytes.toString(this.columnFamily));
        }
    }

    /**
     * Counts the rows of the dataset and shrinks {@code regionNums} when there are not more
     * rows than requested regions.
     *
     * @param dataset dataset to count
     * @return the row count, always greater than zero
     * @throws IOException if the dataset is empty
     */
    private long checkAndGetRowCount(Dataset<Row> dataset) throws IOException {
        final long rowCount = dataset.count();
        LOG.info("Total data has {} rows.", rowCount);
        if (rowCount == 0) {
            LOG.warn("Query result is empty, skip bulkload, please check sql.");
            throw new IOException("Query result is empty.");
        }
        if (rowCount > this.regionNums) {
            return rowCount;
        }
        // rowCount fits in an int here because it does not exceed regionNums.
        this.regionNums = Math.max(1, ((int) rowCount) - 1);
        LOG.warn("The target region nums is bigger than data row count, set target region nums to {}.", this.regionNums);
        return rowCount;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v22, types: [java.util.List] */
    /**
     * Runs the sampling SQL and collects the row key of every {@code stride}-th row
     * (excluding row 0), where {@code stride = rowCount / (samplePoints - 1)}.
     *
     * @param javaSparkContext context used to broadcast the stride
     * @param broadcast        broadcast list of rowkey column names
     * @return the sampled row keys; empty when at most one sampling point is needed
     * @throws IOException if the query is empty, the number of sampling points overflows,
     *                     or sampling was attempted but produced no keys
     */
    private List<byte[]> samplingAndGetRowKeys(JavaSparkContext javaSparkContext, final Broadcast<List<String>> broadcast) throws IOException {
        final long startedAt = System.currentTimeMillis();
        final Dataset<Row> rowKeyColumns = executeSamplingSQL(this.sparkSession);
        final long totalRows = checkAndGetRowCount(rowKeyColumns);
        final int samplePoints = (int) Math.min(totalRows, this.regionNums * this.samplingMultiple);
        if (samplePoints < 0) {
            throw new IOException("Too many sampling points.");
        }
        List<byte[]> sampledKeys = new ArrayList<>();
        if (samplePoints > 1) {
            final Broadcast<Long> stride = javaSparkContext.broadcast(Long.valueOf(totalRows / (samplePoints - 1)));
            sampledKeys = rowKeyColumns.javaRDD()
                .zipWithIndex()
                .filter(indexed -> {
                    // Keep rows whose index is a non-zero multiple of the stride.
                    final long rowIndex = indexed._2;
                    return rowIndex != 0 && rowIndex % stride.value() == 0;
                })
                .map(indexed -> generateRowKeyByte(indexed._1, broadcast.value(), this.rowSeparatorStr, this.allRowColsAreStr))
                .collect();
            stride.destroy();
            if (CollectionUtils.isEmpty(sampledKeys)) {
                LOG.warn("Sampling result is empty, skip bulkload.");
                throw new IOException("Sampling result is empty.");
            }
        }
        LOG.info("Sampling finished, cost {} ms.", System.currentTimeMillis() - startedAt);
        return sampledKeys;
    }

    /**
     * Wraps the user SQL so that only the rowkey columns are selected, ordered by those columns.
     *
     * @param sparkSession session used to run the query
     * @return the dataset produced by the sampling query
     */
    private Dataset<Row> executeSamplingSQL(SparkSession sparkSession) {
        final String rowKeyColumnList = String.join(DEFAULT_COLUMNS_SEPARATOR, this.rowColumns);
        // The column list is used twice: as the projection and as the ORDER BY clause.
        final String samplingSql = String.format(SAMPLING_SQL_FORMAT, rowKeyColumnList, this.sql, rowKeyColumnList);
        LOG.info("Sampling sql = {}", samplingSql);
        return sparkSession.sql(samplingSql);
    }

    /**
     * Obtains the schema of the user SQL by wrapping it in a {@code LIMIT 1} query.
     *
     * @return the schema of the wrapped query
     */
    private StructType getSchema() {
        final String schemaProbeSql = "select * from (" + this.sql + " ) LIMIT 1";
        LOG.info("Sampling oneline sql = {}", schemaProbeSql);
        final Dataset<Row> probe = this.sparkSession.sql(schemaProbeSql);
        return probe.schema();
    }

    /**
     * Tells whether every rowkey column is a string or a type that {@link #needConvertToStr}
     * accepts.
     *
     * @param structType schema to inspect
     * @return {@code false} as soon as one rowkey column has any other type, {@code true} otherwise
     */
    private boolean allRowColsAreStr(StructType structType) {
        return Arrays.stream(structType.fields()).noneMatch(field -> {
            final DataType type = field.dataType();
            return this.rowColumns.contains(field.name())
                && !type.sameType(DataTypes.StringType)
                && !needConvertToStr(type);
        });
    }

    /**
     * Tells whether the type is one of: calendar interval, timestamp, date, array, map or struct.
     *
     * @param dataType Spark SQL type to test
     * @return {@code true} for the types listed above
     */
    private boolean needConvertToStr(DataType dataType) {
        if (dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof StructType) {
            return true;
        }
        return dataType.sameType(DataTypes.CalendarIntervalType)
            || dataType.sameType(DataTypes.TimestampType)
            || dataType.sameType(DataTypes.DateType);
    }

    /**
     * Creates the target table (and its namespace when missing) with the configured column
     * family, pre-split at the given keys when any are supplied.
     *
     * @param admin HBase admin used to create the namespace and table
     * @param list  split keys; {@code null} or empty creates a single-region table
     * @throws IOException if namespace or table creation fails
     */
    private void createTargetTable(Admin admin, List<byte[]> list) throws IOException {
        String namespace = tableName.getNamespaceAsString();
        if (!ArrayUtils.contains(admin.listNamespaces(), namespace)) {
            admin.createNamespace(NamespaceDescriptor.create(namespace).build());
        }
        TableDescriptor descriptor = TableDescriptorBuilder.newBuilder(tableName)
            .setColumnFamily(generateColumnFamilyDesc())
            .build();
        if (CollectionUtils.isNotEmpty(list)) {
            // toArray needs a byte[][] prototype; a byte[] cannot be passed as Object[].
            byte[][] splitKeys = list.toArray(new byte[0][]);
            admin.createTable(descriptor, splitKeys);
        } else {
            admin.createTable(descriptor);
        }
        LOG.info("Create Table {} success.", tableName);
    }

    /**
     * Builds the descriptor of the configured column family with the configured compression
     * and data block encoding.
     *
     * @return the column family descriptor
     */
    private ColumnFamilyDescriptor generateColumnFamilyDesc() {
        return ColumnFamilyDescriptorBuilder.newBuilder(this.columnFamily)
            .setCompressionType(Compression.Algorithm.valueOf(this.compressionStr))
            .setDataBlockEncoding(DataBlockEncoding.valueOf(this.dataBlockEncodingStr))
            .build();
    }

    /**
     * Runs the user SQL and writes its result as HFiles under {@code outputPath}.
     *
     * <p>Each row becomes a (row key, column values) pair, the pairs are repartitioned with a
     * {@link RegionPartitioner} built from {@code bArr} and sorted by row key within each
     * partition, then expanded into sorted (row key, KeyValue) pairs and saved through
     * {@link HFileOutputFormat2}.
     *
     * @param javaSparkContext context used to create the helper broadcasts
     * @param bArr             partition start keys
     * @param broadcast        broadcast list of rowkey column names
     * @param broadcast2       broadcast array of field names, used as column qualifiers
     */
    private void exportDataByFieldOrder(JavaSparkContext javaSparkContext, byte[][] bArr, final Broadcast<List<String>> broadcast, final Broadcast<String[]> broadcast2) {
        long currentTimeMillis = System.currentTimeMillis();
        RegionPartitioner regionPartitioner = new RegionPartitioner(bArr);
        // broadcast3: column family bytes; broadcast4: per-row KeyValue ordering;
        // broadcast5: row key ordering used for the shuffle sort.
        final Broadcast broadcast3 = javaSparkContext.broadcast(this.columnFamily);
        final Broadcast broadcast4 = javaSparkContext.broadcast(new KVComparator());
        Broadcast broadcast5 = javaSparkContext.broadcast(new ByteArrayComparator());
        // Step 1: map every row to exactly one (row key, list of column values) pair.
        this.sparkSession.sql(this.sql).javaRDD().flatMapToPair(new PairFlatMapFunction<Row, byte[], List<byte[]>>() { // from class: com.huawei.hadoop.hbase.tools.bulkload.SparkBulkLoadTool.3
            public Iterator<Tuple2<byte[], List<byte[]>>> call(Row row) throws Exception {
                return Lists.newArrayList(new Tuple2[]{Tuple2.apply(SparkBulkLoadTool.this.generateRowKeyByte(row, (List) broadcast.value(), SparkBulkLoadTool.this.rowSeparatorStr, SparkBulkLoadTool.this.allRowColsAreStr), SparkBulkLoadTool.this.gengerateByteArrayValueList(row, (List) broadcast.value(), SparkBulkLoadTool.this.skipStoreRowCols))}).iterator();
            }
        // Step 2: shuffle by start key and sort by row key; step 3: expand each row into its
        // KeyValues, sorted with the KVComparator.
        }).repartitionAndSortWithinPartitions(regionPartitioner, (Comparator) broadcast5.value()).flatMapToPair(new PairFlatMapFunction<Tuple2<byte[], List<byte[]>>, ImmutableBytesWritable, KeyValue>() { // from class: com.huawei.hadoop.hbase.tools.bulkload.SparkBulkLoadTool.4
            public Iterator<Tuple2<ImmutableBytesWritable, KeyValue>> call(Tuple2<byte[], List<byte[]>> tuple2) throws Exception {
                List generateKvTuplesToWrite = SparkBulkLoadTool.generateKvTuplesToWrite(tuple2, (byte[]) broadcast3.value(), (String[]) broadcast2.value());
                generateKvTuplesToWrite.sort((Comparator) broadcast4.value());
                return generateKvTuplesToWrite.iterator();
            }
        // Step 4: write the pairs as HFiles using the shared configuration.
        }).saveAsNewAPIHadoopFile(this.outputPath.toString(), ImmutableBytesWritable.class, KeyValue.class, HFileOutputFormat2.class, CONF);
        // The helper broadcasts are only needed for the job above.
        broadcast3.destroy();
        broadcast4.destroy();
        broadcast5.destroy();
        LOG.info("Export finished, cost {} ms.", Long.valueOf(System.currentTimeMillis() - currentTimeMillis));
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Converts one (row key, column values) pair into KeyValues, one per non-null value.
     * The value at position {@code i} uses {@code strArr[i]} as its qualifier.
     *
     * @param tuple2 row key and its column values; {@code null} values are skipped
     * @param bArr   column family
     * @param strArr qualifier names, indexed like the value list
     * @return the (row key, KeyValue) pairs in value-list order
     */
    public static List<Tuple2<ImmutableBytesWritable, KeyValue>> generateKvTuplesToWrite(Tuple2<byte[], List<byte[]>> tuple2, byte[] bArr, String[] strArr) {
        final byte[] rowKey = tuple2._1;
        final ImmutableBytesWritable writableRowKey = new ImmutableBytesWritable(rowKey);
        final List<byte[]> columnValues = tuple2._2;
        final List<Tuple2<ImmutableBytesWritable, KeyValue>> kvTuples = new ArrayList<>();
        int position = 0;
        for (byte[] value : columnValues) {
            if (value != null) {
                final KeyValue keyValue = new KeyValue(rowKey, bArr, Bytes.toBytes(strArr[position]), value);
                kvTuples.add(new Tuple2<>(writableRowKey, keyValue));
            }
            position++;
        }
        return kvTuples;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Converts every column of the row to bytes, in column order.
     *
     * @param row  source row
     * @param list rowkey column names
     * @param z    when {@code true}, rowkey columns get a {@code null} entry instead of a value
     * @return one entry per column of the row; skipped columns are {@code null}
     */
    public List<byte[]> gengerateByteArrayValueList(Row row, List<String> list, boolean z) {
        final int columnCount = row.size();
        final String[] columnNames = row.schema().fieldNames();
        final List<byte[]> values = new ArrayList<>();
        for (int column = 0; column < columnCount; column++) {
            final String columnName = columnNames[column];
            final boolean skipped = z && list.contains(columnName);
            values.add(skipped ? null : SparkBulkLoadUtils.convertRowValueToByteArray(row, column));
        }
        return values;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Builds the row key of a row from its rowkey columns.
     *
     * @param row  source row
     * @param list rowkey column names
     * @param str  separator string; the non-string path uses only its first UTF-8 byte
     * @param z    {@code true} to use the string-column rowkey builder
     * @return the row key bytes
     */
    public byte[] generateRowKeyByte(Row row, List<String> list, String str, boolean z) {
        if (z) {
            return SparkBulkLoadUtils.generateRowkeyFromStringCols(row, list, str);
        }
        final byte separator = str.getBytes(StandardCharsets.UTF_8)[0];
        return SparkBulkLoadUtils.generateRowkey(row, list, separator);
    }

    /**
     * Copies the compression, data block encoding and table name of the target column family
     * into the shared configuration under the HFile output format keys.
     *
     * @param admin HBase admin used to read the table descriptor
     * @throws IOException if the table descriptor cannot be read
     */
    private void initHFileFormatConf(Admin admin) throws IOException {
        final ColumnFamilyDescriptor family = admin.getDescriptor(tableName).getColumnFamily(this.columnFamily);
        final String compressionName = family.getCompressionType().getName();
        CONF.set("hbase.mapreduce.hfileoutputformat.compression", compressionName);
        final String encodingName = family.getDataBlockEncoding().name();
        CONF.set("hbase.mapreduce.hfileoutputformat.datablock.encoding", encodingName);
        CONF.set("hbase.mapreduce.hfileoutputformat.table.name", tableName.getNameAsString());
    }

    /**
     * Loads the HFiles under {@code outputPath} into the target table and logs the elapsed time.
     *
     * @param admin         HBase admin passed to the loader
     * @param table         target table
     * @param regionLocator region locator of the target table
     * @throws IOException if the bulk load fails
     */
    private void doBulkload(Admin admin, Table table, RegionLocator regionLocator) throws IOException {
        final long startedAt = System.currentTimeMillis();
        final BulkLoadHFilesTool loader = new BulkLoadHFilesTool(CONF);
        LOG.info("Loading HFiles for {} from {}", tableName, this.outputPath);
        loader.doBulkLoad(this.outputPath, admin, table, regionLocator);
        final long elapsedMs = System.currentTimeMillis() - startedAt;
        LOG.info("Incremental load complete for table={}, cost {} ms.", tableName, elapsedMs);
    }

    /**
     * Removes the output directory if it exists: it is moved to trash, or deleted recursively
     * when the move to trash reports failure. I/O errors are logged, not propagated.
     *
     * @param fileSystem file system that holds the output directory
     */
    private void clearOutPath(FileSystem fileSystem) {
        try {
            if (!fileSystem.exists(this.outputPath)) {
                return;
            }
            final boolean movedToTrash = Trash.moveToAppropriateTrash(fileSystem, this.outputPath, CONF);
            if (!movedToTrash) {
                fileSystem.delete(this.outputPath, true);
            }
            LOG.info("Clear output dir {} success.", this.outputPath);
        } catch (IOException e) {
            // Best effort: cleanup failure must not fail the caller.
            LOG.warn("Clear output dir failed.", e);
        }
    }

    /**
     * Command line entry point: creates a tool instance and runs it with the given arguments.
     *
     * @param strArr raw command line arguments
     * @throws ParseException if the arguments cannot be parsed
     * @throws IOException    if the bulk load fails
     */
    public static void main(String[] strArr) throws ParseException, IOException {
        final SparkBulkLoadTool tool = new SparkBulkLoadTool();
        tool.runTool(strArr);
    }
}
