/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.matrix;

import java.util.HashMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.instructions.MRInstructionParser;
import org.apache.sysml.runtime.instructions.MRJobInstruction;
import org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction;
import org.apache.sysml.runtime.matrix.JobReturn;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.data.TaggedFirstSecondIndexes;
import org.apache.sysml.runtime.matrix.mapred.CSVWriteMapper;
import org.apache.sysml.runtime.matrix.mapred.CSVWriteReducer;
import org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration;
import org.apache.sysml.yarn.ropt.YarnClusterAnalyzer;

/**
 * Driver for the "WriteCSV-MR" MapReduce job, which runs {@link CSVWriteMapper}
 * and {@link CSVWriteReducer} over the given inputs and writes the results with
 * {@link OutputInfo#CSVOutputInfo}.
 *
 * <p>This is a static utility class and cannot be instantiated.
 */
public class WriteCSVMR {
    private static final Log LOG = LogFactory.getLog(WriteCSVMR.class.getName());

    /** Number of bytes in one megabyte; sizes in the reducer heuristic are compared in MB. */
    private static final long BYTES_PER_MB = 0x100000L;

    private WriteCSVMR() {
    }

    /**
     * Configures and synchronously runs the CSV write job.
     *
     * @param inst                 job instruction; only used to print the complete instruction when trace logging is on
     * @param inputs               input paths; at most {@link Byte#MAX_VALUE} entries because inputs are tagged with byte indexes
     * @param inputInfos           input format descriptor per input
     * @param rlens                number of rows per input
     * @param clens                number of columns per input
     * @param brlens               block row size per input
     * @param bclens               block column size per input
     * @param csvWriteInstructions serialized CSV write instructions, parsed via {@link MRInstructionParser}
     * @param numReducers          not read by this method; the reducer count is derived via
     *                             {@link #determineNumReducers(long[], long[], int, long)}
     * @param replication          value stored under the "dfs.replication" job property
     * @param resultIndexes        byte index of each result
     * @param outputs              output paths
     * @return the per-result matrix characteristics (including non-zero counts read from the job counters),
     *         the output infos, and the job's success flag
     * @throws IllegalArgumentException if more than {@link Byte#MAX_VALUE} inputs are passed
     * @throws Exception                if job setup or execution fails
     */
    public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String csvWriteInstructions, int numReducers, int replication, byte[] resultIndexes, String[] outputs) throws Exception {
        // Inputs are addressed by a byte index; a wrapping byte counter would otherwise
        // run into a negative array index once the count exceeds Byte.MAX_VALUE.
        if (inputs.length > Byte.MAX_VALUE) {
            throw new IllegalArgumentException("WriteCSV-MR supports at most " + Byte.MAX_VALUE + " inputs, but got " + inputs.length);
        }

        JobConf job = new JobConf(WriteCSVMR.class);
        job.setJobName("WriteCSV-MR");

        // The i-th input is tagged with index i.
        byte[] realIndexes = new byte[inputs.length];
        for (int i = 0; i < realIndexes.length; i++) {
            realIndexes[i] = (byte) i;
        }

        MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, true, MRJobConfiguration.ConvertTarget.CSVWRITE);
        MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);
        MRJobConfiguration.setCSVWriteInstructions(job, csvWriteInstructions);
        job.setInt("dfs.replication", replication);
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);
        DMLConfig config = ConfigurationManager.getDMLConfig();
        MRJobConfiguration.setupCustomMRConfigurations(job, config);

        // The largest row count caps the number of reducers. It is passed on as a long:
        // narrowing it to int would corrupt the cap for inputs with more than
        // Integer.MAX_VALUE rows.
        long maxRlen = 0L;
        for (long rlen : rlens) {
            maxRlen = Math.max(maxRlen, rlen);
        }
        int numRed = determineNumReducers(rlens, clens, config.getIntValue("numreducers"), maxRlen);
        job.setNumReduceTasks(numRed);

        // All entries of resultDimsUnknown stay at their default of 0.
        byte[] resultDimsUnknown = new byte[resultIndexes.length];
        MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
        OutputInfo[] outputInfos = new OutputInfo[outputs.length];
        // Maps a result's byte index to its position in stats.
        HashMap<Byte, Integer> indexmap = new HashMap<Byte, Integer>();
        for (int i = 0; i < stats.length; ++i) {
            indexmap.put(resultIndexes[i], i);
            stats[i] = new MatrixCharacteristics();
            outputInfos[i] = OutputInfo.CSVOutputInfo;
        }

        // Each output takes the dimensions of the input its instruction reads;
        // both block sizes are set to -1.
        CSVWriteInstruction[] ins = MRInstructionParser.parseCSVWriteInstructions(csvWriteInstructions);
        for (CSVWriteInstruction in : ins) {
            stats[indexmap.get(in.output)].set(rlens[in.input], clens[in.input], -1, -1);
        }

        if (LOG.isTraceEnabled()) {
            inst.printCompleteMRJobInstruction(stats);
        }

        MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, "", "", csvWriteInstructions, resultIndexes);
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true, true);
        job.setMapperClass(CSVWriteMapper.class);
        job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
        job.setMapOutputValueClass(MatrixBlock.class);
        job.setReducerClass(CSVWriteReducer.class);
        job.setOutputKeyComparatorClass(TaggedFirstSecondIndexes.Comparator.class);
        job.setPartitionerClass(TaggedFirstSecondIndexes.FirstIndexRangePartitioner.class);
        MRJobConfiguration.setUniqueWorkingDir(job);

        // Blocks until the job has finished.
        RunningJob runjob = JobClient.runJob(job);

        // Non-zero counts are read from the "nonzeros" counter group, keyed by result position.
        Counters.Group group = runjob.getCounters().getGroup("nonzeros");
        for (int i = 0; i < resultIndexes.length; ++i) {
            stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        }
        return new JobReturn(stats, outputInfos, runjob.isSuccessful());
    }

    /**
     * Picks the number of reducers from the estimated on-disk size of the largest input.
     *
     * <p>The result is {@code max(defaultNumRed, min(maxSizeMB / hdfsBlockSizeMB, maxNumRed))},
     * then capped at {@code numRedGroups}, and finally raised to at least 1. {@code maxNumRed}
     * is the remote parallel reduce task count; when YARN is enabled it is raised to half the
     * cluster's core count if that is larger.
     *
     * @param rlen          number of rows per matrix
     * @param clen          number of columns per matrix; must have at least as many entries as {@code rlen}
     * @param defaultNumRed configured default number of reducers (lower bound before capping)
     * @param numRedGroups  upper bound on the number of reducers
     * @return the number of reducers to use, always at least 1
     */
    public static int determineNumReducers(long[] rlen, long[] clen, int defaultNumRed, long numRedGroups) {
        long maxNumRed = InfrastructureAnalyzer.getRemoteParallelReduceTasks();

        // Clamp to 1 MB so that an HDFS block size below 1 MB cannot cause a division by zero.
        long blockSizeMB = Math.max(1L, InfrastructureAnalyzer.getHDFSBlockSize() / BYTES_PER_MB);

        // Size estimate assumes every cell is a non-zero (nnz = rows * cols).
        // NOTE(review): rows * cols can overflow long for extremely large dimensions.
        long maxSizeMB = -1L;
        for (int i = 0; i < rlen.length; ++i) {
            long sizeMB = MatrixBlock.estimateSizeOnDisk(rlen[i], clen[i], rlen[i] * clen[i]) / BYTES_PER_MB;
            maxSizeMB = Math.max(maxSizeMB, sizeMB);
        }

        if (InfrastructureAnalyzer.isYarnEnabled()) {
            maxNumRed = Math.max(maxNumRed, YarnClusterAnalyzer.getNumCores() / 2L);
        }

        int ret = (int) Math.max((long) defaultNumRed, Math.min(maxSizeMB / blockSizeMB, maxNumRed));
        ret = (int) Math.min((long) ret, numRedGroups);
        return Math.max(ret, 1);
    }
}

