/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.codegen;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.codegen.SpoofOperator;
import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
import org.apache.sysml.runtime.instructions.cp.DoubleObject;
import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.matrix.data.IJV;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.util.UtilFunctions;

public abstract class SpoofOuterProduct
extends SpoofOperator {
    private static final long serialVersionUID = 2948612259863710279L;
    private static final int L2_CACHESIZE = 262144;
    protected OutProdType _outerProductType;

    /**
     * Creates an outer-product operator of the given type.
     *
     * <p>The field is assigned directly instead of going through
     * {@link #setOuterProdType(OutProdType)}: that setter is public and
     * overridable, and an override must never run against a partially
     * constructed instance.
     *
     * @param type the outer-product variant this operator evaluates
     */
    public SpoofOuterProduct(OutProdType type) {
        this._outerProductType = type;
    }

    /**
     * Replaces the outer-product variant evaluated by this operator.
     *
     * @param type the new outer-product type
     */
    public void setOuterProdType(OutProdType type) {
        _outerProductType = type;
    }

    /**
     * Returns the outer-product variant currently configured on this operator.
     *
     * @return the configured outer-product type
     */
    public OutProdType getOuterProdType() {
        return _outerProductType;
    }

    /**
     * Builds the operator type label: the prefix {@code "OP"} followed by the
     * second dot-separated segment of the runtime class name.
     *
     * <p>A runtime class whose fully qualified name contains no dot has no
     * second segment, so the array access below throws
     * {@link ArrayIndexOutOfBoundsException}.
     *
     * @return the type label of this operator
     */
    @Override
    public String getSpoofType() {
        String[] nameSegments = getClass().getName().split("\\.");
        return "OP" + nameSegments[1];
    }

    /**
     * Single-threaded execution that reduces the operator to one scalar.
     *
     * <p>The first input is the driving matrix; inputs 1 and 2 are passed to
     * the kernels as dense arrays {@code u} and {@code v}; inputs from index 3
     * on are passed as additional dense side inputs.
     *
     * @param inputs        input matrices, at least three
     * @param scalarObjects scalar inputs forwarded to the generated kernel
     * @return the aggregated value, or 0 when the first input is empty
     * @throws DMLRuntimeException declared for the overridden contract
     * @throws RuntimeException    if {@code inputs} is null or has fewer than three entries
     */
    @Override
    public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects) throws DMLRuntimeException {
        // sanity check
        if (inputs == null || inputs.size() < 3) {
            throw new RuntimeException("Invalid input arguments.");
        }
        // nothing to aggregate over an empty driving matrix
        if (inputs.get(0).isEmptyBlock(false)) {
            return new DoubleObject(0.0);
        }

        // input preparation
        double[][] factors = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
        double[][] sideInputs = getDenseMatrices(prepInputMatrices(inputs, 3, true));
        double[] scalarVals = prepInputScalars(scalarObjects);

        final int m = inputs.get(0).getNumRows();
        final int n = inputs.get(0).getNumColumns();
        final int k = inputs.get(1).getNumColumns();
        final MatrixBlock main = inputs.get(0);

        // 1x1 dense block used as accumulator by the cell-wise kernels
        MatrixBlock result = new MatrixBlock(1, 1, false);
        result.allocateDenseBlock();

        // dispatch on the physical representation of the driving matrix
        if (main instanceof CompressedMatrixBlock) {
            executeCellwiseCompressed((CompressedMatrixBlock) main, factors[0], factors[1], sideInputs, scalarVals,
                result, m, n, k, _outerProductType, 0, m, 0, n);
        }
        else if (main.isInSparseFormat()) {
            executeCellwiseSparse(main.getSparseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                result, m, n, k, main.getNonZeros(), _outerProductType, 0, m, 0, n);
        }
        else {
            executeCellwiseDense(main.getDenseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                result.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
        }
        return new DoubleObject(result.getDenseBlock()[0]);
    }

    /**
     * Multi-threaded execution that reduces the operator to one scalar.
     *
     * <p>Rows of the first input are split into contiguous ranges; each range
     * is aggregated by one {@link ParOuterProdAggTask} and the partial sums are
     * added up. Small problems (fewer than 2^21 estimated floating point
     * operations) are delegated to the single-threaded variant.
     *
     * <p>The worker pool is sized by {@code numThreads} (not by the factor
     * rank) and is always shut down, also when a task fails.
     *
     * @param inputs        input matrices, at least three
     * @param scalarObjects scalar inputs forwarded to the generated kernel
     * @param numThreads    number of worker threads
     * @return the aggregated value, or 0 when the first input is empty
     * @throws DMLRuntimeException if pool creation or any task fails
     * @throws RuntimeException    if {@code inputs} is null or has fewer than three entries
     */
    @Override
    public ScalarObject execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, int numThreads) throws DMLRuntimeException {
        if (inputs == null || inputs.size() < 3) {
            throw new RuntimeException("Invalid input arguments.");
        }
        if (inputs.get(0).isEmptyBlock(false)) {
            return new DoubleObject(0.0);
        }
        // too little work to pay for thread start-up: run sequentially
        if (2L * inputs.get(0).getNonZeros() * (long)inputs.get(1).getNumColumns() < 0x200000L) {
            return this.execute(inputs, scalarObjects);
        }
        double[][] ab = SpoofOuterProduct.getDenseMatrices(this.prepInputMatrices(inputs, 1, 2, true, false));
        double[][] b = SpoofOuterProduct.getDenseMatrices(this.prepInputMatrices(inputs, 3, true));
        double[] scalars = SpoofOuterProduct.prepInputScalars(scalarObjects);
        int m = inputs.get(0).getNumRows();
        int n = inputs.get(0).getNumColumns();
        int k = inputs.get(1).getNumColumns();
        long nnz = inputs.get(0).getNonZeros();
        double sum = 0.0;
        ExecutorService pool = null;
        try {
            // created inside the try so an invalid thread count is still
            // reported as DMLRuntimeException, as before
            pool = Executors.newFixedThreadPool(numThreads);
            List<ParOuterProdAggTask> tasks = new ArrayList<ParOuterProdAggTask>();
            int numTasks = SpoofOuterProduct.getPreferredNumberOfTasks(m, n, nnz, k, numThreads);
            int blklen = (int)Math.ceil((double)m / (double)numTasks);
            for (int i = 0; i < numTasks && i * blklen < m; ++i) {
                tasks.add(new ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, m, n, k, this._outerProductType, i * blklen, Math.min((i + 1) * blklen, m), 0, n));
            }
            // invokeAll returns only after every task has completed
            List<Future<Double>> taskret = pool.invokeAll(tasks);
            for (Future<Double> task : taskret) {
                sum += task.get();
            }
        }
        catch (Exception e) {
            throw new DMLRuntimeException(e);
        }
        finally {
            // release worker threads on success and on failure
            if (pool != null) {
                pool.shutdown();
            }
        }
        return new DoubleObject(sum);
    }

    /**
     * Single-threaded execution that writes a matrix result into {@code out}.
     *
     * <p>Output shape by type, taken from the first input {@code X} and the
     * second input {@code U}: cell-wise uses the shape and sparse flag of
     * {@code X}; left uses {@code ncol(X) x ncol(U)}; right uses
     * {@code nrow(X) x ncol(U)}. The aggregate type is rejected here.
     *
     * @param inputs        input matrices, at least three
     * @param scalarObjects scalar inputs forwarded to the generated kernel
     * @param out           result block, reset and filled by this call
     * @return {@code out}
     * @throws DMLRuntimeException if the operator type is the aggregate type
     * @throws RuntimeException    if {@code inputs} is null or too short, or {@code out} is null
     */
    @Override
    public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out) throws DMLRuntimeException {
        // sanity check
        if (inputs == null || inputs.size() < 3 || out == null) {
            throw new RuntimeException("Invalid input arguments.");
        }

        final OutProdType type = _outerProductType;

        // early exit: the factor that drives the result, or the main input, is empty
        boolean emptyResult = (type == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false))
            || (type == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false))
            || inputs.get(0).isEmptyBlock(false);
        if (emptyResult) {
            out.examSparsity();
            return out;
        }

        // shape the output according to the operator type
        MatrixBlock first = inputs.get(0);
        if (type == OutProdType.CELLWISE_OUTER_PRODUCT) {
            out.reset(first.getNumRows(), first.getNumColumns(), first.isInSparseFormat());
        }
        else if (type == OutProdType.LEFT_OUTER_PRODUCT) {
            out.reset(first.getNumColumns(), inputs.get(1).getNumColumns(), false);
        }
        else if (type == OutProdType.RIGHT_OUTER_PRODUCT) {
            out.reset(first.getNumRows(), inputs.get(1).getNumColumns(), false);
        }

        // the main input is checked once more after the output was reset
        if (first.isEmptyBlock(false)) {
            return out;
        }
        out.allocateDenseOrSparseBlock();

        // input preparation
        double[][] factors = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false));
        double[][] sideInputs = getDenseMatrices(prepInputMatrices(inputs, 3, true));
        double[] scalarVals = prepInputScalars(scalarObjects);

        final int m = inputs.get(0).getNumRows();
        final int n = inputs.get(0).getNumColumns();
        final int k = inputs.get(1).getNumColumns();
        final MatrixBlock a = inputs.get(0);

        // core execution, dispatched on type and physical input representation
        switch (type) {
            case LEFT_OUTER_PRODUCT:
            case RIGHT_OUTER_PRODUCT: {
                if (a instanceof CompressedMatrixBlock) {
                    CompressedMatrixBlock ca = (CompressedMatrixBlock) a;
                    executeCompressed(ca, factors[0], factors[1], sideInputs, scalarVals,
                        out.getDenseBlock(), m, n, k, type, 0, m, 0, ca.getNumColGroups());
                }
                else if (a.isInSparseFormat()) {
                    executeSparse(a.getSparseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                        out.getDenseBlock(), m, n, k, a.getNonZeros(), type, 0, m, 0, n);
                }
                else {
                    executeDense(a.getDenseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                        out.getDenseBlock(), m, n, k, type, 0, m, 0, n);
                }
                break;
            }
            case CELLWISE_OUTER_PRODUCT: {
                if (a instanceof CompressedMatrixBlock) {
                    executeCellwiseCompressed((CompressedMatrixBlock) a, factors[0], factors[1], sideInputs, scalarVals,
                        out, m, n, k, type, 0, m, 0, n);
                }
                else if (a.isInSparseFormat()) {
                    executeCellwiseSparse(a.getSparseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                        out, m, n, k, a.getNonZeros(), type, 0, m, 0, n);
                }
                else {
                    executeCellwiseDense(a.getDenseBlock(), factors[0], factors[1], sideInputs, scalarVals,
                        out.getDenseBlock(), m, n, k, type, 0, m, 0, n);
                }
                break;
            }
            case AGG_OUTER_PRODUCT: {
                throw new DMLRuntimeException("Wrong codepath for aggregate outer product.");
            }
        }

        // post-processing: sort rows of a sparse cell-wise result built from a compressed input
        if (a instanceof CompressedMatrixBlock && out.isInSparseFormat() && type == OutProdType.CELLWISE_OUTER_PRODUCT) {
            out.sortSparseRows();
        }
        out.recomputeNonZeros();
        out.examSparsity();
        return out;
    }

    /**
     * Multi-threaded execution that writes a matrix result into {@code out}.
     *
     * <p>Left outer products are partitioned over columns of the first input
     * (over column groups when it is compressed); all other types are
     * partitioned over rows. Each task reports the non-zero count of its part
     * of {@code out}, and the counts are summed into {@code out}. Small
     * problems (fewer than 2^21 estimated floating point operations) are
     * delegated to the single-threaded variant.
     *
     * <p>The worker pool is always shut down, also when a task fails.
     *
     * @param inputs        input matrices, at least three
     * @param scalarObjects scalar inputs forwarded to the generated kernel
     * @param out           result block, reset and filled by this call
     * @param numThreads    number of worker threads
     * @return {@code out}
     * @throws DMLRuntimeException if pool creation or any task fails
     * @throws RuntimeException    if {@code inputs} is null or too short, or {@code out} is null
     */
    @Override
    public MatrixBlock execute(ArrayList<MatrixBlock> inputs, ArrayList<ScalarObject> scalarObjects, MatrixBlock out, int numThreads) throws DMLRuntimeException {
        if (inputs == null || inputs.size() < 3 || out == null) {
            throw new RuntimeException("Invalid input arguments.");
        }
        // early exit: the factor that drives the result, or the main input, is empty
        if (this._outerProductType == OutProdType.LEFT_OUTER_PRODUCT && inputs.get(1).isEmptyBlock(false) || this._outerProductType == OutProdType.RIGHT_OUTER_PRODUCT && inputs.get(2).isEmptyBlock(false) || inputs.get(0).isEmptyBlock(false)) {
            out.examSparsity();
            return out;
        }
        // shape and allocate the output according to the operator type
        if (this._outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
            out.reset(inputs.get(0).getNumRows(), inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat());
            out.allocateDenseOrSparseBlock();
        } else {
            if (this._outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
                out.reset(inputs.get(0).getNumColumns(), inputs.get(1).getNumColumns(), false);
            } else if (this._outerProductType == OutProdType.RIGHT_OUTER_PRODUCT) {
                out.reset(inputs.get(0).getNumRows(), inputs.get(1).getNumColumns(), false);
            }
            out.allocateDenseBlock();
        }
        // too little work to pay for thread start-up: run sequentially
        if (2L * inputs.get(0).getNonZeros() * (long)inputs.get(1).getNumColumns() < 0x200000L) {
            return this.execute(inputs, scalarObjects, out);
        }
        double[][] ab = SpoofOuterProduct.getDenseMatrices(this.prepInputMatrices(inputs, 1, 2, true, false));
        double[][] b = SpoofOuterProduct.getDenseMatrices(this.prepInputMatrices(inputs, 3, true));
        double[] scalars = SpoofOuterProduct.prepInputScalars(scalarObjects);
        int m = inputs.get(0).getNumRows();
        int n = inputs.get(0).getNumColumns();
        int k = inputs.get(1).getNumColumns();
        long nnz = inputs.get(0).getNonZeros();
        MatrixBlock a = inputs.get(0);
        ExecutorService pool = null;
        try {
            // created inside the try so an invalid thread count is still
            // reported as DMLRuntimeException, as before
            pool = Executors.newFixedThreadPool(numThreads);
            ArrayList<ParExecTask> tasks = new ArrayList<ParExecTask>();
            if (this._outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
                // column-wise partitioning: column groups for compressed input, plain columns otherwise
                int len = a instanceof CompressedMatrixBlock ? ((CompressedMatrixBlock)a).getNumColGroups() : n;
                int blklen = (int)Math.ceil((double)len / (double)numThreads);
                for (int j = 0; j < numThreads && j * blklen < len; ++j) {
                    tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, this._outerProductType, 0, m, j * blklen, Math.min((j + 1) * blklen, len)));
                }
            } else {
                // row-wise partitioning
                int numTasks = SpoofOuterProduct.getPreferredNumberOfTasks(m, n, nnz, k, numThreads);
                int blklen = (int)Math.ceil((double)m / (double)numTasks);
                for (int i = 0; i < numTasks && i * blklen < m; ++i) {
                    tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, this._outerProductType, i * blklen, Math.min((i + 1) * blklen, m), 0, n));
                }
            }
            // invokeAll returns only after every task has completed
            List<Future<Long>> taskret = pool.invokeAll(tasks);
            for (Future<Long> task : taskret) {
                out.setNonZeros(out.getNonZeros() + task.get());
            }
        }
        catch (Exception e) {
            throw new DMLRuntimeException(e);
        }
        finally {
            // release worker threads on success and on failure
            if (pool != null) {
                pool.shutdown();
            }
        }
        if (a instanceof CompressedMatrixBlock) {
            if (out.isInSparseFormat() && this._outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
                out.sortSparseRows();
            } else if (this._outerProductType == OutProdType.LEFT_OUTER_PRODUCT) {
                // tasks return -1 in this case, so the count is rebuilt from scratch
                out.recomputeNonZeros();
            }
        }
        out.examSparsity();
        return out;
    }

    /**
     * Derives the number of row-partition tasks for a parallel run.
     *
     * <p>The base value is the smaller of {@code min(8 * k, m / 32)} and
     * {@code ceil(2 * nnz * rank / 2^21)}; it is then passed through
     * {@code UtilFunctions.roundToNext(base, k)}.
     *
     * @param m    number of rows of the driving matrix
     * @param n    number of columns of the driving matrix (not used here)
     * @param nnz  number of non-zeros of the driving matrix
     * @param rank number of columns of the factor matrices
     * @param k    degree of parallelism
     * @return the number of tasks to create
     */
    private static int getPreferredNumberOfTasks(int m, int n, long nnz, int rank, int k) {
        // bound derived from parallelism and row count (integer arithmetic)
        double sizeBound = Math.min(8 * k, m / 32);
        // bound derived from the estimated work, in units of 2^21 operations
        double workBound = Math.ceil(2.0 * (double) nnz * (double) rank / 2097152.0);
        int base = (int) Math.min(sizeBound, workBound);
        return UtilFunctions.roundToNext(base, k);
    }

    /**
     * Left/right outer-product kernel driver for a dense driving matrix.
     *
     * <p>Visits the row range {@code [rl, ru)} and column range
     * {@code [cl, cu)} of the row-major array {@code a} in 16x16 tiles and
     * calls {@link #genexecDense} for every non-zero cell. The output offset
     * handed to the kernel is {@code j * k} for the left type and
     * {@code i * k} otherwise.
     */
    private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars, double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
        final int blocksizeIJ = 16;
        final boolean left = (type == OutProdType.LEFT_OUTER_PRODUCT);
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            final int bimin = Math.min(ru, bi + blocksizeIJ);
            for (int bj = cl; bj < cu; bj += blocksizeIJ) {
                final int bjmin = Math.min(cu, bj + blocksizeIJ);
                // ix: offset of row i in a; uix: offset of row i in u
                for (int i = bi, ix = bi * n, uix = bi * k; i < bimin; i++, ix += n, uix += k) {
                    // vix: offset of row j in v
                    for (int j = bj, vix = bj * k; j < bjmin; j++, vix += k) {
                        final double aval = a[ix + j];
                        if (aval == 0.0) {
                            continue;
                        }
                        genexecDense(aval, u, uix, v, vix, b, scalars, c, left ? vix : uix, m, n, k, i, j);
                    }
                }
            }
        }
    }

    /**
     * Cell-wise / aggregate kernel driver for a dense driving matrix.
     *
     * <p>Visits the row range {@code [rl, ru)} and column range
     * {@code [cl, cu)} of the row-major array {@code a} in 16x16 tiles and
     * calls {@link #genexecCellwise} for every non-zero cell. For the
     * cell-wise type the result is stored at the same position in {@code c};
     * for any other type it is added to {@code c[0]}.
     */
    private void executeCellwiseDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars, double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
        final int blocksizeIJ = 16;
        final boolean cellwise = (type == OutProdType.CELLWISE_OUTER_PRODUCT);
        for (int bi = rl; bi < ru; bi += blocksizeIJ) {
            final int bimin = Math.min(ru, bi + blocksizeIJ);
            for (int bj = cl; bj < cu; bj += blocksizeIJ) {
                final int bjmin = Math.min(cu, bj + blocksizeIJ);
                // ix: offset of row i in a and c; uix: offset of row i in u
                for (int i = bi, ix = bi * n, uix = bi * k; i < bimin; i++, ix += n, uix += k) {
                    // vix: offset of row j in v
                    for (int j = bj, vix = bj * k; j < bjmin; j++, vix += k) {
                        if (a[ix + j] == 0.0) {
                            continue;
                        }
                        if (cellwise) {
                            c[ix + j] = genexecCellwise(a[ix + j], u, uix, v, vix, b, scalars, m, n, k, i, j);
                        }
                        else {
                            c[0] += genexecCellwise(a[ix + j], u, uix, v, vix, b, scalars, m, n, k, i, j);
                        }
                    }
                }
            }
        }
    }

    /**
     * Left/right outer-product kernel driver for a sparse driving matrix.
     *
     * <p>Rows {@code [rl, ru)} are processed in row blocks of size
     * {@code 8 * m * n / nnz}; within a row block, columns {@code [cl, cu)}
     * are processed in column blocks. {@code curk} remembers, per row of the
     * current row block, how far that row's non-zeros have been consumed, so
     * each column block resumes where the previous one stopped.
     * {@link #genexecDense} is called for every visited non-zero; the output
     * offset is {@code col * k} for the left type and {@code row * k}
     * otherwise.
     */
    private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars, double[] c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) {
        final boolean left = (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT);
        final int blocksizeI = (int) (8L * m * n / nnz);
        final int blocksizeJ = left
            ? Math.max(8, Math.min(L2_CACHESIZE / (k * 8), blocksizeI))
            : blocksizeI;
        final int[] curk = new int[Math.min(blocksizeI, ru - rl)];

        for (int bi = rl; bi < ru; bi += blocksizeI) {
            final int bimin = Math.min(ru, bi + blocksizeI);

            // starting position per row: first non-zero with column >= cl
            for (int i = bi; i < bimin; i++) {
                int start = (cl == 0 || sblock.isEmpty(i)) ? 0 : sblock.posFIndexGTE(i, cl);
                curk[i - bi] = (start >= 0) ? start : n;
            }

            for (int bj = cl; bj < cu; bj += blocksizeJ) {
                final int bjmin = Math.min(cu, bj + blocksizeJ);
                for (int i = bi, uix = bi * k; i < bimin; i++, uix += k) {
                    if (sblock.isEmpty(i)) {
                        continue;
                    }
                    final int wpos = sblock.pos(i);
                    final int wlen = sblock.size(i);
                    final int[] wix = sblock.indexes(i);
                    final double[] wval = sblock.values(i);

                    int index = wpos + curk[i - bi];
                    for (; index < wpos + wlen && wix[index] < bjmin; index++) {
                        final int col = wix[index];
                        genexecDense(wval[index], u, uix, v, col * k, b, scalars, c,
                            left ? col * k : uix, m, n, k, i, col);
                    }
                    // remember where this row stopped for the next column block
                    curk[i - bi] = index - wpos;
                }
            }
        }
    }

    /**
     * Cell-wise / aggregate kernel driver for a sparse driving matrix.
     *
     * <p>Rows {@code [rl, ru)} are processed in row blocks of size
     * {@code 8 * m * n / nnz}; within a row block all columns {@code [0, n)}
     * are processed in column blocks of the same size ({@code cl} and
     * {@code cu} are not used). {@code curk} remembers, per row of the current
     * row block, how far that row's non-zeros have been consumed.
     *
     * <p>With a dense {@code out}: for the cell-wise type each result is
     * stored at row-major position {@code i * n + col}; for any other type
     * the results are summed and the sum is written to {@code c[0]}. With a
     * sparse {@code out}: each result is appended to row {@code i} of the
     * output sparse block.
     *
     * <p>Two corrections over the previous version: the scan position is
     * initialised from {@code curk} before either dense branch uses it (the
     * aggregate branch read it unassigned), and the dense cell-wise write
     * includes the row offset, matching the dense and compressed kernels.
     */
    private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars, MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) {
        final boolean cellwise = type == OutProdType.CELLWISE_OUTER_PRODUCT;
        final int blocksizeIJ = (int)(8L * (long)m * (long)n / nnz);
        int[] curk = new int[Math.min(blocksizeIJ, ru - rl)];
        if (!out.isInSparseFormat()) {
            double[] c = out.getDenseBlock();
            double tmp = 0.0;
            for (int bi = rl; bi < ru; bi += blocksizeIJ) {
                int bimin = Math.min(ru, bi + blocksizeIJ);
                // every row of a new row block starts at its first non-zero
                Arrays.fill(curk, 0);
                for (int bj = 0; bj < n; bj += blocksizeIJ) {
                    int bjmin = Math.min(n, bj + blocksizeIJ);
                    for (int i = bi, uix = bi * k; i < bimin; ++i, uix += k) {
                        if (sblock.isEmpty(i)) {
                            continue;
                        }
                        int wpos = sblock.pos(i);
                        int wlen = sblock.size(i);
                        int[] wix = sblock.indexes(i);
                        double[] wval = sblock.values(i);
                        // resume where the previous column block stopped
                        int index = wpos + curk[i - bi];
                        if (cellwise) {
                            for (; index < wpos + wlen && wix[index] < bjmin; ++index) {
                                c[i * n + wix[index]] = this.genexecCellwise(wval[index], u, uix, v, wix[index] * k, b, scalars, m, n, k, i, wix[index]);
                            }
                        } else {
                            for (; index < wpos + wlen && wix[index] < bjmin; ++index) {
                                tmp += this.genexecCellwise(wval[index], u, uix, v, wix[index] * k, b, scalars, m, n, k, i, wix[index]);
                            }
                        }
                        curk[i - bi] = index - wpos;
                    }
                }
            }
            if (!cellwise) {
                c[0] = tmp;
            }
        } else {
            SparseBlock c = out.getSparseBlock();
            for (int bi = rl; bi < ru; bi += blocksizeIJ) {
                int bimin = Math.min(ru, bi + blocksizeIJ);
                // every row of a new row block starts at its first non-zero
                Arrays.fill(curk, 0);
                for (int bj = 0; bj < n; bj += blocksizeIJ) {
                    int bjmin = Math.min(n, bj + blocksizeIJ);
                    for (int i = bi, uix = bi * k; i < bimin; ++i, uix += k) {
                        if (sblock.isEmpty(i)) {
                            continue;
                        }
                        int wpos = sblock.pos(i);
                        int wlen = sblock.size(i);
                        int[] wix = sblock.indexes(i);
                        double[] wval = sblock.values(i);
                        // resume where the previous column block stopped
                        int index = wpos + curk[i - bi];
                        for (; index < wpos + wlen && wix[index] < bjmin; ++index) {
                            c.append(i, wix[index], this.genexecCellwise(wval[index], u, uix, v, wix[index] * k, b, scalars, m, n, k, i, wix[index]));
                        }
                        curk[i - bi] = index - wpos;
                    }
                }
            }
        }
    }

    /**
     * Left/right outer-product kernel driver for a compressed driving matrix.
     *
     * <p>Iterates the cells delivered by the compressed block's iterator and
     * calls {@link #genexecDense} for each. For the left type the iterator is
     * requested with the additional bounds {@code cl} and {@code cu};
     * otherwise only the row range {@code [rl, ru)} is passed. The output
     * offset is {@code col * k} for the left type and {@code row * k}
     * otherwise.
     */
    private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
        final boolean left = (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT);
        Iterator<IJV> cells = left
            ? a.getIterator(rl, ru, cl, cu, false)
            : a.getIterator(rl, ru, false);
        while (cells.hasNext()) {
            IJV cell = cells.next();
            final int uix = cell.getI() * k;
            final int vix = cell.getJ() * k;
            final int cix = left ? vix : uix;
            genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c, cix, m, n, k, cell.getI(), cell.getJ());
        }
    }

    /**
     * Cell-wise / aggregate kernel driver for a compressed driving matrix.
     *
     * <p>Iterates the cells of rows {@code [rl, ru)} ({@code cl} and
     * {@code cu} are not used) and calls {@link #genexecCellwise} for each.
     * For the cell-wise type the result is appended to the output sparse row
     * (allocating the row first) when {@code out} is sparse, or stored at
     * row-major position {@code i * n + j} when it is dense. For any other
     * type the result is added to {@code c[0]}.
     */
    private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, MatrixBlock out, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
        final double[] c = out.getDenseBlock();
        final SparseBlock csblock = out.getSparseBlock();
        final boolean cellwise = (type == OutProdType.CELLWISE_OUTER_PRODUCT);

        for (Iterator<IJV> cells = a.getIterator(rl, ru, false); cells.hasNext(); ) {
            IJV cell = cells.next();
            final int uix = cell.getI() * k;
            final int vix = cell.getJ() * k;
            if (!cellwise) {
                // aggregate into the single output cell
                c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
            }
            else if (out.isInSparseFormat()) {
                csblock.allocate(cell.getI());
                csblock.append(cell.getI(), cell.getJ(),
                    genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
            }
            else {
                c[cell.getI() * n + cell.getJ()] =
                    genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
            }
        }
    }

    /**
     * Kernel hook for the left/right outer-product types, implemented by
     * subclasses. The parameter names are decompiler placeholders; the
     * callers in this class pass, in order:
     *
     * @param var1  value of the current non-zero cell of the driving matrix
     * @param var3  dense array of the first factor ({@code u})
     * @param var4  offset into {@code u} ({@code row * k})
     * @param var5  dense array of the second factor ({@code v})
     * @param var6  offset into {@code v} ({@code col * k})
     * @param var7  dense side inputs
     * @param var8  scalar inputs
     * @param var9  dense output array ({@code c})
     * @param var10 offset into {@code c}: the {@code v} offset for the left type, the {@code u} offset otherwise
     * @param var11 number of rows of the driving matrix ({@code m})
     * @param var12 number of columns of the driving matrix ({@code n})
     * @param var13 number of columns of the factor matrices ({@code k})
     * @param var14 row index of the current cell
     * @param var15 column index of the current cell
     */
    protected abstract void genexecDense(double var1, double[] var3, int var4, double[] var5, int var6, double[][] var7, double[] var8, double[] var9, int var10, int var11, int var12, int var13, int var14, int var15);

    /**
     * Kernel hook for the cell-wise and aggregate types, implemented by
     * subclasses. The parameter names are decompiler placeholders; the
     * callers in this class pass, in order:
     *
     * @param var1  value of the current non-zero cell of the driving matrix
     * @param var3  dense array of the first factor ({@code u})
     * @param var4  offset into {@code u} ({@code row * k})
     * @param var5  dense array of the second factor ({@code v})
     * @param var6  offset into {@code v} ({@code col * k})
     * @param var7  dense side inputs
     * @param var8  scalar inputs
     * @param var9  number of rows of the driving matrix ({@code m})
     * @param var10 number of columns of the driving matrix ({@code n})
     * @param var11 number of columns of the factor matrices ({@code k})
     * @param var12 row index of the current cell
     * @param var13 column index of the current cell
     * @return the value for this cell; callers either store it in the output
     *         cell or add it to a running sum
     */
    protected abstract double genexecCellwise(double var1, double[] var3, int var4, double[] var5, int var6, double[][] var7, double[] var8, int var9, int var10, int var11, int var12, int var13);

    /**
     * Worker task of the multi-threaded aggregate execution: runs the
     * cell-wise kernel driver over one row/column range of the driving matrix
     * and returns the partial aggregate collected in a private 1x1 block.
     */
    private class ParOuterProdAggTask
    implements Callable<Double> {
        private final MatrixBlock _a;
        private final double[] _u;
        private final double[] _v;
        private final double[][] _b;
        private final double[] _scalars;
        private final int _rlen;
        private final int _clen;
        private final int _k;
        private final OutProdType _type;
        private final int _rl;
        private final int _ru;
        private final int _cl;
        private final int _cu;

        /**
         * @param a       driving matrix
         * @param u       dense array of the first factor
         * @param v       dense array of the second factor
         * @param b       dense side inputs
         * @param scalars scalar inputs
         * @param m       number of rows of {@code a}
         * @param n       number of columns of {@code a}
         * @param k       number of columns of the factors
         * @param type    operator type handed to the kernel driver
         * @param rl      first row of the range (inclusive)
         * @param ru      end row of the range (exclusive)
         * @param cl      first column of the range (inclusive)
         * @param cu      end column of the range (exclusive)
         */
        protected ParOuterProdAggTask(MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
            _a = a;
            _u = u;
            _v = v;
            _b = b;
            _scalars = scalars;
            _rlen = m;
            _clen = n;
            _k = k;
            _type = type;
            _rl = rl;
            _ru = ru;
            _cl = cl;
            _cu = cu;
        }

        /**
         * Aggregates the assigned range.
         *
         * @return the partial aggregate of this task's range
         */
        @Override
        public Double call() throws DMLRuntimeException {
            // task-local accumulator, so tasks never share output state
            MatrixBlock partial = new MatrixBlock(1, 1, false);
            partial.allocateDenseBlock();

            if (_a instanceof CompressedMatrixBlock) {
                executeCellwiseCompressed((CompressedMatrixBlock) _a, _u, _v, _b, _scalars,
                    partial, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
            }
            else if (_a.isInSparseFormat()) {
                executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars,
                    partial, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu);
            }
            else {
                executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars,
                    partial.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
            }
            return partial.getDenseBlock()[0];
        }
    }

    /**
     * Worker task of the multi-threaded matrix execution: runs the kernel
     * driver matching the operator type over one row/column range and
     * reports the non-zero count of the part of the output it covers.
     */
    private class ParExecTask
    implements Callable<Long> {
        private final MatrixBlock _a;
        private final double[] _u;
        private final double[] _v;
        private final double[][] _b;
        private final double[] _scalars;
        private final MatrixBlock _c;
        private final int _clen;
        private final int _rlen;
        private final int _k;
        private final OutProdType _type;
        private final int _rl;
        private final int _ru;
        private final int _cl;
        private final int _cu;

        /**
         * @param a       driving matrix
         * @param u       dense array of the first factor
         * @param v       dense array of the second factor
         * @param b       dense side inputs
         * @param scalars scalar inputs
         * @param c       shared output block
         * @param m       number of rows of {@code a}
         * @param n       number of columns of {@code a}
         * @param k       number of columns of the factors
         * @param type    operator type
         * @param rl      first row of the range (inclusive)
         * @param ru      end row of the range (exclusive)
         * @param cl      first column (or column group) of the range (inclusive)
         * @param cu      end column (or column group) of the range (exclusive)
         */
        protected ParExecTask(MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) {
            _a = a;
            _u = u;
            _v = v;
            _b = b;
            _c = c;
            _scalars = scalars;
            _rlen = m;
            _clen = n;
            _k = k;
            _type = type;
            _rl = rl;
            _ru = ru;
            _cl = cl;
            _cu = cu;
        }

        /**
         * Executes the assigned range.
         *
         * @return {@code -1} for a left outer product over a compressed input;
         *         otherwise the non-zero count of the output rows this task covers
         * @throws DMLRuntimeException if the task type is the aggregate type
         */
        @Override
        public Long call() throws DMLRuntimeException {
            switch (_type) {
                case LEFT_OUTER_PRODUCT:
                case RIGHT_OUTER_PRODUCT: {
                    if (_a instanceof CompressedMatrixBlock) {
                        executeCompressed((CompressedMatrixBlock) _a, _u, _v, _b, _scalars,
                            _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                    }
                    else if (_a.isInSparseFormat()) {
                        executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars,
                            _c.getDenseBlock(), _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu);
                    }
                    else {
                        executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars,
                            _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                    }
                    break;
                }
                case CELLWISE_OUTER_PRODUCT: {
                    if (_a instanceof CompressedMatrixBlock) {
                        executeCellwiseCompressed((CompressedMatrixBlock) _a, _u, _v, _b, _scalars,
                            _c, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                    }
                    // the dense/sparse choice follows the output block's format
                    else if (_c.isInSparseFormat()) {
                        executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars,
                            _c, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu);
                    }
                    else {
                        executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars,
                            _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                    }
                    break;
                }
                case AGG_OUTER_PRODUCT: {
                    throw new DMLRuntimeException("Wrong codepath for aggregate outer product.");
                }
            }

            // for the left type the task's column range is used as the output row range
            final boolean left = (SpoofOuterProduct.this._outerProductType == OutProdType.LEFT_OUTER_PRODUCT);
            final int rl = left ? _cl : _rl;
            final int ru = left ? _cu : _ru;
            final boolean skipCount = (_a instanceof CompressedMatrixBlock) && left;
            return skipCount ? -1L : _c.recomputeNonZeros(rl, ru - 1, 0, _c.getNumColumns() - 1);
        }
    }

    /**
     * Variants of the outer-product operator. The variant decides the shape of
     * the result and which kernel hook is invoked.
     */
    public static enum OutProdType {
        // matrix result with (columns of first input) x (columns of second input) cells
        LEFT_OUTER_PRODUCT,
        // matrix result with (rows of first input) x (columns of second input) cells
        RIGHT_OUTER_PRODUCT,
        // matrix result with the same dimensions as the first input
        CELLWISE_OUTER_PRODUCT,
        // scalar result; rejected by the matrix-producing execute paths
        AGG_OUTER_PRODUCT;

    }
}

