/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.instructions.gpu.context;

import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import jcuda.Pointer;
import jcuda.jcusparse.cusparseHandle;
import jcuda.jcusparse.cusparseMatDescr;
import jcuda.runtime.JCuda;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.caching.CacheException;
import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.instructions.gpu.context.CSRPointer;
import org.apache.sysml.runtime.instructions.gpu.context.ExecutionConfig;
import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlock;
import org.apache.sysml.runtime.matrix.data.SparseBlockCOO;
import org.apache.sysml.runtime.matrix.data.SparseBlockCSR;
import org.apache.sysml.runtime.matrix.data.SparseBlockMCSR;
import org.apache.sysml.utils.GPUStatistics;

/**
 * Handle to the device-side (GPU) representation of a {@link MatrixObject}.
 *
 * <p>A GPUObject holds at most one of two device buffers: a dense buffer
 * ({@link #jcudaDenseMatrixPtr}) or a CSR sparse buffer ({@link #jcudaSparseMatrixPtr}).
 * It also tracks a dirty flag, read/write lock counters and a timestamp that is
 * updated on release according to the eviction policy of the owning {@link GPUContext}.
 */
public class GPUObject {
    private static final Log LOG = LogFactory.getLog(GPUObject.class.getName());

    /** Numeric value of CUDA's {@code cudaMemcpyDeviceToHost} copy kind. */
    private static final int CUDA_MEMCPY_DEVICE_TO_HOST = 2;
    /** Numeric value of CUDA's {@code cudaMemcpyDeviceToDevice} copy kind. */
    private static final int CUDA_MEMCPY_DEVICE_TO_DEVICE = 3;
    /** Numeric value of cuBLAS' {@code CUBLAS_OP_T} (transpose) operation. */
    private static final int CUBLAS_OP_T = 1;
    /** Numeric value of cuSPARSE's {@code CUSPARSE_DIRECTION_ROW}. */
    private static final int CUSPARSE_DIRECTION_ROW = 0;

    /** GPU context this object allocates from and records its usage with. */
    private final GPUContext gpuContext;
    /** Device pointer to the dense buffer, or null when no dense buffer is held. */
    private Pointer jcudaDenseMatrixPtr = null;
    /** Device CSR structure, or null when no sparse buffer is held. */
    private CSRPointer jcudaSparseMatrixPtr = null;
    /** Set when the device copy has been (or is about to be) modified and not yet copied to the host. */
    protected boolean dirty = false;
    /** Number of read locks currently held. */
    protected LongAdder readLocks = new LongAdder();
    /** Whether the write lock is currently held. */
    protected boolean writeLock = false;
    /** Eviction bookkeeping: set to nanoTime for LRU, incremented for LFU (see updateReleaseLocks). */
    AtomicLong timestamp = new AtomicLong();
    /** Whether the device representation is flagged as sparse. */
    protected boolean isSparse = false;
    /** The matrix object this GPU object is associated with. */
    protected MatrixObject mat = null;

    /**
     * Creates a copy of this object with its own device buffers.
     *
     * <p>The copy shares the GPU context and matrix object, carries over the dirty flag,
     * sparsity flag and timestamp value, and starts with no locks held. Device buffers
     * are duplicated on the device.
     *
     * @return the new GPUObject
     * @throws RuntimeException wrapping any {@link DMLRuntimeException} raised while allocating or copying
     */
    @Override
    public Object clone() {
        GPUObject me = this;
        GPUObject that = new GPUObject(me.gpuContext, me.mat);
        that.dirty = me.dirty;
        that.readLocks.reset();
        that.writeLock = false;
        that.timestamp = new AtomicLong(me.timestamp.get());
        that.isSparse = me.isSparse;
        try {
            if (me.jcudaDenseMatrixPtr != null) {
                long rows = me.mat.getNumRows();
                long cols = me.mat.getNumColumns();
                long size = rows * cols * (long)LibMatrixCUDA.sizeOfDataType;
                // Pass the full long size: narrowing to int would truncate buffers larger than 2 GiB.
                me.gpuContext.ensureFreeSpace(size);
                that.jcudaDenseMatrixPtr = this.allocate(size);
                JCuda.cudaMemcpy(that.jcudaDenseMatrixPtr, me.jcudaDenseMatrixPtr, size, CUDA_MEMCPY_DEVICE_TO_DEVICE);
            }
            if (me.getJcudaSparseMatrixPtr() != null) {
                // Checked narrowing instead of a silent (int) cast.
                int rows = GPUObject.toIntExact(me.mat.getNumRows());
                that.jcudaSparseMatrixPtr = me.jcudaSparseMatrixPtr.clone(rows);
            }
        }
        catch (DMLRuntimeException e) {
            throw new RuntimeException(e);
        }
        return that;
    }

    /** Allocates {@code size} bytes through the owning GPU context. */
    private Pointer allocate(long size) throws DMLRuntimeException {
        return this.getGPUContext().allocate(size);
    }

    /** Frees a device pointer through the owning GPU context. */
    private void cudaFreeHelper(Pointer toFree) throws DMLRuntimeException {
        this.getGPUContext().cudaFreeHelper(toFree);
    }

    /** Frees a device pointer through the owning GPU context, forwarding the instruction name and eager flag. */
    private void cudaFreeHelper(String instName, Pointer toFree, boolean eager) throws DMLRuntimeException {
        this.getGPUContext().cudaFreeHelper(instName, toFree, eager);
    }

    private GPUContext getGPUContext() {
        return this.gpuContext;
    }

    /**
     * Transposes a dense device matrix into a newly allocated device buffer using cublas geam.
     *
     * @param gCtx     GPU context used for allocation and the cublas handle
     * @param densePtr device pointer to the input matrix
     * @param m        first dimension passed to geam
     * @param n        second dimension passed to geam
     * @param lda      leading dimension of the input
     * @param ldc      leading dimension of the output
     * @return device pointer to the newly allocated result; the input is not freed here
     * @throws DMLRuntimeException if allocation or the geam call fails
     */
    public static Pointer transpose(GPUContext gCtx, Pointer densePtr, int m, int n, int lda, int ldc) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : transpose of block of size [" + m + "," + n + "], GPUContext=" + gCtx);
        }
        Pointer alpha = LibMatrixCUDA.one();
        Pointer beta = LibMatrixCUDA.zero();
        Pointer A = densePtr;
        Pointer C = gCtx.allocate((long)m * GPUObject.getDatatypeSizeOf(n));
        // C = alpha * op(A) + beta * op(B) with beta = zero(); an empty Pointer is passed for B.
        LibMatrixCUDA.cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), CUBLAS_OP_T, CUBLAS_OP_T, m, n, alpha, A, lda, beta, new Pointer(), lda, C, ldc);
        return C;
    }

    /**
     * Converts a column-major dense device matrix into a CSR sparse device matrix.
     *
     * <p>Two temporary device buffers (per-row nnz and total nnz) are allocated for the
     * conversion and are released before returning, including when the conversion fails.
     *
     * @param gCtx            GPU context used for allocation
     * @param cusparseHandle2 cusparse handle
     * @param densePtr        device pointer to the column-major dense matrix
     * @param rows            number of rows
     * @param cols            number of columns
     * @return newly allocated CSR pointer holding the converted matrix
     * @throws DMLRuntimeException if the nnz count could not be obtained or a device call fails
     */
    public static CSRPointer columnMajorDenseToRowMajorSparse(GPUContext gCtx, cusparseHandle cusparseHandle2, Pointer densePtr, int rows, int cols) throws DMLRuntimeException {
        cusparseMatDescr matDescr = CSRPointer.getDefaultCuSparseMatrixDescriptor();
        // Computed in long so that rows + 1 cannot overflow int.
        gCtx.ensureFreeSpace(GPUObject.getIntSizeOf((long)rows + 1));
        Pointer nnzPerRowPtr = null;
        Pointer nnzTotalDevHostPtr = null;
        try {
            nnzPerRowPtr = gCtx.allocate(GPUObject.getIntSizeOf(rows));
            nnzTotalDevHostPtr = gCtx.allocate(GPUObject.getIntSizeOf(1L));
            LibMatrixCUDA.cudaSupportFunctions.cusparsennz(cusparseHandle2, CUSPARSE_DIRECTION_ROW, rows, cols, matDescr, densePtr, rows, nnzPerRowPtr, nnzTotalDevHostPtr);
            // -1 is a sentinel: if the copy below does not overwrite it, the count was not produced.
            int[] nnzC = new int[]{-1};
            long t2 = 0L;
            if (DMLScript.STATISTICS) {
                t2 = System.nanoTime();
            }
            JCuda.cudaMemcpy(Pointer.to(nnzC), nnzTotalDevHostPtr, GPUObject.getIntSizeOf(1L), CUDA_MEMCPY_DEVICE_TO_HOST);
            if (DMLScript.STATISTICS) {
                GPUStatistics.cudaFromDevTime.add(System.nanoTime() - t2);
                GPUStatistics.cudaFromDevCount.add(1L);
            }
            if (nnzC[0] == -1) {
                throw new DMLRuntimeException("cusparseDnnz did not calculate the correct number of nnz from the sparse-matrix vector mulitply on the GPU");
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : col-major dense size[" + rows + "," + cols + "] to row-major sparse of with nnz = " + nnzC[0] + ", GPUContext=" + gCtx);
            }
            CSRPointer C = CSRPointer.allocateEmpty(gCtx, nnzC[0], rows);
            LibMatrixCUDA.cudaSupportFunctions.cusparsedense2csr(cusparseHandle2, rows, cols, matDescr, densePtr, rows, nnzPerRowPtr, C.val, C.rowPtr, C.colInd);
            return C;
        }
        finally {
            // Release the temporaries on every path so a failed conversion does not leak device memory.
            if (nnzPerRowPtr != null) {
                gCtx.cudaFreeHelper(nnzPerRowPtr);
            }
            if (nnzTotalDevHostPtr != null) {
                gCtx.cudaFreeHelper(nnzTotalDevHostPtr);
            }
        }
    }

    /**
     * @return the CSR device pointer, or null if no sparse buffer is held
     */
    public CSRPointer getSparseMatrixCudaPointer() {
        return this.getJcudaSparseMatrixPtr();
    }

    /**
     * Installs a sparse device buffer, marks this object sparse, frees any dense buffer
     * currently held and records the block usage with the GPU context.
     *
     * @param sparseMatrixPtr CSR pointer to install
     * @throws DMLRuntimeException if a sparse buffer is already installed, or freeing the dense buffer fails
     */
    public void setSparseMatrixCudaPointer(CSRPointer sparseMatrixPtr) throws DMLRuntimeException {
        if (this.jcudaSparseMatrixPtr != null) {
            throw new DMLRuntimeException("jcudaSparseMatrixPtr was already allocated for " + this + ", this will cause a memory leak on the GPU");
        }
        this.jcudaSparseMatrixPtr = sparseMatrixPtr;
        this.isSparse = true;
        if (this.getJcudaDenseMatrixPtr() != null) {
            this.cudaFreeHelper(this.getJcudaDenseMatrixPtr());
            this.jcudaDenseMatrixPtr = null;
        }
        this.getGPUContext().recordBlockUsage(this);
    }

    /**
     * Installs a dense device buffer, marks this object dense, deallocates any sparse buffer
     * currently held and records the block usage with the GPU context.
     *
     * @param densePtr device pointer to install
     * @throws DMLRuntimeException if a dense buffer is already installed, or deallocating the sparse buffer fails
     */
    public void setDenseMatrixCudaPointer(Pointer densePtr) throws DMLRuntimeException {
        if (this.jcudaDenseMatrixPtr != null) {
            throw new DMLRuntimeException("jcudaDenseMatrixPtr was already allocated for " + this + ", this will cause a memory leak on the GPU");
        }
        this.jcudaDenseMatrixPtr = densePtr;
        this.isSparse = false;
        if (this.getJcudaSparseMatrixPtr() != null) {
            this.getJcudaSparseMatrixPtr().deallocate();
            this.jcudaSparseMatrixPtr = null;
        }
        this.getGPUContext().recordBlockUsage(this);
    }

    /**
     * Converts the dense device buffer into a CSR sparse device buffer.
     *
     * <p>The dense buffer is first transposed to column-major, then converted; installing
     * the sparse pointer frees the dense buffer.
     *
     * @throws DMLRuntimeException if cusparse is not initialized, no dense buffer is allocated,
     *                             a dimension does not fit in an int, or a device call fails
     */
    public void denseToSparse() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : dense -> sparse on " + this + ", GPUContext=" + this.getGPUContext());
        }
        long t0 = 0L;
        if (DMLScript.STATISTICS) {
            t0 = System.nanoTime();
        }
        cusparseHandle cusparseHandle2 = this.getGPUContext().getCusparseHandle();
        if (cusparseHandle2 == null) {
            throw new DMLRuntimeException("Expected cusparse to be initialized");
        }
        int rows = GPUObject.toIntExact(this.mat.getNumRows());
        int cols = GPUObject.toIntExact(this.mat.getNumColumns());
        if (this.getJcudaDenseMatrixPtr() == null || !this.isAllocated()) {
            throw new DMLRuntimeException("Expected allocated dense matrix before denseToSparse() call");
        }
        this.denseRowMajorToColumnMajor();
        this.setSparseMatrixCudaPointer(GPUObject.columnMajorDenseToRowMajorSparse(this.getGPUContext(), cusparseHandle2, this.getJcudaDenseMatrixPtr(), rows, cols));
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaDenseToSparseTime.add(System.nanoTime() - t0);
            GPUStatistics.cudaDenseToSparseCount.add(1L);
        }
    }

    /**
     * Replaces the dense device buffer with its transpose, treating the current buffer as
     * row-major (rows x cols) and producing the column-major layout.
     *
     * @throws DMLRuntimeException if nothing is allocated, a dimension does not fit in an int,
     *                             or a device call fails
     */
    public void denseRowMajorToColumnMajor() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : dense Ptr row-major -> col-major on " + this + ", GPUContext=" + this.getGPUContext());
        }
        int m = GPUObject.toIntExact(this.mat.getNumRows());
        int n = GPUObject.toIntExact(this.mat.getNumColumns());
        int lda = n;
        int ldc = m;
        if (!this.isAllocated()) {
            throw new DMLRuntimeException("Error in converting row major to column major : data is not allocated");
        }
        Pointer tmp = GPUObject.transpose(this.getGPUContext(), this.getJcudaDenseMatrixPtr(), m, n, lda, ldc);
        // Free and clear the old buffer first; the setter rejects a non-null existing pointer.
        this.cudaFreeHelper(this.getJcudaDenseMatrixPtr());
        this.jcudaDenseMatrixPtr = null;
        this.setDenseMatrixCudaPointer(tmp);
    }

    /**
     * Replaces the dense device buffer with its transpose, treating the current buffer as
     * column-major and producing the row-major layout.
     *
     * @throws DMLRuntimeException if nothing is allocated, a dimension does not fit in an int,
     *                             or a device call fails
     */
    public void denseColumnMajorToRowMajor() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            // Message corrected: this method converts col-major to row-major, not the reverse.
            LOG.trace("GPU : dense Ptr col-major -> row-major on " + this + ", GPUContext=" + this.getGPUContext());
        }
        int n = GPUObject.toIntExact(this.mat.getNumRows());
        int m = GPUObject.toIntExact(this.mat.getNumColumns());
        int lda = n;
        int ldc = m;
        if (!this.isAllocated()) {
            throw new DMLRuntimeException("Error in converting column major to row major : data is not allocated");
        }
        Pointer tmp = GPUObject.transpose(this.getGPUContext(), this.getJcudaDenseMatrixPtr(), m, n, lda, ldc);
        // Free and clear the old buffer first; the setter rejects a non-null existing pointer.
        this.cudaFreeHelper(this.getJcudaDenseMatrixPtr());
        this.jcudaDenseMatrixPtr = null;
        this.setDenseMatrixCudaPointer(tmp);
    }

    /**
     * Converts the sparse device buffer to a row-major dense device buffer
     * without attributing the time to an instruction.
     *
     * @throws DMLRuntimeException see {@link #sparseToDense(String)}
     */
    public void sparseToDense() throws DMLRuntimeException {
        this.sparseToDense(null);
    }

    /**
     * Converts the sparse device buffer to a row-major dense device buffer.
     *
     * @param instructionName instruction to attribute fine-grained timing to; may be null
     * @throws DMLRuntimeException if no sparse buffer is allocated or a device call fails
     */
    public void sparseToDense(String instructionName) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : sparse -> dense on " + this + ", GPUContext=" + this.getGPUContext());
        }
        long start = 0L;
        long end = 0L;
        if (DMLScript.STATISTICS) {
            start = System.nanoTime();
        }
        if (this.getJcudaSparseMatrixPtr() == null || !this.isAllocated()) {
            throw new DMLRuntimeException("Expected allocated sparse matrix before sparseToDense() call");
        }
        this.sparseToColumnMajorDense();
        this.denseColumnMajorToRowMajor();
        if (DMLScript.STATISTICS) {
            end = System.nanoTime();
        }
        if (instructionName != null && DMLScript.FINEGRAINED_STATISTICS) {
            GPUStatistics.maintainCPMiscTimes(instructionName, "s2d", end - start);
        }
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaSparseToDenseTime.add(end - start);
            GPUStatistics.cudaSparseToDenseCount.add(1L);
        }
    }

    /**
     * Converts the sparse device buffer to a column-major dense device buffer.
     * Installing the dense pointer deallocates the sparse buffer.
     *
     * @throws DMLRuntimeException if no sparse buffer is allocated, cusparse is not initialized,
     *                             a dimension does not fit in an int, or a device call fails
     */
    public void sparseToColumnMajorDense() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : sparse -> col-major dense on " + this + ", GPUContext=" + this.getGPUContext());
        }
        if (this.getJcudaSparseMatrixPtr() == null || !this.isAllocated()) {
            throw new DMLRuntimeException("Expected allocated sparse matrix before sparseToDense() call");
        }
        cusparseHandle cusparseHandle2 = this.getGPUContext().getCusparseHandle();
        if (cusparseHandle2 == null) {
            throw new DMLRuntimeException("Expected cusparse to be initialized");
        }
        int rows = GPUObject.toIntExact(this.mat.getNumRows());
        int cols = GPUObject.toIntExact(this.mat.getNumColumns());
        this.setDenseMatrixCudaPointer(this.getJcudaSparseMatrixPtr().toColumnMajorDenseMatrix(cusparseHandle2, null, rows, cols, null));
    }

    /**
     * Creates a GPU object for the given matrix object with no device buffers allocated.
     *
     * @param gCtx GPU context that will own the device buffers
     * @param mat2 matrix object this GPU object represents
     */
    GPUObject(GPUContext gCtx, MatrixObject mat2) {
        this.gpuContext = gCtx;
        this.mat = mat2;
    }

    /**
     * @return the value of the sparse flag
     */
    public boolean isSparse() {
        return this.isSparse;
    }

    /** Size in bytes of {@code numElems} values of the configured data type. */
    private static long getDatatypeSizeOf(long numElems) {
        return numElems * (long)LibMatrixCUDA.sizeOfDataType;
    }

    /** Size in bytes of {@code numElems} 4-byte integers. */
    private static long getIntSizeOf(long numElems) {
        return numElems * 4L;
    }

    /**
     * @return true if either a dense or a sparse device buffer is held
     */
    public boolean isAllocated() {
        return this.getJcudaDenseMatrixPtr() != null || this.getJcudaSparseMatrixPtr() != null;
    }

    /**
     * Checks that a device buffer is held and that this block is recorded with the GPU context.
     * Logs a warning when a buffer is held but the block is not recorded.
     *
     * @return true only if both conditions hold
     */
    public boolean isInputAllocated() {
        boolean eitherAllocated = this.getJcudaDenseMatrixPtr() != null || this.getJcudaSparseMatrixPtr() != null;
        boolean isAllocatedOnThisGPUContext = this.getGPUContext().isBlockRecorded(this);
        if (eitherAllocated && !isAllocatedOnThisGPUContext) {
            LOG.warn("GPU : A block was allocated but was not on this GPUContext, GPUContext=" + this.getGPUContext());
        }
        return eitherAllocated && isAllocatedOnThisGPUContext;
    }

    /**
     * Allocates an empty (nnz = 0) sparse device buffer for this matrix.
     *
     * @throws DMLRuntimeException if a sparse buffer already exists or allocation fails
     */
    public void allocateSparseAndEmpty() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocate sparse and empty block on " + this + ", GPUContext=" + this.getGPUContext());
        }
        this.setSparseMatrixCudaPointer(CSRPointer.allocateEmpty(this.getGPUContext(), 0L, this.mat.getNumRows()));
    }

    /**
     * Allocates a dense device buffer for this matrix and, when {@code v} is non-zero,
     * launches the "fill" kernel to set every element to {@code v}.
     *
     * @param v fill value
     * @throws DMLRuntimeException if the element count does not fit in an int, a dense buffer
     *                             already exists, or a device call fails
     */
    public void allocateAndFillDense(double v) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocate and fill dense with value " + v + " on " + this + ", GPUContext=" + this.getGPUContext());
        }
        long rows = this.mat.getNumRows();
        long cols = this.mat.getNumColumns();
        int numElems = GPUObject.toIntExact(rows * cols);
        long size = GPUObject.getDatatypeSizeOf(numElems);
        this.setDenseMatrixCudaPointer(this.allocate(size));
        if (v != 0.0) {
            this.getGPUContext().getKernels().launchKernel("fill", ExecutionConfig.getConfigForSimpleVectorOperations(numElems), this.getJcudaDenseMatrixPtr(), v, numElems);
        }
    }

    /**
     * @return true if a device buffer is held, the matrix is reported as sparse by
     *         {@code LibMatrixCUDA.isInSparseFormat}, and the sparse buffer has nnz == 0
     * @throws DMLRuntimeException if the sparsity check fails
     */
    public boolean isSparseAndEmpty() throws DMLRuntimeException {
        boolean isSparseAndAllocated = this.isAllocated() && LibMatrixCUDA.isInSparseFormat(this.getGPUContext(), this.mat);
        // Guard against a missing sparse buffer (e.g. only the dense buffer is held) instead of
        // failing with a NullPointerException.
        CSRPointer sparsePtr = this.getJcudaSparseMatrixPtr();
        return isSparseAndAllocated && sparsePtr != null && sparsePtr.nnz == 0L;
    }

    /**
     * Acquires a read lock on the device data, copying it from the host first if nothing is allocated.
     *
     * @param opcode instruction name forwarded to the host-to-device copy
     * @return true if data was copied from the host
     * @throws DMLRuntimeException if the write lock is held, the copy fails,
     *                             or data is still not allocated afterwards
     */
    public boolean acquireDeviceRead(String opcode) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceRead on " + this);
        }
        boolean transferred = false;
        if (!this.isAllocated()) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : in acquireDeviceRead, data is not allocated, copying from host, on " + this + ", GPUContext=" + this.getGPUContext());
            }
            this.copyFromHostToDevice(opcode);
            transferred = true;
        }
        this.addReadLock();
        if (!this.isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return transferred;
    }

    /**
     * Prepares the object for a dense modification on the device, allocating a dense buffer
     * if nothing is allocated, and marks the device copy dirty.
     *
     * @return true if a new dense buffer was allocated
     * @throws DMLRuntimeException if allocation fails
     */
    public boolean acquireDeviceModifyDense() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceModifyDense on " + this + ", GPUContext=" + this.getGPUContext());
        }
        boolean allocated = false;
        if (!this.isAllocated()) {
            this.mat.setDirty(true);
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : data is not allocated, allocating a dense block, on " + this);
            }
            this.allocateDenseMatrixOnDevice();
            allocated = true;
        }
        this.dirty = true;
        if (!this.isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return allocated;
    }

    /**
     * Prepares the object for a sparse modification on the device, allocating a sparse buffer
     * if nothing is allocated, and marks the device copy dirty. The sparse flag is set
     * regardless of whether a buffer was already allocated.
     *
     * @return true if a new sparse buffer was allocated
     * @throws DMLRuntimeException if allocation fails
     */
    public boolean acquireDeviceModifySparse() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : acquireDeviceModifySparse on " + this + ", GPUContext=" + this.getGPUContext());
        }
        boolean allocated = false;
        this.isSparse = true;
        if (!this.isAllocated()) {
            if (LOG.isTraceEnabled()) {
                LOG.trace("GPU : data is not allocated, allocating a sparse block, on " + this);
            }
            this.mat.setDirty(true);
            this.allocateSparseMatrixOnDevice();
            allocated = true;
        }
        this.dirty = true;
        if (!this.isAllocated()) {
            throw new DMLRuntimeException("Expected device data to be allocated");
        }
        return allocated;
    }

    /**
     * Makes the host copy current: if device data is allocated and dirty, copies it to the host.
     *
     * @param instName instruction name forwarded to the device-to-host copy
     * @return true if data was copied from the device
     * @throws CacheException wrapping any {@link DMLRuntimeException} raised during the copy
     */
    public boolean acquireHostRead(String instName) throws CacheException {
        boolean copied = false;
        try {
            if (LOG.isTraceEnabled()) {
                // Message corrected: previously named a different method (copy-paste error).
                LOG.trace("GPU : acquireHostRead on " + this + ", GPUContext=" + this.getGPUContext());
            }
            if (this.isAllocated() && this.dirty) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("GPU : data is dirty on device, copying to host, on " + this + ", GPUContext=" + this.getGPUContext());
                }
                this.copyFromDeviceToHost(instName, false);
                copied = true;
            }
        }
        catch (DMLRuntimeException e) {
            throw new CacheException(e);
        }
        return copied;
    }

    /**
     * @return true if the write lock is held or at least one read lock is held
     */
    public boolean isLocked() {
        return this.writeLock || this.readLocks.longValue() > 0L;
    }

    /**
     * Adds one read lock.
     *
     * @throws DMLRuntimeException if the write lock is held
     */
    public void addReadLock() throws DMLRuntimeException {
        if (this.writeLock) {
            throw new DMLRuntimeException("Attempting to add a read lock when writeLock=" + this.writeLock);
        }
        this.readLocks.increment();
    }

    /**
     * Takes the write lock.
     *
     * @throws DMLRuntimeException if any read lock or the write lock is already held
     */
    public void addWriteLock() throws DMLRuntimeException {
        if (this.readLocks.longValue() > 0L) {
            throw new DMLRuntimeException("Attempting to add a write lock when readLocks=" + this.readLocks.longValue());
        }
        if (this.writeLock) {
            throw new DMLRuntimeException("Attempting to add a write lock when writeLock=" + this.writeLock);
        }
        this.writeLock = true;
    }

    /**
     * Releases one read lock.
     *
     * @throws DMLRuntimeException if the read lock count drops below zero
     */
    public void releaseReadLock() throws DMLRuntimeException {
        this.readLocks.decrement();
        if (this.readLocks.longValue() < 0L) {
            throw new DMLRuntimeException("Attempting to release a read lock when readLocks=" + this.readLocks.longValue());
        }
    }

    /**
     * Releases the write lock.
     *
     * @throws DMLRuntimeException if the write lock is not held
     */
    public void releaseWriteLock() throws DMLRuntimeException {
        if (!this.writeLock) {
            throw new DMLRuntimeException("Internal state error : Attempting to release write lock on a GPUObject, which was already released");
        }
        this.writeLock = false;
    }

    /** Clears all read locks and the write lock. */
    public void resetReadWriteLock() {
        this.readLocks.reset();
        this.writeLock = false;
    }

    /**
     * Updates the eviction timestamp according to the GPU context's eviction policy:
     * LRU stores the current nanoTime, LFU increments the counter, MIN_EVICT leaves it unchanged.
     *
     * @throws DMLRuntimeException (as CacheException) for any other policy
     */
    private void updateReleaseLocks() throws DMLRuntimeException {
        GPUContext.EvictionPolicy evictionPolicy = this.getGPUContext().evictionPolicy;
        switch (evictionPolicy) {
            case LRU: {
                this.timestamp.set(System.nanoTime());
                break;
            }
            case LFU: {
                this.timestamp.addAndGet(1L);
                break;
            }
            case MIN_EVICT: {
                break;
            }
            default: {
                throw new CacheException("The eviction policy is not supported:" + evictionPolicy.name());
            }
        }
    }

    /**
     * Releases a read lock taken for an input and updates the eviction timestamp.
     *
     * @throws DMLRuntimeException if the lock state is invalid or no device data is allocated
     */
    public void releaseInput() throws DMLRuntimeException {
        this.releaseReadLock();
        this.updateReleaseLocks();
        if (!this.isAllocated()) {
            throw new CacheException("Attempting to release an input before allocating it");
        }
    }

    /**
     * Releases the write lock taken for an output, updates the eviction timestamp
     * and marks the device copy dirty.
     *
     * @throws DMLRuntimeException if the lock state is invalid or no device data is allocated
     */
    public void releaseOutput() throws DMLRuntimeException {
        this.releaseWriteLock();
        this.updateReleaseLocks();
        this.dirty = true;
        if (!this.isAllocated()) {
            throw new CacheException("Attempting to release an output before allocating it");
        }
    }

    /**
     * Allocates a dense device buffer of rows * cols elements for this matrix.
     *
     * @throws DMLRuntimeException if a buffer is already allocated, a dimension is not positive,
     *                             or allocation fails
     */
    void allocateDenseMatrixOnDevice() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocateDenseMatrixOnDevice, on " + this + ", GPUContext=" + this.getGPUContext());
        }
        if (this.isAllocated()) {
            throw new DMLRuntimeException("Internal error - trying to allocated dense matrix to a GPUObject that is already allocated");
        }
        long rows = this.mat.getNumRows();
        long cols = this.mat.getNumColumns();
        if (rows <= 0L) {
            throw new DMLRuntimeException("Internal error - invalid number of rows when allocating dense matrix");
        }
        if (cols <= 0L) {
            throw new DMLRuntimeException("Internal error - invalid number of columns when allocating dense matrix;");
        }
        long size = GPUObject.getDatatypeSizeOf(rows * cols);
        Pointer tmp = this.allocate(size);
        this.setDenseMatrixCudaPointer(tmp);
    }

    /**
     * Allocates a sparse (CSR) device buffer sized from the matrix's row count and nnz.
     *
     * @throws DMLRuntimeException if a buffer is already allocated, rows is not positive,
     *                             nnz is negative, or allocation fails
     */
    void allocateSparseMatrixOnDevice() throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : allocateSparseMatrixOnDevice, on " + this + ", GPUContext=" + this.getGPUContext());
        }
        if (this.isAllocated()) {
            throw new DMLRuntimeException("Internal error - trying to allocated sparse matrix to a GPUObject that is already allocated");
        }
        long rows = this.mat.getNumRows();
        long nnz = this.mat.getNnz();
        if (rows <= 0L) {
            throw new DMLRuntimeException("Internal error - invalid number of rows when allocating sparse matrix");
        }
        if (nnz < 0L) {
            throw new DMLRuntimeException("Internal error - invalid number of non zeroes when allocating a sparse matrix");
        }
        CSRPointer tmp = CSRPointer.allocateEmpty(this.getGPUContext(), nnz, rows);
        this.setSparseMatrixCudaPointer(tmp);
    }

    /**
     * Frees whichever device buffers are held, clears the pointers, resets all locks
     * and removes this object's recorded usage from the GPU context.
     *
     * @param eager forwarded to the free helpers
     * @throws DMLRuntimeException if freeing fails
     */
    void deallocateMemoryOnDevice(boolean eager) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : deallocateMemoryOnDevice, on " + this + ", GPUContext=" + this.getGPUContext());
        }
        if (this.getJcudaDenseMatrixPtr() != null) {
            this.cudaFreeHelper(null, this.getJcudaDenseMatrixPtr(), eager);
        }
        if (this.getJcudaSparseMatrixPtr() != null) {
            this.getJcudaSparseMatrixPtr().deallocate(eager);
        }
        this.jcudaDenseMatrixPtr = null;
        this.jcudaSparseMatrixPtr = null;
        this.resetReadWriteLock();
        this.getGPUContext().removeRecordedUsage(this);
    }

    /**
     * Computes the device size for this matrix from its dimensions and nnz: the CSR estimate
     * when the matrix is reported as sparse, otherwise rows * cols elements of the data type.
     *
     * @return size in bytes
     * @throws DMLRuntimeException if the sparsity check fails
     */
    protected long getSizeOnDevice() throws DMLRuntimeException {
        long rlen = this.mat.getNumRows();
        long clen = this.mat.getNumColumns();
        long nnz = this.mat.getNnz();
        if (LibMatrixCUDA.isInSparseFormat(this.getGPUContext(), this.mat)) {
            return CSRPointer.estimateSize(nnz, rlen);
        }
        return GPUObject.getDatatypeSizeOf(rlen * clen);
    }

    /**
     * Copies the host matrix block to the device, allocating a sparse or dense device buffer
     * to match the host block's format.
     *
     * <p>Sparse host blocks in COO or MCSR form are converted to CSR on the host first.
     * A dense host block with zero non-zeros is not transferred; the device buffer is zeroed instead.
     *
     * @param opcode instruction name used for fine-grained statistics
     * @throws DMLRuntimeException if the host block is in an inconsistent or unsupported state,
     *                             device data is already allocated, or a device call fails
     */
    void copyFromHostToDevice(String opcode) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : copyFromHostToDevice, on " + this + ", GPUContext=" + this.getGPUContext());
        }
        long start = 0L;
        if (DMLScript.STATISTICS) {
            start = System.nanoTime();
        }
        long acqrTime = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
        MatrixBlock tmp = (MatrixBlock)this.mat.acquireRead();
        if (DMLScript.FINEGRAINED_STATISTICS) {
            if (tmp.isInSparseFormat()) {
                GPUStatistics.maintainCPMiscTimes(opcode, "aqrs", System.nanoTime() - acqrTime);
            } else {
                GPUStatistics.maintainCPMiscTimes(opcode, "aqrd", System.nanoTime() - acqrTime);
            }
        }
        if (tmp.isInSparseFormat()) {
            int[] rowPtr = null;
            int[] colInd = null;
            double[] values = null;
            if (tmp.getNonZeros() < 0L) {
                tmp.recomputeNonZeros(opcode);
            }
            long nnz = tmp.getNonZeros();
            this.mat.getMatrixCharacteristics().setNonZeros(nnz);
            SparseBlock block = tmp.getSparseBlock();
            boolean copyToDevice = true;
            if (block == null && tmp.getNonZeros() == 0L) {
                // Empty sparse block: allocate on the device but there is nothing to transfer.
                copyToDevice = false;
            } else {
                if (block == null && tmp.getNonZeros() != 0L) {
                    throw new DMLRuntimeException("Expected CP sparse block to be not null.");
                }
                SparseBlockCSR csrBlock = null;
                long t0 = 0L;
                if (block instanceof SparseBlockCSR) {
                    csrBlock = (SparseBlockCSR)block;
                } else if (block instanceof SparseBlockCOO) {
                    if (DMLScript.STATISTICS) {
                        t0 = System.nanoTime();
                    }
                    SparseBlockCOO cooBlock = (SparseBlockCOO)block;
                    csrBlock = new SparseBlockCSR(GPUObject.toIntExact(this.mat.getNumRows()), cooBlock.rowIndexes(), cooBlock.indexes(), cooBlock.values());
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - t0);
                        GPUStatistics.cudaSparseConversionCount.increment();
                    }
                } else if (block instanceof SparseBlockMCSR) {
                    if (DMLScript.STATISTICS) {
                        t0 = System.nanoTime();
                    }
                    SparseBlockMCSR mcsrBlock = (SparseBlockMCSR)block;
                    csrBlock = new SparseBlockCSR(mcsrBlock.getRows(), GPUObject.toIntExact(mcsrBlock.size()));
                    if (DMLScript.STATISTICS) {
                        GPUStatistics.cudaSparseConversionTime.add(System.nanoTime() - t0);
                        GPUStatistics.cudaSparseConversionCount.increment();
                    }
                } else {
                    throw new DMLRuntimeException("Unsupported sparse matrix format for CUDA operations");
                }
                rowPtr = csrBlock.rowPointers();
                colInd = csrBlock.indexes();
                values = csrBlock.values();
            }
            this.allocateSparseMatrixOnDevice();
            if (copyToDevice) {
                long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
                CSRPointer.copyToDevice(this.getGPUContext(), this.getJcudaSparseMatrixPtr(), tmp.getNumRows(), tmp.getNonZeros(), rowPtr, colInd, values);
                if (DMLScript.FINEGRAINED_STATISTICS) {
                    GPUStatistics.maintainCPMiscTimes(opcode, "H2D", System.nanoTime() - t1);
                }
            }
        } else {
            double[] data = tmp.getDenseBlock();
            if (data == null && tmp.getSparseBlock() != null) {
                throw new DMLRuntimeException("Incorrect sparsity calculation");
            }
            if (data == null && tmp.getNonZeros() != 0L) {
                throw new DMLRuntimeException("MatrixBlock is not allocated");
            }
            this.allocateDenseMatrixOnDevice();
            if (tmp.getNonZeros() == 0L) {
                // All-zero block: zero the device buffer instead of transferring host data.
                long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0L;
                JCuda.cudaMemset(this.getJcudaDenseMatrixPtr(), 0, GPUObject.getDatatypeSizeOf(this.mat.getNumRows() * this.mat.getNumColumns()));
                if (DMLScript.FINEGRAINED_STATISTICS) {
                    GPUStatistics.maintainCPMiscTimes(opcode, "az", System.nanoTime() - t1);
                }
            } else {
                LibMatrixCUDA.cudaSupportFunctions.hostToDevice(this.getGPUContext(), data, this.getJcudaDenseMatrixPtr(), opcode);
            }
        }
        this.mat.release();
        if (DMLScript.STATISTICS) {
            GPUStatistics.cudaToDevTime.add(System.nanoTime() - start);
            GPUStatistics.cudaToDevCount.add(1L);
        }
    }

    /**
     * Narrows a long to an int, failing instead of silently truncating.
     *
     * @param l value to convert
     * @return {@code l} as an int
     * @throws DMLRuntimeException if {@code l} is outside the int range
     */
    public static int toIntExact(long l) throws DMLRuntimeException {
        if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
            throw new DMLRuntimeException("Cannot be cast to int:" + l);
        }
        return (int)l;
    }

    /**
     * Copies the device data back into a new host matrix block, hands it to the matrix object
     * and clears the dirty flag.
     *
     * @param instName   instruction name forwarded to the device-to-host transfer
     * @param isEviction forwarded to the device-to-host transfer
     * @throws DMLRuntimeException if both or neither device buffers are allocated, the host/device
     *                             sparsity is inconsistent, a dimension or nnz does not fit in an int,
     *                             or a device call fails
     */
    protected void copyFromDeviceToHost(String instName, boolean isEviction) throws DMLRuntimeException {
        if (LOG.isTraceEnabled()) {
            LOG.trace("GPU : copyFromDeviceToHost, on " + this + ", GPUContext=" + this.getGPUContext());
        }
        if (this.getJcudaDenseMatrixPtr() != null && this.getJcudaSparseMatrixPtr() != null) {
            throw new DMLRuntimeException("Invalid state : JCuda dense/sparse pointer are both allocated");
        }
        if (this.getJcudaDenseMatrixPtr() != null) {
            long start = 0L;
            if (DMLScript.STATISTICS) {
                start = System.nanoTime();
            }
            MatrixBlock tmp = new MatrixBlock(GPUObject.toIntExact(this.mat.getNumRows()), GPUObject.toIntExact(this.mat.getNumColumns()), false);
            tmp.allocateDenseBlock();
            LibMatrixCUDA.cudaSupportFunctions.deviceToHost(this.getGPUContext(), this.getJcudaDenseMatrixPtr(), tmp.getDenseBlock(), instName, isEviction);
            tmp.recomputeNonZeros();
            this.mat.acquireModify(tmp);
            this.mat.release();
            if (DMLScript.STATISTICS) {
                GPUStatistics.cudaFromDevTime.add(System.nanoTime() - start);
                GPUStatistics.cudaFromDevCount.add(1L);
            }
        } else if (this.getJcudaSparseMatrixPtr() != null) {
            if (!LibMatrixCUDA.isInSparseFormat(this.getGPUContext(), this.mat)) {
                throw new DMLRuntimeException("Block not in sparse format on host yet the device sparse matrix pointer is not null");
            }
            if (this.isSparseAndEmpty()) {
                // Checked narrowing, consistent with every other dimension conversion in this method.
                MatrixBlock tmp = new MatrixBlock(GPUObject.toIntExact(this.mat.getNumRows()), GPUObject.toIntExact(this.mat.getNumColumns()), 0L);
                this.mat.acquireModify(tmp);
                this.mat.release();
            } else {
                long start = 0L;
                if (DMLScript.STATISTICS) {
                    start = System.nanoTime();
                }
                int rows = GPUObject.toIntExact(this.mat.getNumRows());
                int cols = GPUObject.toIntExact(this.mat.getNumColumns());
                int nnz = GPUObject.toIntExact(this.getJcudaSparseMatrixPtr().nnz);
                double[] values = new double[nnz];
                LibMatrixCUDA.cudaSupportFunctions.deviceToHost(this.getGPUContext(), this.getJcudaSparseMatrixPtr().val, values, instName, isEviction);
                int[] rowPtr = new int[rows + 1];
                int[] colInd = new int[nnz];
                long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0L;
                CSRPointer.copyPtrToHost(this.getJcudaSparseMatrixPtr(), rows, nnz, rowPtr, colInd);
                if (DMLScript.STATISTICS) {
                    GPUStatistics.cudaFromDevTime.add(System.nanoTime() - t0);
                    GPUStatistics.cudaFromDevCount.add(3L);
                }
                SparseBlockCSR sparseBlock = new SparseBlockCSR(rowPtr, colInd, values, nnz);
                MatrixBlock tmp = new MatrixBlock(rows, cols, nnz, sparseBlock);
                this.mat.acquireModify(tmp);
                this.mat.release();
                if (DMLScript.STATISTICS) {
                    GPUStatistics.cudaFromDevTime.add(System.nanoTime() - start);
                    GPUStatistics.cudaFromDevCount.add(1L);
                }
            }
        } else {
            throw new DMLRuntimeException("Cannot copy from device to host as JCuda dense/sparse pointer is not allocated");
        }
        this.dirty = false;
    }

    /**
     * Frees the device data using the globally configured eager-free setting.
     *
     * @throws DMLRuntimeException if freeing fails
     */
    public void clearData() throws DMLRuntimeException {
        this.clearData(DMLScript.EAGER_CUDA_FREE);
    }

    /**
     * Frees the device data and removes this object's recorded usage from the GPU context.
     *
     * @param eager forwarded to the deallocation
     * @throws DMLRuntimeException if freeing fails
     */
    public void clearData(boolean eager) throws DMLRuntimeException {
        this.deallocateMemoryOnDevice(eager);
        this.getGPUContext().removeRecordedUsage(this);
    }

    /**
     * @return the dense device pointer, or null if no dense buffer is held
     */
    public Pointer getJcudaDenseMatrixPtr() {
        return this.jcudaDenseMatrixPtr;
    }

    /**
     * @return the CSR device pointer, or null if no sparse buffer is held
     */
    public CSRPointer getJcudaSparseMatrixPtr() {
        return this.jcudaSparseMatrixPtr;
    }

    /**
     * @return the value of the dirty flag
     */
    public boolean isDirty() {
        return this.dirty;
    }

    /**
     * @return a description containing the dirty flag, lock state, sparse flag and matrix dimensions
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("GPUObject{");
        sb.append(", dirty=").append(this.dirty);
        sb.append(", readLocks=").append(this.readLocks.longValue());
        sb.append(", writeLock=").append(this.writeLock);
        sb.append(", sparse? ").append(this.isSparse);
        sb.append(", dims=[").append(this.mat.getNumRows()).append(",").append(this.mat.getNumColumns()).append("]");
        sb.append('}');
        return sb.toString();
    }
}

