Search in sources :

Example 41 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class RandSPInstruction method generateRandData.

/**
 * Generates the output of a rand instruction.
 *
 * If {@code isMemAvail} holds for the requested dimensions and the runtime
 * platform is not SPARK, the whole matrix is generated as a single in-memory
 * block. Otherwise an RDD with one seed per output block is built (from an
 * in-memory list or, for many blocks, from a temporary seed file) and each
 * seed is mapped to a block via {@code GenerateRandomBlock}.
 *
 * @param sec spark execution context
 * @throws DMLRuntimeException if the temporary seed file cannot be created or written
 */
private void generateRandData(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    long lcols = sec.getScalarInput(cols).getLongValue();
    // step 1: generate pseudo-random seed (because not specified)
    // seed per invocation
    long lSeed = seed;
    if (lSeed == DataGenOp.UNSPECIFIED_SEED)
        lSeed = DataGenOp.generateRandomSeed();
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction rand with seed = " + lSeed + ".");
    // step 2: potential in-memory rand operations if applicable
    if (isMemAvail(lrows, lcols, sparsity, minValue, maxValue) && DMLScript.rtplatform != RUNTIME_PLATFORM.SPARK) {
        RandomMatrixGenerator rgen = LibMatrixDatagen.createRandomMatrixGenerator(pdf, (int) lrows, (int) lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdfParams);
        MatrixBlock mb = MatrixBlock.randOperations(rgen, lSeed);
        sec.setMatrixOutput(output.getName(), mb, getExtendedOpcode());
        // adjust the executed-Spark-instruction counter (presumably because no Spark job ran)
        Statistics.decrementNoOfExecutedSPInst();
        return;
    }
    // step 3: seed generation
    JavaPairRDD<MatrixIndexes, Long> seedsRDD = null;
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(lSeed);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(lrows, lcols, rowsInBlock, colsInBlock, sparsity);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    MatrixCharacteristics tmp = new MatrixCharacteristics(lrows, lcols, rowsInBlock, colsInBlock);
    long numBlocks = tmp.getNumBlocks();
    long numColBlocks = tmp.getNumColBlocks();
    // for load balancing: roughly one hdfs block of data per partition, but
    // never more partitions than blocks and never fewer than one
    int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
    // a) in-memory seed rdd construction
    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        ArrayList<Tuple2<MatrixIndexes, Long>> seeds = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            // 1-based block indexes in row-major order
            long r = 1 + i / numColBlocks;
            long c = 1 + i % numColBlocks;
            MatrixIndexes indx = new MatrixIndexes(r, c);
            Long seedForBlock = bigrand.nextLong();
            seeds.add(new Tuple2<>(indx, seedForBlock));
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().parallelizePairs(seeds, numPartitions);
    } else // b) file-based seed rdd construction (for robustness wrt large number of blocks)
    {
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            StringBuilder sb = new StringBuilder();
            // one line per block: "<rowBlockIndex>,<colBlockIndex>,<seed>"
            for (long i = 0; i < numBlocks; i++) {
                sb.append(1 + i / numColBlocks);
                sb.append(',');
                sb.append(1 + i % numColBlocks);
                sb.append(',');
                sb.append(bigrand.nextLong());
                pw.println(sb.toString());
                sb.setLength(0);
            }
            // PrintWriter swallows IOExceptions on write; without this check a
            // partially written seed file would silently drop output blocks
            if (pw.checkError())
                throw new DMLRuntimeException("Failed to write rand seeds to " + path.toString());
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        // create seeds rdd
        seedsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).mapToPair(new ExtractSeedTuple());
    }
    // step 4: execute rand instruction over seed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = seedsRDD.mapToPair(new GenerateRandomBlock(lrows, lcols, rowsInBlock, colsInBlock, sparsity, minValue, maxValue, pdf, pdfParams));
    // step 5: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown(true)) {
        // note: we cannot compute the nnz from sparsity because this would not reflect the
        // actual number of non-zeros, except for extreme values of sparsity equals 0 or 1.
        long lnnz = (sparsity == 0 || sparsity == 1) ? (long) (sparsity * lrows * lcols) : -1;
        mcOut.set(lrows, lcols, rowsInBlock, colsInBlock, lnnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Also used : RandomMatrixGenerator(org.apache.sysml.runtime.matrix.data.RandomMatrixGenerator) Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) Tuple2(scala.Tuple2) FileSystem(org.apache.hadoop.fs.FileSystem) Well1024a(org.apache.commons.math3.random.Well1024a) PrintWriter(java.io.PrintWriter)

Example 42 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class RandSPInstruction method generateSample.

/**
 * Helper function to construct a sample.
 *
 * The population range [1, maxValue] is split into contiguous sub-ranges, each
 * handled by one {@code SampleTask} with its own seed. The sampled values are
 * then randomly reordered, trimmed to the requested size and converted into a
 * single-column binary-block matrix.
 *
 * @param sec spark execution context
 * @throws DMLRuntimeException if the sample size exceeds the population size
 *         and sampling without replacement was requested
 */
private void generateSample(SparkExecutionContext sec) {
    long lrows = sec.getScalarInput(rows).getLongValue();
    // report the resolved sample size (lrows), not the operand object (rows)
    if (maxValue < lrows && !replace)
        throw new DMLRuntimeException("Sample (size=" + lrows + ") larger than population (size=" + maxValue + ") can only be generated with replacement.");
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction sample with range=" + maxValue + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
    // sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time.
    double fraction = SamplingUtils.computeFractionForSampleSize((int) lrows, UtilFunctions.toLong(maxValue), replace);
    Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(seed);
    // divide the population range across numPartitions by creating SampleTasks
    double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long outputSize = MatrixBlock.estimateSizeDenseInMemory(lrows, 1);
    // at least one partition, consistent with the other data generators; this
    // also keeps the partition size below finite and the parallelize call valid
    int numPartitions = Math.max(1, (int) Math.ceil((double) outputSize / hdfsBlockSize));
    long partitionSize = (long) Math.ceil(maxValue / numPartitions);
    ArrayList<SampleTask> offsets = new ArrayList<>();
    // one task per sub-range, starting at 1 and advancing by partitionSize
    long st = 1;
    while (st <= maxValue) {
        SampleTask s = new SampleTask();
        s.range_start = st;
        s.seed = bigrand.nextLong();
        offsets.add(s);
        st = st + partitionSize;
    }
    JavaRDD<SampleTask> offsetRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
    // Construct the sample in a distributed manner
    JavaRDD<Double> rdd = offsetRDD.flatMap((new GenerateSampleBlock(replace, fraction, (long) maxValue, partitionSize)));
    // Randomize the sampled elements
    JavaRDD<Double> randomizedRDD = rdd.mapToPair(new AttachRandom()).sortByKey().values();
    // Trim the sampled list to required size & attach matrix indexes to randomized elements
    JavaPairRDD<MatrixIndexes, MatrixCell> miRDD = randomizedRDD.zipWithIndex().filter(new TrimSample(lrows)).mapToPair(new Double2MatrixCell());
    MatrixCharacteristics mcOut = new MatrixCharacteristics(lrows, 1, rowsInBlock, colsInBlock, lrows);
    // Construct BinaryBlock representation
    JavaPairRDD<MatrixIndexes, MatrixBlock> mbRDD = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), miRDD, mcOut, true);
    sec.getMatrixCharacteristics(output.getName()).setNonZeros(lrows);
    sec.setRDDHandleForVariable(output.getName(), mbRDD);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Well1024a(org.apache.commons.math3.random.Well1024a)

Example 43 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class RandSPInstruction method generateSequence.

/**
 * Generates the output of a seq instruction as a single-column matrix.
 *
 * One starting offset is computed per output row block (from an in-memory
 * list or, for many blocks, from a temporary offsets file) and each offset is
 * mapped to a block via {@code GenerateSequenceBlock}.
 *
 * @param sec spark execution context
 * @throws DMLRuntimeException if the increment is zero, or if the temporary
 *         offsets file cannot be created or written
 */
private void generateSequence(SparkExecutionContext sec) {
    double lfrom = sec.getScalarInput(seq_from).getDoubleValue();
    double lto = sec.getScalarInput(seq_to).getDoubleValue();
    double lincr = sec.getScalarInput(seq_incr).getDoubleValue();
    // sanity check valid increment
    if (lincr == 0) {
        throw new DMLRuntimeException("ERROR: While performing seq(" + lfrom + "," + lto + "," + lincr + ")");
    }
    // handle default 1 to -1 for special case of from>to
    lincr = LibMatrixDatagen.updateSeqIncr(lfrom, lto, lincr);
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction seq with seqFrom=" + lfrom + ", seqTo=" + lto + ", seqIncr=" + lincr);
    // step 1: offset generation
    JavaRDD<Double> offsetsRDD = null;
    long nnz = UtilFunctions.getSeqLength(lfrom, lto, lincr);
    // overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(nnz, 1, rowsInBlock, colsInBlock, nnz);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long numBlocks = (long) Math.ceil(((double) nnz) / rowsInBlock);
    // for load balancing: roughly one hdfs block of data per partition, but
    // never more partitions than blocks and never fewer than one
    int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
    // a) in-memory offset rdd construction
    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        ArrayList<Double> offsets = new ArrayList<>();
        for (long i = 0; i < numBlocks; i++) {
            // first sequence value of the i-th row block
            double off = lfrom + lincr * i * rowsInBlock;
            offsets.add(off);
        }
        // create offset rdd
        offsetsRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
    } else // b) file-based offset rdd construction (for robustness wrt large number of blocks)
    {
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            // one offset per line
            for (long i = 0; i < numBlocks; i++) {
                double off = lfrom + lincr * i * rowsInBlock;
                pw.println(off);
            }
            // PrintWriter swallows IOExceptions on write; without this check a
            // partially written offsets file would silently drop output blocks
            if (pw.checkError())
                throw new DMLRuntimeException("Failed to write seq offsets to " + path.toString());
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        // create offset rdd
        offsetsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).map(new ExtractOffsetTuple());
    }
    // step 2: execute seq instruction over offset input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = offsetsRDD.mapToPair(new GenerateSequenceBlock(rowsInBlock, lfrom, lto, lincr));
    // step 3: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown()) {
        mcOut.set(nnz, 1, rowsInBlock, colsInBlock, nnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FileSystem(org.apache.hadoop.fs.FileSystem) PrintWriter(java.io.PrintWriter)

Example 44 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class ReblockSPInstruction method processMatrixReblockInstruction.

/**
 * Reblocks the matrix input into binary-block format according to the
 * format of the input.
 *
 * CSV input is handed off to a {@code CSVReblockSPInstruction}; text cell,
 * matrix market, binary cell and binary block inputs are converted here and
 * registered as the output RDD with lineage to the input.
 *
 * @param sec spark execution context
 * @param iinfo input info describing the format of the input matrix
 * @throws DMLRuntimeException if the given input info is not supported
 */
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) {
    final String inName = input1.getName();
    MatrixObject inObj = sec.getMatrixObject(inName);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(inName);
    final String outName = output.getName();
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(outName);

    // CSV: delegate to the dedicated csv reblock instruction and stop here
    if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
        // defaults, overridden below if csv format properties are attached to the input
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        Object fmt = inObj.getFileFormatProperties();
        if (fmt instanceof CSVFileFormatProperties) {
            CSVFileFormatProperties csvProps = (CSVFileFormatProperties) fmt;
            hasHeader = csvProps.hasHeader();
            delim = csvProps.getDelim();
            fill = csvProps.isFill();
            fillValue = csvProps.getFillValue();
        }
        CSVReblockSPInstruction csvReblock = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvReblock.processInstruction(sec);
        return;
    }

    JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        // BINARY BLOCK <- TEXT CELL / MATRIX MARKET
        JavaPairRDD<LongWritable, Text> textLines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(inName, iinfo);
        result = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), textLines, mcOut, outputEmptyBlocks);
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        // BINARY BLOCK <- BINARY CELL
        JavaPairRDD<MatrixIndexes, MatrixCell> cells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(inName, iinfo);
        result = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), cells, mcOut, outputEmptyBlocks);
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        // BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(inName);
        // per dimension: the matrix is smaller than one output block, or the
        // input block size is a multiple of the output block size
        boolean rowsAligned = mcIn.getRows() < mcOut.getRowsPerBlock() || mcIn.getRowsPerBlock() % mcOut.getRowsPerBlock() == 0;
        boolean colsAligned = mcIn.getCols() < mcOut.getColsPerBlock() || mcIn.getColsPerBlock() % mcOut.getColsPerBlock() == 0;
        boolean shuffleFree = mcIn.dimsKnown() && mcOut.dimsKnown() && rowsAligned && colsAligned;
        result = blocks.flatMapToPair(new ExtractBlockForBinaryReblock(mcIn, mcOut));
        if (!shuffleFree) {
            result = RDDAggregateUtils.mergeByKey(result, false);
        }
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }

    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(outName, result);
    sec.addLineageRDD(outName, inName);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Text(org.apache.hadoop.io.Text) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExtractBlockForBinaryReblock(org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) LongWritable(org.apache.hadoop.io.LongWritable)

Example 45 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class ReorgSPInstruction method processInstruction.

/**
 * Executes a reorg operation over the binary-block input RDD, selected by
 * opcode: transpose ("r'"), reverse ("rev"), diag ("rdiag") or sort ("rsort").
 * The resulting RDD is registered as the output with lineage to the input.
 *
 * @param ec execution context, cast to a spark execution context
 * @throws DMLRuntimeException if the opcode is none of the above
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    final String opcode = getOpcode();
    final boolean isTranspose = opcode.equalsIgnoreCase("r'");
    final boolean isReverse = opcode.equalsIgnoreCase("rev");
    final boolean isDiag = opcode.equalsIgnoreCase("rdiag");
    final boolean isSort = opcode.equalsIgnoreCase("rsort");

    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out;

    if (isTranspose) {
        // TRANSPOSE: execute transpose reorg operation
        out = in1.mapToPair(new ReorgMapFunction(opcode));
    } else if (isReverse) {
        // REVERSE: execute reverse reorg operation
        out = in1.flatMapToPair(new RDDRevFunction(mcIn));
        // merge only needed if the rows do not divide evenly into row blocks
        if (mcIn.getRows() % mcIn.getRowsPerBlock() != 0) {
            out = RDDAggregateUtils.mergeByKey(out, false);
        }
    } else if (isDiag) {
        // DIAG
        if (mcIn.getCols() == 1) {
            // diagV2M
            out = in1.flatMapToPair(new RDDDiagV2MFunction(mcIn));
        } else {
            // diagM2V: execute diagM2V operation
            out = in1.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(opcode));
        }
    } else if (isSort) {
        // ORDER: sort by column(s) in ascending/descending order and return either index/value
        // get parameters
        long[] byCols;
        if (_col.getDataType().isMatrix()) {
            byCols = DataConverter.convertToLongVector(ec.getMatrixInput(_col.getName()));
        } else {
            byCols = new long[] { ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue() };
        }
        boolean descending = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean returnIndexes = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean ascending = !descending;
        boolean singleCol = (mcIn.getCols() == 1);
        out = in1;
        if (byCols.length > mcIn.getColsPerBlock()) {
            LOG.warn("Unsupported sort with number of order-by columns large than blocksize: " + byCols.length);
        }
        if (singleCol || byCols.length == 1) {
            // extract column (if necessary) and sort
            if (!singleCol) {
                int colInBlock = (int) UtilFunctions.computeCellInBlock(byCols[0], mcIn.getColsPerBlock());
                out = out.filter(new IsBlockInRange(1, mcIn.getRows(), byCols[0], byCols[0], mcIn)).mapValues(new ExtractColumn(colInBlock));
            }
            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                out = RDDSortUtils.sortIndexesByVal(out, ascending, mcIn.getRows(), mcIn.getRowsPerBlock());
            } else if (singleCol && ascending) {
                // sort single-column matrix
                out = RDDSortUtils.sortByVal(out, mcIn.getRows(), mcIn.getRowsPerBlock());
            } else if (!_bSortIndInMem) {
                // sort multi-column matrix w/ rewrite
                out = RDDSortUtils.sortDataByVal(out, in1, ascending, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
            } else {
                // sort multi-column matrix
                out = RDDSortUtils.sortDataByValMemSort(out, in1, ascending, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), sec, (ReorgOperator) _optr);
            }
        } else {
            // extract columns (if necessary)
            if (byCols.length < mcIn.getCols()) {
                out = out.filter(new IsBlockInList(byCols, mcIn)).mapToPair(new ExtractColumns(byCols, mcIn));
            }
            // append extracted columns (if necessary)
            if (mcIn.getCols() > mcIn.getColsPerBlock()) {
                out = RDDAggregateUtils.mergeByKey(out);
            }
            // actual index/data sort operation
            if (returnIndexes) {
                // sort indexes
                out = RDDSortUtils.sortIndexesByVals(out, ascending, mcIn.getRows(), (long) byCols.length, mcIn.getRowsPerBlock());
            } else if (byCols.length == mcIn.getCols() && ascending) {
                // order-by covers all columns, ascending
                out = RDDSortUtils.sortByVals(out, mcIn.getRows(), byCols.length, mcIn.getRowsPerBlock());
            } else {
                // sort multi-column matrix
                out = RDDSortUtils.sortDataByVals(out, in1, ascending, mcIn.getRows(), mcIn.getCols(), byCols.length, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
            }
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }

    // release the order-by column matrix acquired above via getMatrixInput
    if (isSort && _col.getDataType().isMatrix()) {
        sec.releaseMatrixInput(_col.getName());
    }
    // store output rdd handle
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ReorgMapFunction(org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction) IsBlockInList(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInList)

Aggregations

MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 459, MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes) 142, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 111, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 102, CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock) 48, SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) 48, IOException (java.io.IOException) 44, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 41, ArrayList (java.util.ArrayList) 40, Path (org.apache.hadoop.fs.Path) 29, FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock) 24, FileSystem (org.apache.hadoop.fs.FileSystem) 23, JavaPairRDD (org.apache.spark.api.java.JavaPairRDD) 23, JobConf (org.apache.hadoop.mapred.JobConf) 21, Tuple2 (scala.Tuple2) 19, SequenceFile (org.apache.hadoop.io.SequenceFile) 17, Row (org.apache.spark.sql.Row) 14, SparseBlock (org.apache.sysml.runtime.matrix.data.SparseBlock) 14, TestConfiguration (org.apache.sysml.test.integration.TestConfiguration) 14, IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) 13