
Example 6 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class ResultMergeLocalFile method createBinaryCellResultFile.

@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    Path path = new Path(fnameNew);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
    long rlen = mc.getRows();
    long clen = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    MatrixIndexes indexes = new MatrixIndexes(1, 1);
    MatrixCell cell = new MatrixCell(0);
    //beware: constructing the writer costs ca. 50ms
    SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
    try {
        boolean written = false;
        for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
            for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
            File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
            File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
            MatrixBlock mb = null;
            long row_offset = (brow - 1) * brlen + 1;
            long col_offset = (bcol - 1) * bclen + 1;
            if (dir.exists()) {
                //WITH COMPARE BLOCK
                if (withCompare && dir2.exists()) {
                    //copy only values that are different from the original
                    String[] lnames2 = dir2.list();
                    //there should be exactly 1 compare block
                    if (lnames2.length != 1)
                        throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
                    mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
                    boolean appendOnly = mb.isInSparseFormat();
                    double[][] compare = DataConverter.convertToDoubleMatrix(mb);
                    String[] lnames = dir.list();
                    for (String lname : lnames) {
                        MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                        mergeWithComp(mb, tmp, compare);
                    }
                    //sort sparse due to append-only
                    if (appendOnly)
                        mb.sortSparseRows();
                    //change sparse/dense representation if required
                    mb.examSparsity();
                } else {
                    //WITHOUT COMPARE BLOCK
                    //copy all non-zeros from all workers
                    String[] lnames = dir.list();
                    boolean appendOnly = false;
                    for (String lname : lnames) {
                        if (mb == null) {
                            mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            appendOnly = mb.isInSparseFormat();
                        } else {
                            MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
                            mergeWithoutComp(mb, tmp, appendOnly);
                        }
                    }
                    //sort sparse due to append-only
                    if (appendOnly)
                        mb.sortSparseRows();
                    //change sparse/dense representation if required
                    mb.examSparsity();
                }
            }
            //write the block to binary cell
            if (mb != null) {
                if (mb.isInSparseFormat()) {
                    Iterator<IJV> iter = mb.getSparseBlockIterator();
                    while (iter.hasNext()) {
                        IJV lcell = iter.next();
                        indexes.setIndexes(row_offset + lcell.getI(), col_offset + lcell.getJ());
                        cell.setValue(lcell.getV());
                        out.append(indexes, cell);
                        written = true;
                    }
                } else {
                    for (int i = 0; i < brlen; i++)
                        for (int j = 0; j < bclen; j++) {
                        double lvalue = mb.getValueDenseUnsafe(i, j);
                        if (lvalue != 0) { //for nnz
                            indexes.setIndexes(row_offset + i, col_offset + j);
                            cell.setValue(lvalue);
                            out.append(indexes, cell);
                            written = true;
                        }
                    }
                }
            }
        }
        if (!written)
            out.append(indexes, cell);
    } finally {
        IOUtilFunctions.closeSilently(out);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SequenceFile(org.apache.hadoop.io.SequenceFile) IJV(org.apache.sysml.runtime.matrix.data.IJV) FileSystem(org.apache.hadoop.fs.FileSystem) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) Iterator(java.util.Iterator) JobConf(org.apache.hadoop.mapred.JobConf) File(java.io.File) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter)
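
The heart of this example is the writer loop at the end: every non-zero cell of a merged block is appended to a Hadoop SequenceFile as a (MatrixIndexes, MatrixCell) pair. Below is a minimal self-contained sketch of that pattern, assuming Hadoop and SystemML on the classpath; the class name, output path, and 2x2 data are made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class BinaryCellWriteSketch {
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        //hypothetical output path
        Path path = new Path("tmp/cells.binarycell");
        MatrixIndexes indexes = new MatrixIndexes(1, 1);
        MatrixCell cell = new MatrixCell(0);
        SequenceFile.Writer out = new SequenceFile.Writer(fs, conf, path, MatrixIndexes.class, MatrixCell.class);
        try {
            //made-up 2x2 dense block
            double[][] vals = { { 1, 0 }, { 0, 4 } };
            for (int i = 0; i < vals.length; i++)
                for (int j = 0; j < vals[i].length; j++)
                    if (vals[i][j] != 0) { //binary cell keeps non-zeros only
                        indexes.setIndexes(i + 1, j + 1); //matrix indexes are 1-based
                        cell.setValue(vals[i][j]);
                        out.append(indexes, cell);
                    }
        } finally {
            out.close();
        }
    }
}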

Example 7 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class ReblockSPInstruction method processMatrixReblockInstruction.

@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) throws DMLRuntimeException {
    MatrixObject mo = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        //check jdk version (prevent Double.parseDouble contention on <jdk8)
        sec.checkAndRaiseValidationWarningJDKVersion();
        //get the input textcell rdd
        JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        //convert textcell to binary block
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: Until we introduce the rewrite to insert csvrblock for non-persistent reads
        // throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
        CSVReblockSPInstruction csvInstruction = null;
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties) { //instanceof already implies non-null
            CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
            hasHeader = props.hasHeader();
            delim = props.getDelim();
            fill = props.isFill();
            fillValue = props.getFillValue();
        }
        csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvInstruction.processInstruction(sec);
        return;
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        //BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
        out = RDDAggregateUtils.mergeByKey(out, false);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Text(org.apache.hadoop.io.Text) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExtractBlockForBinaryReblock(org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) LongWritable(org.apache.hadoop.io.LongWritable)
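
In the text-cell branch, the actual conversion is a single call to RDDConverterUtils.textCellToBinaryBlock. A hedged sketch of invoking it directly, mirroring that branch; the helper class/method and the 1000x1000 block size are illustrative assumptions, not part of the instruction above.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtils;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class TextCellReblockSketch {
    //hypothetical helper: reblock "i j v" text lines into binary blocks
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> reblock(
            JavaSparkContext sc, JavaPairRDD<LongWritable, Text> lines,
            long rows, long cols) throws DMLRuntimeException {
        //target characteristics with assumed 1000x1000 blocks
        MatrixCharacteristics mcOut = new MatrixCharacteristics(rows, cols, 1000, 1000);
        //false: do not materialize empty blocks in the output rdd
        return RDDConverterUtils.textCellToBinaryBlock(sc, lines, mcOut, false);
    }
}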

Example 8 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class ReorgSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();
    //get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    if (opcode.equalsIgnoreCase("r'")) { //TRANSPOSE
        //execute transpose reorg operation
        out = in1.mapToPair(new ReorgMapFunction(opcode));
    } else if (opcode.equalsIgnoreCase("rev")) { //REVERSE
        //execute reverse reorg operation
        out = in1.flatMapToPair(new RDDRevFunction(mcIn));
        if (mcIn.getRows() % mcIn.getRowsPerBlock() != 0)
            out = RDDAggregateUtils.mergeByKey(out, false);
    } else if (opcode.equalsIgnoreCase("rdiag")) { //DIAG
        if (mcIn.getCols() == 1) {
            // diagV2M
            out = in1.flatMapToPair(new RDDDiagV2MFunction(mcIn));
        } else {
            // diagM2V
            //execute diagM2V operation
            out = in1.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(opcode));
        }
    } else if (opcode.equalsIgnoreCase("rsort")) { //ORDER
        // Sort by column 'col' in ascending/descending order and return either index/value
        //get parameters
        long col = ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue();
        boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean singleCol = (mcIn.getCols() == 1);
        // extract column (if necessary) and sort 
        out = in1;
        if (!singleCol) {
            out = out.filter(new IsBlockInRange(1, mcIn.getRows(), col, col, mcIn)).mapValues(new ExtractColumn((int) UtilFunctions.computeCellInBlock(col, mcIn.getColsPerBlock())));
        }
        //actual index/data sort operation
        if (ixret) {
            //sort indexes 
            out = RDDSortUtils.sortIndexesByVal(out, !desc, mcIn.getRows(), mcIn.getRowsPerBlock());
        } else if (singleCol && !desc) {
            //sort single-column matrix
            out = RDDSortUtils.sortByVal(out, mcIn.getRows(), mcIn.getRowsPerBlock());
        } else {
            //sort multi-column matrix
            if (!_bSortIndInMem)
                out = RDDSortUtils.sortDataByVal(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
            else
                out = RDDSortUtils.sortDataByValMemSort(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), sec, (ReorgOperator) _optr);
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }
    //store output rdd handle
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ReorgMapFunction(org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction)
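
The transpose branch reduces to a simple key transformation plus a per-block transpose: output block (j,i) is built from input block (i,j), which ReorgMapFunction encapsulates. The key part, isolated as a sketch (class and method names are illustrative):

import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class TransposeKeySketch {
    //for r', the output block key is the input key with row and column swapped
    public static MatrixIndexes transposeKey(MatrixIndexes in) {
        return new MatrixIndexes(in.getColumnIndex(), in.getRowIndex());
    }
}

The rev branch needs the extra mergeByKey because when the number of rows is not a multiple of the block size, a reversed block straddles two output blocks, producing partial blocks with the same key that must be merged.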

Example 9 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class QuaternarySPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    QuaternaryOperator qop = (QuaternaryOperator) _optr;
    //tracking of rdds and broadcasts (for lineage maintenance)
    ArrayList<String> rddVars = new ArrayList<String>();
    ArrayList<String> bcVars = new ArrayList<String>();
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    MatrixCharacteristics inMc = sec.getMatrixCharacteristics(input1.getName());
    long rlen = inMc.getRows();
    long clen = inMc.getCols();
    int brlen = inMc.getRowsPerBlock();
    int bclen = inMc.getColsPerBlock();
    //(map/redwsloss, map/redwcemm); safe because these ops produce a scalar
    if (qop.wtype1 != null || qop.wtype4 != null) {
        in = in.filter(new FilterNonEmptyBlocksFunction());
    }
    //map-side only operation (one rdd input, two broadcasts)
    if (WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode()) || WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode()) || WeightedDivMM.OPCODE.equalsIgnoreCase(getOpcode()) || WeightedCrossEntropy.OPCODE.equalsIgnoreCase(getOpcode()) || WeightedUnaryMM.OPCODE.equalsIgnoreCase(getOpcode())) {
        PartitionedBroadcast<MatrixBlock> bc1 = sec.getBroadcastForVariable(input2.getName());
        PartitionedBroadcast<MatrixBlock> bc2 = sec.getBroadcastForVariable(input3.getName());
        //partitioning-preserving mappartitions (key access required for broadcast lookup)
        //only wdivmm changes keys
        boolean noKeyChange = (qop.wtype3 == null || qop.wtype3.isBasic());
        out = in.mapPartitionsToPair(new RDDQuaternaryFunction1(qop, bc1, bc2), noKeyChange);
        rddVars.add(input1.getName());
        bcVars.add(input2.getName());
        bcVars.add(input3.getName());
    } else { //reduce-side operation (two/three/four rdd inputs, zero/one/two broadcasts)
        PartitionedBroadcast<MatrixBlock> bc1 = _cacheU ? sec.getBroadcastForVariable(input2.getName()) : null;
        PartitionedBroadcast<MatrixBlock> bc2 = _cacheV ? sec.getBroadcastForVariable(input3.getName()) : null;
        JavaPairRDD<MatrixIndexes, MatrixBlock> inU = (!_cacheU) ? sec.getBinaryBlockRDDHandleForVariable(input2.getName()) : null;
        JavaPairRDD<MatrixIndexes, MatrixBlock> inV = (!_cacheV) ? sec.getBinaryBlockRDDHandleForVariable(input3.getName()) : null;
        JavaPairRDD<MatrixIndexes, MatrixBlock> inW = (qop.hasFourInputs() && !_input4.isLiteral()) ? sec.getBinaryBlockRDDHandleForVariable(_input4.getName()) : null;
        //preparation of transposed and replicated U
        if (inU != null)
            inU = inU.flatMapToPair(new ReplicateBlocksFunction(clen, bclen, true));
        //preparation of transposed and replicated V
        if (inV != null)
            inV = inV.mapToPair(new TransposeFactorIndexesFunction()).flatMapToPair(new ReplicateBlocksFunction(rlen, brlen, false));
        //function calls w/ two rdd inputs
        if (inU != null && inV == null && inW == null)
            out = in.join(inU).mapToPair(new RDDQuaternaryFunction2(qop, bc1, bc2));
        else if (inU == null && inV != null && inW == null)
            out = in.join(inV).mapToPair(new RDDQuaternaryFunction2(qop, bc1, bc2));
        else if (inU == null && inV == null && inW != null)
            out = in.join(inW).mapToPair(new RDDQuaternaryFunction2(qop, bc1, bc2));
        //function calls w/ three rdd inputs
        else if (inU != null && inV != null && inW == null)
            out = in.join(inU).join(inV).mapToPair(new RDDQuaternaryFunction3(qop, bc1, bc2));
        else if (inU != null && inV == null && inW != null)
            out = in.join(inU).join(inW).mapToPair(new RDDQuaternaryFunction3(qop, bc1, bc2));
        else if (inU == null && inV != null && inW != null)
            out = in.join(inV).join(inW).mapToPair(new RDDQuaternaryFunction3(qop, bc1, bc2));
        else if (inU == null && inV == null && inW == null) {
            out = in.mapPartitionsToPair(new RDDQuaternaryFunction1(qop, bc1, bc2), false);
        } else {
            //function call w/ four rdd inputs
            //need keys in case of wdivmm
            out = in.join(inU).join(inV).join(inW).mapToPair(new RDDQuaternaryFunction4(qop));
        }
        //keep variable names for lineage maintenance
        if (inU == null)
            bcVars.add(input2.getName());
        else
            rddVars.add(input2.getName());
        if (inV == null)
            bcVars.add(input3.getName());
        else
            rddVars.add(input3.getName());
        if (inW != null)
            rddVars.add(_input4.getName());
    }
    //output handling, incl aggregation
    //map/redwsloss, map/redwcemm
    if (qop.wtype1 != null || qop.wtype4 != null) {
        //full aggregate and cast to scalar
        MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
        DoubleObject ret = new DoubleObject(tmp.getValue(0, 0));
        sec.setVariable(output.getName(), ret);
    } else { //map/redwsigmoid, map/redwdivmm, map/redwumm
        //aggregation if required (map/redwdivmm)
        if (qop.wtype3 != null && !qop.wtype3.isBasic())
            out = RDDAggregateUtils.sumByKeyStable(out, false);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        //maintain lineage information for output rdd
        for (String rddVar : rddVars) sec.addLineageRDD(output.getName(), rddVar);
        for (String bcVar : bcVars) sec.addLineageBroadcast(output.getName(), bcVar);
        //update matrix characteristics
        updateOutputMatrixCharacteristics(sec, qop);
    }
}
Also used : QuaternaryOperator(org.apache.sysml.runtime.matrix.operators.QuaternaryOperator) FilterNonEmptyBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) ArrayList(java.util.ArrayList) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
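
The map-side path hinges on mapPartitionsToPair with preservesPartitioning=true: the quaternary functions need block keys to index into the broadcast factors, and for the basic weighted types the keys never change, so Spark may keep the existing partitioner and avoid a later shuffle. A generic sketch of that pattern, assuming Spark 2.x's Iterator-based PairFlatMapFunction (class and method names are illustrative):

import java.util.Iterator;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import scala.Tuple2;

public class PreservePartitioningSketch {
    //identity pass-through that declares its keys unchanged, so the
    //partitioner of the input rdd remains valid for the output
    public static <K, V> JavaPairRDD<K, V> identityPass(JavaPairRDD<K, V> in) {
        return in.mapPartitionsToPair(
            (PairFlatMapFunction<Iterator<Tuple2<K, V>>, K, V>) iter -> iter, true);
    }
}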

Example 10 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class RandSPInstruction method generateSequence.

private void generateSequence(SparkExecutionContext sec) throws DMLRuntimeException {
    //sanity check valid increment
    if (seq_incr == 0) {
        throw new DMLRuntimeException("ERROR: While performing seq(" + seq_from + "," + seq_to + "," + seq_incr + ")");
    }
    //handle default 1 to -1 for special case of from>to
    seq_incr = LibMatrixDatagen.updateSeqIncr(seq_from, seq_to, seq_incr);
    if (LOG.isTraceEnabled())
        LOG.trace("Process RandSPInstruction seq with seqFrom=" + seq_from + ", seqTo=" + seq_to + ", seqIncr=" + seq_incr);
    //step 1: offset generation 
    JavaRDD<Double> offsetsRDD = null;
    long nnz = (long) Math.abs(Math.round((seq_to - seq_from) / seq_incr)) + 1;
    //overestimate for on disk, ensures hdfs block per partition
    double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(nnz, 1, rowsInBlock, colsInBlock, nnz);
    double hdfsBlkSize = InfrastructureAnalyzer.getHDFSBlockSize();
    long numBlocks = (long) Math.ceil(((double) nnz) / rowsInBlock);
    //a) in-memory offset rdd construction 
    if (numBlocks < INMEMORY_NUMBLOCKS_THRESHOLD) {
        ArrayList<Double> offsets = new ArrayList<Double>();
        for (long i = 0; i < numBlocks; i++) {
            double off = seq_from + seq_incr * i * rowsInBlock;
            offsets.add(off);
        }
        //for load balancing: degree of parallelism such that ~128MB per partition
        int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
        //create offset rdd
        offsetsRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
    } else { //b) file-based offset rdd construction (for robustness wrt large number of blocks)
        Path path = new Path(LibMatrixDatagen.generateUniqueSeedPath(dir));
        PrintWriter pw = null;
        try {
            FileSystem fs = IOUtilFunctions.getFileSystem(path);
            pw = new PrintWriter(fs.create(path));
            for (long i = 0; i < numBlocks; i++) {
                double off = seq_from + seq_incr * i * rowsInBlock;
                pw.println(off);
            }
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        } finally {
            IOUtilFunctions.closeSilently(pw);
        }
        //for load balancing: degree of parallelism such that ~128MB per partition
        int numPartitions = (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
        //create offset rdd from file
        offsetsRDD = sec.getSparkContext().textFile(path.toString(), numPartitions).map(new ExtractOffsetTuple());
    }
    //sanity check number of non-zeros
    if (nnz != rows && rows != -1) {
        throw new DMLRuntimeException("Incorrect number of non-zeros: " + nnz + " != " + rows);
    }
    //step 2: execute seq instruction over offset input
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = offsetsRDD.mapToPair(new GenerateSequenceBlock(rowsInBlock, seq_from, seq_to, seq_incr));
    //step 3: output handling
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (!mcOut.dimsKnown()) {
        mcOut.set(nnz, 1, rowsInBlock, colsInBlock, nnz);
    }
    sec.setRDDHandleForVariable(output.getName(), out);
}
Also used : Path(org.apache.hadoop.fs.Path) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) FileSystem(org.apache.hadoop.fs.FileSystem) PrintWriter(java.io.PrintWriter)
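
Both offset branches share the same load-balancing arithmetic: pick a degree of parallelism such that each partition covers roughly one HDFS block of output, clamped to at most the number of matrix blocks and at least one. Isolated as a plain function (names are illustrative):

public class PartitionSizingSketch {
    //partitions ~ totalSize / hdfsBlkSize, but never more than the number
    //of matrix blocks and never fewer than one
    public static int numPartitions(double totalSize, double hdfsBlkSize, long numBlocks) {
        return (int) Math.max(Math.min(totalSize / hdfsBlkSize, numBlocks), 1);
    }

    public static void main(String[] args) {
        //e.g., 1GB of output with 128MB HDFS blocks and 64 blocks -> 8 partitions
        System.out.println(numPartitions(1024d * 1024 * 1024, 128d * 1024 * 1024, 64));
    }
}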

Aggregations

MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)393 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)121 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)105 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)87 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)44 IOException (java.io.IOException)43 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)38 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)34 ArrayList (java.util.ArrayList)33 Path (org.apache.hadoop.fs.Path)25 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)21 FileSystem (org.apache.hadoop.fs.FileSystem)20 JobConf (org.apache.hadoop.mapred.JobConf)17 Tuple2 (scala.Tuple2)17 SequenceFile (org.apache.hadoop.io.SequenceFile)14 MatrixReader (org.apache.sysml.runtime.io.MatrixReader)14 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)13 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)12 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)12