Search in sources :

Example 1 with ExtractBlockForBinaryReblock

use of org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock in project incubator-systemml by apache.

the class ReblockSPInstruction method processMatrixReblockInstruction.

@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) throws DMLRuntimeException {
    MatrixObject mo = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        //check jdk version (prevent double.parseDouble contention on <jdk8)
        sec.checkAndRaiseValidationWarningJDKVersion();
        //get the input textcell rdd
        JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        //convert textcell to binary block
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
        // throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
        CSVReblockSPInstruction csvInstruction = null;
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties && mo.getFileFormatProperties() != null) {
            CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
            hasHeader = props.hasHeader();
            delim = props.getDelim();
            fill = props.isFill();
            fillValue = props.getFillValue();
        }
        csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvInstruction.processInstruction(sec);
        return;
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        //BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
        out = RDDAggregateUtils.mergeByKey(out, false);
        //put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Text(org.apache.hadoop.io.Text) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExtractBlockForBinaryReblock(org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) LongWritable(org.apache.hadoop.io.LongWritable)

Example 2 with ExtractBlockForBinaryReblock

use of org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock in project incubator-systemml by apache.

the class ConvolutionSPInstruction method reblockAsRectangularMatrices.

private JavaPairRDD<MatrixIndexes, MatrixBlock> reblockAsRectangularMatrices(SparkExecutionContext sec, String name, int numRowsPerBlock) throws DMLRuntimeException {
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(name);
    MatrixCharacteristics mcRdd = sec.getMatrixCharacteristics(name);
    if (mcRdd.getColsPerBlock() < mcRdd.getCols() || mcRdd.getRowsPerBlock() != 1) {
        MatrixCharacteristics mcOut = new MatrixCharacteristics(mcRdd);
        mcOut.setColsPerBlock((int) mcRdd.getCols());
        mcOut.setRowsPerBlock(numRowsPerBlock);
        in1 = RDDAggregateUtils.mergeByKey(in1.flatMapToPair(new ExtractBlockForBinaryReblock(mcRdd, mcOut)));
    // TODO: Inject checkpoint to avoid doing this repeated for validation set
    //			sec.setRDDHandleForVariable(name, in1);
    //			sec.setMetaData(name, new MatrixDimensionsMetaData(mcOut));
    }
    return in1;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ExtractBlockForBinaryReblock(org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

ExtractBlockForBinaryReblock (org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock)2 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)2 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)2 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)2 LongWritable (org.apache.hadoop.io.LongWritable)1 Text (org.apache.hadoop.io.Text)1 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)1 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)1 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)1 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)1 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)1