Search in sources:

Example 81 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class CSVReblockSPInstruction method processInstruction.

/**
 * Executes the CSV reblock for the instruction's input variable.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>verify that the input's metadata declares {@code InputInfo.CSVInputInfo};</li>
 *   <li>copy the input dimensions plus this instruction's block sizes
 *       ({@code _brlen}, {@code _bclen}) into the output characteristics;</li>
 *   <li>if {@code Recompiler.checkCPReblock} approves, run the in-memory
 *       matrix/frame reblock and return without creating an RDD;</li>
 *   <li>otherwise run the matrix/frame CSV reblock, register the resulting RDD
 *       handle for the output variable and record its lineage to the input.</li>
 * </ol>
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the input is not in CSV format
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final String inName = input1.getName();
    final String outName = output.getName();

    // the input must be described as CSV, otherwise this instruction does not apply
    final CacheableData<?> inData = sparkCtx.getCacheableData(inName);
    final MetaDataFormat inMeta = (MetaDataFormat) inData.getMetaData();
    if (inMeta.getInputInfo() != InputInfo.CSVInputInfo) {
        throw new DMLRuntimeException("The given InputInfo is not implemented for CSVReblockSPInstruction:" + inMeta.getInputInfo());
    }

    // output inherits the input dimensions, but uses the instruction's block sizes
    final MatrixCharacteristics inChars = sparkCtx.getMatrixCharacteristics(inName);
    final MatrixCharacteristics outChars = sparkCtx.getMatrixCharacteristics(outName);
    outChars.set(inChars.getRows(), inChars.getCols(), _brlen, _bclen);

    final boolean reblockInMemory = Recompiler.checkCPReblock(sparkCtx, inName);
    final DataType inType = input1.getDataType();
    final boolean isMatrix = (inType == DataType.MATRIX);
    final boolean isFrame = !isMatrix && (inType == DataType.FRAME);

    // in-memory reblock (w/ lazy spark context, potential for latency reduction)
    if (reblockInMemory) {
        if (isMatrix) {
            Recompiler.executeInMemoryMatrixReblock(sparkCtx, inName, outName);
        } else if (isFrame) {
            Recompiler.executeInMemoryFrameReblock(sparkCtx, inName, outName);
        }
        return;
    }

    // distributed matrix/frame csvreblock; stays null for any other data type
    JavaPairRDD<?, ?> reblocked = null;
    if (isMatrix) {
        reblocked = processMatrixCSVReblockInstruction(sparkCtx, outChars);
    } else if (isFrame) {
        reblocked = processFrameCSVReblockInstruction(sparkCtx, outChars, ((FrameObject) inData).getSchema());
    }

    // publish the output RDD handle in the symbol table and track its lineage
    sparkCtx.setRDDHandleForVariable(outName, reblocked);
    sparkCtx.addLineageRDD(outName, inName);
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 82 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class CastSPInstruction method processInstruction.

/**
 * Casts a frame to a matrix or a matrix to a frame on Spark, depending on the
 * instruction's opcode.
 *
 * <p>The input RDD is looked up in binary-block format. After the conversion,
 * the output RDD handle is registered, the output characteristics are updated
 * via {@code updateUnaryOutputMatrixCharacteristics}, and the lineage to the
 * input is recorded. For a cast to frame, the output frame's schema is set to
 * {@code ValueType.DOUBLE} for each of the input's columns.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the opcode is neither the cast-as-matrix nor
 *         the cast-as-frame opcode
 */
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final String op = getOpcode();
    final String inName = input1.getName();
    final String outName = output.getName();

    // fetch the input RDD and its characteristics
    final JavaPairRDD<?, ?> source = sparkCtx.getRDDHandleForVariable(inName, InputInfo.BinaryBlockInputInfo);
    final MatrixCharacteristics inChars = sparkCtx.getMatrixCharacteristics(inName);

    final boolean toMatrix = op.equals(UnaryCP.CAST_AS_MATRIX_OPCODE);
    final boolean toFrame = !toMatrix && op.equals(UnaryCP.CAST_AS_FRAME_OPCODE);
    if (!toMatrix && !toFrame) {
        throw new DMLRuntimeException("Unsupported spark cast operation: " + op);
    }

    // convert frame-matrix / matrix-frame
    final JavaPairRDD<?, ?> converted;
    if (toMatrix) {
        // the matrix output uses the configured block size in both dimensions
        final MatrixCharacteristics outChars = new MatrixCharacteristics(inChars);
        final int blockSize = ConfigurationManager.getBlocksize();
        outChars.setBlockSize(blockSize, blockSize);
        converted = FrameRDDConverterUtils.binaryBlockToMatrixBlock((JavaPairRDD<Long, FrameBlock>) source, inChars, outChars);
    } else {
        converted = FrameRDDConverterUtils.matrixBlockToBinaryBlockLongIndex(sparkCtx.getSparkContext(), (JavaPairRDD<MatrixIndexes, MatrixBlock>) source, inChars);
    }

    // register output, update its statistics and add lineage
    sparkCtx.setRDDHandleForVariable(outName, converted);
    updateUnaryOutputMatrixCharacteristics(sparkCtx, inName, outName);
    sparkCtx.addLineageRDD(outName, inName);

    // a frame produced from a matrix gets an all-DOUBLE schema
    if (toFrame) {
        sparkCtx.getFrameObject(outName).setSchema(UtilFunctions.nCopies((int) inChars.getCols(), ValueType.DOUBLE));
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 83 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ComputationSPInstruction method updateBinaryOutputMatrixCharacteristics.

/**
 * Infers the output characteristics of a binary operation from its inputs when
 * the output dimensions are not yet known; does nothing if they are known.
 *
 * <p>If the first input is a column vector (rows &gt; 1, cols == 1) and the
 * second a row vector (rows == 1, cols &gt; 1), the output takes its rows and
 * rows-per-block from the first input and its columns and cols-per-block from
 * the second. Otherwise the output takes dimensions and block sizes from the
 * first input.
 *
 * @param sec spark execution context used to look up the characteristics of
 *            {@code input1}, {@code input2} and {@code output}
 * @throws DMLRuntimeException if the output dimensions are unknown and the
 *         dimensions of the first input are unknown as well
 */
protected void updateBinaryOutputMatrixCharacteristics(SparkExecutionContext sec) {
    MatrixCharacteristics mcIn1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcIn2 = sec.getMatrixCharacteristics(input2.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // column vector (op) row vector
    boolean outer = (mcIn1.getRows() > 1 && mcIn1.getCols() == 1 && mcIn2.getRows() == 1 && mcIn2.getCols() > 1);
    if (!mcOut.dimsKnown()) {
        if (!mcIn1.dimsKnown()) {
            throw new DMLRuntimeException("The output dimensions are not specified and cannot be inferred from input:" + mcIn1.toString() + " " + mcIn2.toString() + " " + mcOut.toString());
        } else if (outer) {
            mcOut.set(mcIn1.getRows(), mcIn2.getCols(), mcIn1.getRowsPerBlock(), mcIn2.getColsPerBlock());
        } else {
            // cols-per-block must come from getColsPerBlock(); the previous code
            // passed getRowsPerBlock() for both block dimensions
            mcOut.set(mcIn1.getRows(), mcIn1.getCols(), mcIn1.getRowsPerBlock(), mcIn1.getColsPerBlock());
        }
    }
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 84 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ComputationSPInstruction method updateUnaryAggOutputMatrixCharacteristics.

/**
 * Infers the output characteristics of a unary aggregate from its input when
 * the output dimensions are not yet known; does nothing if they are known.
 *
 * <p>Block sizes are always taken from the input. The dimensions depend on the
 * index function: {@code ReduceAll} gives 1 x 1, {@code ReduceCol} gives
 * rows x 1, and {@code ReduceRow} gives 1 x cols. Any other index function
 * leaves the output characteristics unchanged.
 *
 * @param sec  spark execution context used to look up the characteristics of
 *             {@code input1} and {@code output}
 * @param ixFn index function of the aggregate
 * @throws DMLRuntimeException if neither the output nor the input dimensions
 *         are known
 */
protected void updateUnaryAggOutputMatrixCharacteristics(SparkExecutionContext sec, IndexFunction ixFn) {
    final MatrixCharacteristics inChars = sec.getMatrixCharacteristics(input1.getName());
    final MatrixCharacteristics outChars = sec.getMatrixCharacteristics(output.getName());

    // nothing to infer when the output is already fully described
    if (outChars.dimsKnown()) {
        return;
    }
    if (!inChars.dimsKnown()) {
        throw new DMLRuntimeException("The output dimensions are not specified and cannot be inferred from input:" + inChars + " " + outChars);
    }

    // derive the output dimensions from the kind of reduction
    final long outRows;
    final long outCols;
    if (ixFn instanceof ReduceAll) {
        outRows = 1;
        outCols = 1;
    } else if (ixFn instanceof ReduceCol) {
        outRows = inChars.getRows();
        outCols = 1;
    } else if (ixFn instanceof ReduceRow) {
        outRows = 1;
        outCols = inChars.getCols();
    } else {
        // other index functions: leave the output untouched
        return;
    }
    outChars.set(outRows, outCols, inChars.getRowsPerBlock(), inChars.getColsPerBlock());
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) ReduceRow(org.apache.sysml.runtime.functionobjects.ReduceRow) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 85 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class FrameRDDConverterUtils method matrixBlockToBinaryBlockLongIndex.

/**
 * Converts an RDD of matrix blocks into an RDD of frame blocks keyed by
 * {@code Long}.
 *
 * <p>When the matrix has more columns than fit into one column block, the
 * blocks are first split via {@code MatrixFrameReblockFunction} and merged by
 * key, and the characteristics handed to the final conversion get a block size
 * of (computed row block size) x (number of columns). Otherwise the input
 * blocks are converted directly.
 *
 * @param sc    spark context (not used by this method body)
 * @param input matrix blocks to convert
 * @param mcIn  characteristics of the input matrix; copied, not modified here
 * @return RDD of frame blocks produced by {@code MatrixToFrameBlockFunction}
 */
public static JavaPairRDD<Long, FrameBlock> matrixBlockToBinaryBlockLongIndex(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixBlock> input, MatrixCharacteristics mcIn) {
    final MatrixCharacteristics frameChars = new MatrixCharacteristics(mcIn);

    // single column block: convert the matrix blocks as they are (w/o shuffle)
    final boolean multipleColumnBlocks = mcIn.getCols() > mcIn.getColsPerBlock();
    if (!multipleColumnBlocks) {
        return input.mapToPair(new MatrixToFrameBlockFunction(frameChars));
    }

    // split matrix blocks into extended matrix blocks
    final JavaPairRDD<MatrixIndexes, MatrixBlock> extended = input.flatMapToPair(new MatrixFrameReblockFunction(mcIn));
    frameChars.setBlockSize(MatrixFrameReblockFunction.computeBlockSize(frameChars), (int) frameChars.getCols());

    // shuffle matrix blocks (instead of frame blocks) in order to exploit
    // sparse formats (for sparse or wide matrices) during shuffle
    final JavaPairRDD<MatrixIndexes, MatrixBlock> merged = RDDAggregateUtils.mergeByKey(extended, false);

    // convert the merged matrix blocks to frame blocks (w/o further shuffle)
    return merged.mapToPair(new MatrixToFrameBlockFunction(frameChars));
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)296 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)102 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)89 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)70 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)50 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)47 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)45 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)42 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)37 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)37 IOException (java.io.IOException)30 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)27 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)22 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 Path (org.apache.hadoop.fs.Path)17 LongWritable (org.apache.hadoop.io.LongWritable)16 Test (org.junit.Test)15 Text (org.apache.hadoop.io.Text)14