Search in sources:

Example 1 with CacheType

use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.

the class MapmmSPInstruction method processInstruction.

/**
 * Runs the map-side matrix multiply on Spark. One operand is read as a
 * binary-block RDD and the other as a partitioned broadcast; which input
 * plays which role is decided by the cache type.
 *
 * <p>If the flat-map variant is needed and repartitioning would be required,
 * the roles of the two inputs are swapped whenever the flipped configuration
 * yields a larger repartitioning count. The result is stored either as a
 * single aggregated block (SINGLE_BLOCK) or as an RDD handle with lineage
 * to both inputs (MULTI_BLOCK or NONE).
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    CacheType type = _type;

    // decide which operand is the distributed rdd and which is broadcast
    boolean firstIsRdd = type.isRight();
    String rddVar = firstIsRdd ? input1.getName() : input2.getName();
    String bcastVar = firstIsRdd ? input2.getName() : input1.getName();
    MatrixCharacteristics mcRdd = sec.getMatrixCharacteristics(rddVar);
    MatrixCharacteristics mcBc = sec.getMatrixCharacteristics(bcastVar);

    // handle of the distributed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddIn = sec.getBinaryBlockRDDHandleForVariable(rddVar);

    // check whether repartitioning (possibly with swapped rdd/broadcast roles)
    // is needed to keep output partitions moderately sized (2GB limitation)
    if (requiresFlatMapFunction(type, mcBc)
            && requiresRepartitioning(type, mcRdd, mcBc, rddIn.getNumPartitions())) {
        int numParts = getNumRepartitioning(type, mcRdd, mcBc);
        CacheType flippedType = type.getFlipped();
        int numParts2 = getNumRepartitioning(flippedType, mcBc, mcRdd);
        if (numParts2 > numParts) {
            // swap roles: re-derive names, characteristics and rdd handle
            type = flippedType;
            firstIsRdd = type.isRight();
            rddVar = firstIsRdd ? input1.getName() : input2.getName();
            bcastVar = firstIsRdd ? input2.getName() : input1.getName();
            mcRdd = sec.getMatrixCharacteristics(rddVar);
            mcBc = sec.getMatrixCharacteristics(bcastVar);
            rddIn = sec.getBinaryBlockRDDHandleForVariable(rddVar);
            // after the swap, bcastVar names the former rdd and rddVar the former broadcast
            LOG.warn("Mapmm: Switching rdd ('" + bcastVar + "') and broadcast ('" + rddVar + "') inputs "
                    + "for repartitioning because this allows better control of output partition "
                    + "sizes (" + numParts + " < " + numParts2 + ").");
        }
    }

    // broadcast handle of the other input
    PartitionedBroadcast<MatrixBlock> bcIn = sec.getBroadcastForVariable(bcastVar);

    // drop empty input blocks unless empty outputs are requested
    if (!_outputEmpty) {
        rddIn = rddIn.filter(new FilterNonEmptyBlocksFunction());
    }

    // single-block result: multiply per block, sum everything, store the block
    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        JavaRDD<MatrixBlock> partials = rddIn.map(new RDDMapMMFunction2(type, bcIn));
        MatrixBlock total = RDDAggregateUtils.sumStable(partials);
        // stored as a matrix block, so no lineage is registered on this path
        sec.setMatrixOutput(output.getName(), total, getExtendedOpcode());
        return;
    }

    // MULTI_BLOCK or NONE: pick the mapping strategy
    JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    if (requiresFlatMapFunction(type, mcBc)) {
        if (requiresRepartitioning(type, mcRdd, mcBc, rddIn.getNumPartitions())) {
            int numParts = getNumRepartitioning(type, mcRdd, mcBc);
            LOG.warn("Mapmm: Repartition input rdd '" + rddVar + "' from " + rddIn.getNumPartitions() + " to " + numParts + " partitions to satisfy size restrictions of output partitions.");
            rddIn = rddIn.repartition(numParts);
        }
        result = rddIn.flatMapToPair(new RDDFlatMapMMFunction(type, bcIn));
    } else if (preservesPartitioning(mcRdd, type)) {
        result = rddIn.mapPartitionsToPair(new RDDMapMMPartitionFunction(type, bcIn), true);
    } else {
        result = rddIn.mapToPair(new RDDMapMMFunction(type, bcIn));
    }

    // drop empty output blocks unless empty outputs are requested
    if (!_outputEmpty) {
        result = result.filter(new FilterNonEmptyBlocksFunction());
    }
    // only MULTI_BLOCK needs a per-key aggregation of partial results
    if (_aggtype == SparkAggType.MULTI_BLOCK) {
        result = RDDAggregateUtils.sumByKeyStable(result, false);
    }

    // register the output rdd together with lineage to both inputs
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), rddVar);
    sec.addLineageBroadcast(output.getName(), bcastVar);

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
Also used : FilterNonEmptyBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) CacheType(org.apache.sysml.lops.MapMult.CacheType) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 2 with CacheType

use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.

the class PmmSPInstruction method parseInstruction.

/**
 * Builds a {@code PmmSPInstruction} from its string form.
 *
 * <p>The instruction parts are read positionally: parts 1 and 2 are the
 * inputs, part 3 the row-count operand, part 4 the output and part 5 the
 * cache type name.
 *
 * @param str the instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode does not match {@code PMMJ.OPCODE}
 *         (compared case-insensitively)
 */
public static PmmSPInstruction parseInstruction(String str) {
    String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = InstructionUtils.getOpCode(str);

    // reject anything that is not a PMMJ instruction up front
    if (!opcode.equalsIgnoreCase(PMMJ.OPCODE)) {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // positional operands
    CPOperand leftIn = new CPOperand(fields[1]);
    CPOperand rightIn = new CPOperand(fields[2]);
    CPOperand rowCount = new CPOperand(fields[3]);
    CPOperand result = new CPOperand(fields[4]);
    CacheType cacheType = CacheType.valueOf(fields[5]);

    // multiply combined with a plus aggregate starting at 0
    AggregateOperator plusAgg = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator multPlus = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), plusAgg);

    return new PmmSPInstruction(multPlus, leftIn, rightIn, result, rowCount, cacheType, opcode, str);
}
Also used : AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) CacheType(org.apache.sysml.lops.MapMult.CacheType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 3 with CacheType

use of org.apache.sysml.lops.MapMult.CacheType in project systemml by apache.

the class PmmSPInstruction method parseInstruction.

/**
 * Parses the textual representation of a PMM Spark instruction.
 *
 * <p>Operands are taken by position from the split instruction: inputs at
 * indexes 1 and 2, the row-count operand at 3, the output at 4, and the
 * cache type name at 5.
 *
 * @param str the instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code PMMJ.OPCODE}
 *         (case-insensitive comparison)
 */
public static PmmSPInstruction parseInstruction(String str) {
    String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = InstructionUtils.getOpCode(str);

    // guard: this parser only understands the PMMJ opcode
    if (!opcode.equalsIgnoreCase(PMMJ.OPCODE)) {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // operands by position
    CPOperand firstInput = new CPOperand(tokens[1]);
    CPOperand secondInput = new CPOperand(tokens[2]);
    CPOperand numRows = new CPOperand(tokens[3]);
    CPOperand target = new CPOperand(tokens[4]);
    CacheType cacheType = CacheType.valueOf(tokens[5]);

    // multiply operator paired with a plus aggregate whose initial value is 0
    AggregateOperator sumOp = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator mmOp = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), sumOp);

    return new PmmSPInstruction(mmOp, firstInput, secondInput, target, numRows, cacheType, opcode, str);
}
Also used : AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) CacheType(org.apache.sysml.lops.MapMult.CacheType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 4 with CacheType

use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.

the class MapmmSPInstruction method parseInstruction.

/**
 * Builds a {@code MapmmSPInstruction} from its string form.
 *
 * <p>The instruction parts are read positionally: part 0 is the opcode,
 * parts 1 and 2 the inputs, part 3 the output, part 4 the cache type name,
 * part 5 the output-empty flag and part 6 the Spark aggregation type name.
 *
 * @param str the instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode does not match
 *         {@code MapMult.OPCODE} (compared case-insensitively)
 */
public static MapmmSPInstruction parseInstruction(String str) {
    String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = fields[0];

    // only the mapmm opcode is accepted
    if (!opcode.equalsIgnoreCase(MapMult.OPCODE)) {
        throw new DMLRuntimeException("MapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // positional operands and flags
    CPOperand leftIn = new CPOperand(fields[1]);
    CPOperand rightIn = new CPOperand(fields[2]);
    CPOperand result = new CPOperand(fields[3]);
    CacheType cacheType = CacheType.valueOf(fields[4]);
    boolean keepEmpty = Boolean.parseBoolean(fields[5]);
    SparkAggType aggKind = SparkAggType.valueOf(fields[6]);

    // multiply combined with a plus aggregate starting at 0
    AggregateOperator plusAgg = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator multPlus = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), plusAgg);

    return new MapmmSPInstruction(multPlus, leftIn, rightIn, result, cacheType, keepEmpty, aggKind, opcode, str);
}
Also used : SparkAggType(org.apache.sysml.hops.AggBinaryOp.SparkAggType) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CacheType(org.apache.sysml.lops.MapMult.CacheType)

Example 5 with CacheType

use of org.apache.sysml.lops.MapMult.CacheType in project systemml by apache.

the class MapmmSPInstruction method parseInstruction.

/**
 * Parses the textual representation of a mapmm Spark instruction.
 *
 * <p>Fields are taken by position from the split instruction: the opcode at
 * index 0, inputs at 1 and 2, the output at 3, the cache type name at 4,
 * the output-empty flag at 5, and the Spark aggregation type name at 6.
 *
 * @param str the instruction string
 * @return the parsed instruction
 * @throws DMLRuntimeException if the opcode is not {@code MapMult.OPCODE}
 *         (case-insensitive comparison)
 */
public static MapmmSPInstruction parseInstruction(String str) {
    String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = tokens[0];

    // guard: this parser only understands the mapmm opcode
    if (!opcode.equalsIgnoreCase(MapMult.OPCODE)) {
        throw new DMLRuntimeException("MapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // operands and flags by position
    CPOperand firstInput = new CPOperand(tokens[1]);
    CPOperand secondInput = new CPOperand(tokens[2]);
    CPOperand target = new CPOperand(tokens[3]);
    CacheType cacheType = CacheType.valueOf(tokens[4]);
    boolean emitEmptyBlocks = Boolean.parseBoolean(tokens[5]);
    SparkAggType sparkAgg = SparkAggType.valueOf(tokens[6]);

    // multiply operator paired with a plus aggregate whose initial value is 0
    AggregateOperator sumOp = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator mmOp = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), sumOp);

    return new MapmmSPInstruction(mmOp, firstInput, secondInput, target, cacheType, emitEmptyBlocks, sparkAgg, opcode, str);
}
Also used : SparkAggType(org.apache.sysml.hops.AggBinaryOp.SparkAggType) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) AggregateBinaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CacheType(org.apache.sysml.lops.MapMult.CacheType)

Aggregations

CacheType (org.apache.sysml.lops.MapMult.CacheType): 6; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 4; CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 4; AggregateBinaryOperator (org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator): 4; AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 4; SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType): 2; SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 2; FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction): 2; MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 2; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 2; MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 2