use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.
the class MapmmSPInstruction method processInstruction.
/**
 * Runs the map-side matrix multiplication on Spark: one operand is read as a
 * binary-block RDD, the other is fetched as a partitioned broadcast, and the
 * product is registered under the output variable of the execution context.
 *
 * @param ec execution context; it is cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // The cache type determines which operand is the RDD and which is broadcast.
    CacheType cacheType = _type;
    String rddName;
    String bcName;
    if (cacheType.isRight()) {
        rddName = input1.getName();
        bcName = input2.getName();
    } else {
        rddName = input2.getName();
        bcName = input1.getName();
    }
    MatrixCharacteristics rddStats = sparkCtx.getMatrixCharacteristics(rddName);
    MatrixCharacteristics bcStats = sparkCtx.getMatrixCharacteristics(bcName);

    // handle of the distributed input
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sparkCtx.getBinaryBlockRDDHandleForVariable(rddName);

    // Check whether repartitioning is needed and, if so, whether swapping the rdd
    // and broadcast roles gives more partitions; this is required to ensure
    // moderately sized output partitions (2GB limitation).
    if (requiresFlatMapFunction(cacheType, bcStats)) {
        if (requiresRepartitioning(cacheType, rddStats, bcStats, blocks.getNumPartitions())) {
            int partsAsIs = getNumRepartitioning(cacheType, rddStats, bcStats);
            int partsFlipped = getNumRepartitioning(cacheType.getFlipped(), bcStats, rddStats);
            if (partsFlipped > partsAsIs) {
                // flip required: recompute every value derived from the cache type
                cacheType = cacheType.getFlipped();
                if (cacheType.isRight()) {
                    rddName = input1.getName();
                    bcName = input2.getName();
                } else {
                    rddName = input2.getName();
                    bcName = input1.getName();
                }
                rddStats = sparkCtx.getMatrixCharacteristics(rddName);
                bcStats = sparkCtx.getMatrixCharacteristics(bcName);
                blocks = sparkCtx.getBinaryBlockRDDHandleForVariable(rddName);
                LOG.warn("Mapmm: Switching rdd ('" + bcName + "') and broadcast ('" + rddName + "') inputs "
                    + "for repartitioning because this allows better control of output partition "
                    + "sizes (" + partsAsIs + " < " + partsFlipped + ").");
            }
        }
    }

    // broadcast handle of the other operand (after a potential flip)
    PartitionedBroadcast<MatrixBlock> bcast = sparkCtx.getBroadcastForVariable(bcName);

    // drop empty input blocks unless empty outputs were requested
    if (!_outputEmpty) {
        blocks = blocks.filter(new FilterNonEmptyBlocksFunction());
    }

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        // Single output block: multiply per block and sum everything up; the block is
        // stored directly (no lineage because single block), which also implicitly
        // maintains the matrix characteristics.
        MatrixBlock total = RDDAggregateUtils.sumStable(blocks.map(new RDDMapMMFunction2(cacheType, bcast)));
        sparkCtx.setMatrixOutput(output.getName(), total, getExtendedOpcode());
        return;
    }

    // MULTI_BLOCK or NONE
    JavaPairRDD<MatrixIndexes, MatrixBlock> product;
    if (requiresFlatMapFunction(cacheType, bcStats)) {
        if (requiresRepartitioning(cacheType, rddStats, bcStats, blocks.getNumPartitions())) {
            int targetParts = getNumRepartitioning(cacheType, rddStats, bcStats);
            LOG.warn("Mapmm: Repartition input rdd '" + rddName + "' from " + blocks.getNumPartitions() + " to " + targetParts + " partitions to satisfy size restrictions of output partitions.");
            blocks = blocks.repartition(targetParts);
        }
        product = blocks.flatMapToPair(new RDDFlatMapMMFunction(cacheType, bcast));
    } else if (preservesPartitioning(rddStats, cacheType)) {
        product = blocks.mapPartitionsToPair(new RDDMapMMPartitionFunction(cacheType, bcast), true);
    } else {
        product = blocks.mapToPair(new RDDMapMMFunction(cacheType, bcast));
    }

    // drop empty output blocks unless empty outputs were requested
    if (!_outputEmpty) {
        product = product.filter(new FilterNonEmptyBlocksFunction());
    }
    if (_aggtype == SparkAggType.MULTI_BLOCK) {
        product = RDDAggregateUtils.sumByKeyStable(product, false);
    }

    // register the output RDD together with its lineage (rdd and broadcast inputs)
    String outName = output.getName();
    sparkCtx.setRDDHandleForVariable(outName, product);
    sparkCtx.addLineageRDD(outName, rddName);
    sparkCtx.addLineageBroadcast(outName, bcName);

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sparkCtx, true);
}
use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.
the class PmmSPInstruction method parseInstruction.
/**
 * Builds a {@code PmmSPInstruction} from its string encoding.
 *
 * <p>Fields used after splitting: [1] first input, [2] second input,
 * [3] nrow operand, [4] output, [5] name of a {@code CacheType} constant.
 *
 * @param str the encoded instruction
 * @return the parsed instruction, carrying a sum-of-products operator
 * @throws DMLRuntimeException if the opcode is not {@code PMMJ.OPCODE} (compared ignoring case)
 */
public static PmmSPInstruction parseInstruction(String str) {
    final String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    final String opcode = InstructionUtils.getOpCode(str);

    // reject anything but the expected opcode up front
    if (!opcode.equalsIgnoreCase(PMMJ.OPCODE)) {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    final CPOperand lhs = new CPOperand(fields[1]);
    final CPOperand rhs = new CPOperand(fields[2]);
    final CPOperand numRows = new CPOperand(fields[3]);
    final CPOperand result = new CPOperand(fields[4]);
    final CacheType cacheType = CacheType.valueOf(fields[5]);

    // matrix multiply = multiply cell pairs, aggregate with plus starting at 0
    final AggregateOperator plusAgg = new AggregateOperator(0, Plus.getPlusFnObject());
    final AggregateBinaryOperator matMult =
        new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), plusAgg);

    return new PmmSPInstruction(matMult, lhs, rhs, result, numRows, cacheType, opcode, str);
}
use of org.apache.sysml.lops.MapMult.CacheType in project systemml by apache.
the class PmmSPInstruction method parseInstruction.
/**
 * Parses the string form of a PMM Spark instruction.
 *
 * <p>The split instruction is read positionally: index 1 and 2 are the two
 * inputs, index 3 the nrow operand, index 4 the output and index 5 the
 * {@code CacheType} constant name.
 *
 * @param str the encoded instruction
 * @return a new {@code PmmSPInstruction} using multiply as the binary function and plus as the aggregate
 * @throws DMLRuntimeException if the opcode differs from {@code PMMJ.OPCODE} (case-insensitive comparison)
 */
public static PmmSPInstruction parseInstruction(String str) {
    String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = InstructionUtils.getOpCode(str);

    boolean known = opcode.equalsIgnoreCase(PMMJ.OPCODE);
    if (!known) {
        throw new DMLRuntimeException("PmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // operands, in the order they appear in the instruction string
    CPOperand firstIn = new CPOperand(tokens[1]);
    CPOperand secondIn = new CPOperand(tokens[2]);
    CPOperand rowCount = new CPOperand(tokens[3]);
    CPOperand target = new CPOperand(tokens[4]);
    CacheType cache = CacheType.valueOf(tokens[5]);

    // plus-aggregation (initial value 0) over element-wise products
    AggregateOperator sum = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator product = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), sum);

    return new PmmSPInstruction(product, firstIn, secondIn, target, rowCount, cache, opcode, str);
}
use of org.apache.sysml.lops.MapMult.CacheType in project incubator-systemml by apache.
the class MapmmSPInstruction method parseInstruction.
/**
 * Builds a {@code MapmmSPInstruction} from its string encoding.
 *
 * <p>Fields used after splitting: [0] opcode, [1] first input, [2] second input,
 * [3] output, [4] name of a {@code CacheType} constant, [5] output-empty flag
 * (parsed with {@link Boolean#parseBoolean(String)}), [6] name of a
 * {@code SparkAggType} constant.
 *
 * @param str the encoded instruction
 * @return the parsed instruction, carrying a sum-of-products operator
 * @throws DMLRuntimeException if the opcode is not {@code MapMult.OPCODE} (compared ignoring case)
 */
public static MapmmSPInstruction parseInstruction(String str) {
    final String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    final String opcode = fields[0];

    if (opcode.equalsIgnoreCase(MapMult.OPCODE)) {
        final CPOperand lhs = new CPOperand(fields[1]);
        final CPOperand rhs = new CPOperand(fields[2]);
        final CPOperand result = new CPOperand(fields[3]);
        final CacheType cacheType = CacheType.valueOf(fields[4]);
        final boolean keepEmpty = Boolean.parseBoolean(fields[5]);
        final SparkAggType aggregation = SparkAggType.valueOf(fields[6]);

        // matrix multiply = multiply cell pairs, aggregate with plus starting at 0
        final AggregateOperator plusAgg = new AggregateOperator(0, Plus.getPlusFnObject());
        final AggregateBinaryOperator matMult =
            new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), plusAgg);

        return new MapmmSPInstruction(matMult, lhs, rhs, result, cacheType, keepEmpty, aggregation, opcode, str);
    }

    throw new DMLRuntimeException("MapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
}
use of org.apache.sysml.lops.MapMult.CacheType in project systemml by apache.
the class MapmmSPInstruction method parseInstruction.
/**
 * Parses the string form of a map-side matrix multiply Spark instruction.
 *
 * <p>The split instruction is read positionally: index 0 is the opcode, 1 and 2
 * are the inputs, 3 is the output, 4 the {@code CacheType} constant name, 5 the
 * output-empty boolean and 6 the {@code SparkAggType} constant name.
 *
 * @param str the encoded instruction
 * @return a new {@code MapmmSPInstruction} using multiply as the binary function and plus as the aggregate
 * @throws DMLRuntimeException if the opcode differs from {@code MapMult.OPCODE} (case-insensitive comparison)
 */
public static MapmmSPInstruction parseInstruction(String str) {
    String[] tokens = InstructionUtils.getInstructionPartsWithValueType(str);
    String opcode = tokens[0];

    boolean known = opcode.equalsIgnoreCase(MapMult.OPCODE);
    if (!known) {
        throw new DMLRuntimeException("MapmmSPInstruction.parseInstruction():: Unknown opcode " + opcode);
    }

    // operands and flags, in the order they appear in the instruction string
    CPOperand firstIn = new CPOperand(tokens[1]);
    CPOperand secondIn = new CPOperand(tokens[2]);
    CPOperand target = new CPOperand(tokens[3]);
    CacheType cache = CacheType.valueOf(tokens[4]);
    boolean emitEmpty = Boolean.parseBoolean(tokens[5]);
    SparkAggType aggKind = SparkAggType.valueOf(tokens[6]);

    // plus-aggregation (initial value 0) over element-wise products
    AggregateOperator sum = new AggregateOperator(0, Plus.getPlusFnObject());
    AggregateBinaryOperator product = new AggregateBinaryOperator(Multiply.getMultiplyFnObject(), sum);

    return new MapmmSPInstruction(product, firstIn, secondIn, target, cache, emitEmpty, aggKind, opcode, str);
}
Aggregations