use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.
the class AggUnaryOp method constructLopsTernaryAggregateRewrite.
private Lop constructLopsTernaryAggregateRewrite(ExecType et) {
BinaryOp input1 = (BinaryOp) getInput().get(0);
Hop input11 = input1.getInput().get(0);
Hop input12 = input1.getInput().get(1);
Lop in1 = null, in2 = null, in3 = null;
boolean handled = false;
if (input1.getOp() == OpOp2.POW) {
assert (HopRewriteUtils.isLiteralOfValue(input12, 3)) : "this case can only occur with a power of 3";
in1 = input11.constructLops();
in2 = in1;
in3 = in1;
handled = true;
} else if (input11 instanceof BinaryOp) {
BinaryOp b11 = (BinaryOp) input11;
switch(b11.getOp()) {
case // A*B*C case
MULT:
in1 = input11.getInput().get(0).constructLops();
in2 = input11.getInput().get(1).constructLops();
in3 = input12.constructLops();
handled = true;
break;
case // A*A*B case
POW:
Hop b112 = b11.getInput().get(1);
if (!(input12 instanceof BinaryOp && ((BinaryOp) input12).getOp() == OpOp2.MULT) && HopRewriteUtils.isLiteralOfValue(b112, 2)) {
in1 = b11.getInput().get(0).constructLops();
in2 = in1;
in3 = input12.constructLops();
handled = true;
}
break;
default:
break;
}
} else if (input12 instanceof BinaryOp) {
BinaryOp b12 = (BinaryOp) input12;
switch(b12.getOp()) {
case // A*B*C case
MULT:
in1 = input11.constructLops();
in2 = input12.getInput().get(0).constructLops();
in3 = input12.getInput().get(1).constructLops();
handled = true;
break;
case // A*B*B case
POW:
Hop b112 = b12.getInput().get(1);
if (HopRewriteUtils.isLiteralOfValue(b112, 2)) {
in1 = b12.getInput().get(0).constructLops();
in2 = in1;
in3 = input11.constructLops();
handled = true;
}
break;
default:
break;
}
}
if (!handled) {
in1 = input11.constructLops();
in2 = input12.constructLops();
in3 = new LiteralOp(1).constructLops();
}
// create new ternary aggregate operator
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
// The execution type of a unary aggregate instruction should depend on the execution type of inputs to avoid OOM
// Since we only support matrix-vector and not vector-matrix, checking the execution type of input1 should suffice.
ExecType et_input = input1.optFindExecType();
// Because ternary aggregate are not supported on GPU
et_input = et_input == ExecType.GPU ? ExecType.CP : et_input;
DirectionTypes dir = HopsDirection2Lops.get(_direction);
return new TernaryAggregate(in1, in2, in3, Aggregate.OperationTypes.KahanSum, Binary.OperationTypes.MULTIPLY, dir, getDataType(), ValueType.DOUBLE, et_input, k);
}
use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.
the class BinaryOp method constructLopsBinaryDefault.
private void constructLopsBinaryDefault() {
/* Default behavior for BinaryOp */
// it depends on input data types
DataType dt1 = getInput().get(0).getDataType();
DataType dt2 = getInput().get(1).getDataType();
if (dt1 == dt2 && dt1 == DataType.SCALAR) {
// Both operands scalar
BinaryScalar binScalar1 = new BinaryScalar(getInput().get(0).constructLops(), getInput().get(1).constructLops(), HopsOpOp2LopsBS.get(op), getDataType(), getValueType());
binScalar1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(binScalar1);
setLops(binScalar1);
} else if ((dt1 == DataType.MATRIX && dt2 == DataType.SCALAR) || (dt1 == DataType.SCALAR && dt2 == DataType.MATRIX)) {
// One operand is Matrix and the other is scalar
ExecType et = optFindExecType();
// select specific operator implementations
Unary.OperationTypes ot = null;
Hop right = getInput().get(1);
if (op == OpOp2.POW && right instanceof LiteralOp && ((LiteralOp) right).getDoubleValue() == 2.0)
ot = Unary.OperationTypes.POW2;
else if (op == OpOp2.MULT && right instanceof LiteralOp && ((LiteralOp) right).getDoubleValue() == 2.0)
ot = Unary.OperationTypes.MULTIPLY2;
else
// general case
ot = HopsOpOp2LopsU.get(op);
Unary unary1 = new Unary(getInput().get(0).constructLops(), getInput().get(1).constructLops(), ot, getDataType(), getValueType(), et);
setOutputDimensions(unary1);
setLineNumbers(unary1);
setLops(unary1);
} else {
// Both operands are Matrixes
ExecType et = optFindExecType();
boolean isGPUSoftmax = et == ExecType.GPU && op == Hop.OpOp2.DIV && getInput().get(0) instanceof UnaryOp && getInput().get(1) instanceof AggUnaryOp && ((UnaryOp) getInput().get(0)).getOp() == OpOp1.EXP && ((AggUnaryOp) getInput().get(1)).getOp() == AggOp.SUM && ((AggUnaryOp) getInput().get(1)).getDirection() == Direction.Row && getInput().get(0) == getInput().get(1).getInput().get(0);
if (isGPUSoftmax) {
UnaryCP softmax = new UnaryCP(getInput().get(0).getInput().get(0).constructLops(), UnaryCP.OperationTypes.SOFTMAX, getDataType(), getValueType(), et);
setOutputDimensions(softmax);
setLineNumbers(softmax);
setLops(softmax);
} else if (et == ExecType.CP || et == ExecType.GPU) {
Lop binary = null;
boolean isLeftXGt = (getInput().get(0) instanceof BinaryOp) && ((BinaryOp) getInput().get(0)).getOp() == OpOp2.GREATER;
Hop potentialZero = isLeftXGt ? ((BinaryOp) getInput().get(0)).getInput().get(1) : null;
boolean isLeftXGt0 = isLeftXGt && potentialZero != null && potentialZero instanceof LiteralOp && ((LiteralOp) potentialZero).getDoubleValue() == 0;
if (op == OpOp2.MULT && isLeftXGt0 && !getInput().get(0).isVector() && !getInput().get(1).isVector() && getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) {
binary = new ConvolutionTransform(getInput().get(0).getInput().get(0).constructLops(), getInput().get(1).constructLops(), ConvolutionTransform.OperationTypes.RELU_BACKWARD, getDataType(), getValueType(), et, -1);
} else
binary = new Binary(getInput().get(0).constructLops(), getInput().get(1).constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
setOutputDimensions(binary);
setLineNumbers(binary);
setLops(binary);
} else if (et == ExecType.SPARK) {
Hop left = getInput().get(0);
Hop right = getInput().get(1);
MMBinaryMethod mbin = optFindMMBinaryMethodSpark(left, right);
Lop binary = null;
if (mbin == MMBinaryMethod.MR_BINARY_UAGG_CHAIN) {
AggUnaryOp uRight = (AggUnaryOp) right;
binary = new BinaryUAggChain(left.constructLops(), HopsOpOp2LopsB.get(op), HopsAgg2Lops.get(uRight.getOp()), HopsDirection2Lops.get(uRight.getDirection()), getDataType(), getValueType(), et);
} else if (mbin == MMBinaryMethod.MR_BINARY_M) {
boolean partitioned = false;
boolean isColVector = (right.getDim2() == 1 && left.getDim1() == right.getDim1());
binary = new BinaryM(left.constructLops(), right.constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et, partitioned, isColVector);
} else {
binary = new Binary(left.constructLops(), right.constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
}
setOutputDimensions(binary);
setLineNumbers(binary);
setLops(binary);
} else // MR
{
Hop left = getInput().get(0);
Hop right = getInput().get(1);
MMBinaryMethod mbin = optFindMMBinaryMethod(left, right);
if (mbin == MMBinaryMethod.MR_BINARY_M) {
boolean needPart = requiresPartitioning(right);
Lop dcInput = right.constructLops();
if (needPart) {
// right side in distributed cache
ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(right.getDim1(), right.getDim2(), OptimizerUtils.getSparsity(right.getDim1(), right.getDim2(), right.getNnz())) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
ExecType.MR;
dcInput = new DataPartition(dcInput, DataType.MATRIX, ValueType.DOUBLE, etPart, (right.getDim2() == 1) ? PDataPartitionFormat.ROW_BLOCK_WISE_N : PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
dcInput.getOutputParameters().setDimensions(right.getDim1(), right.getDim2(), right.getRowsInBlock(), right.getColsInBlock(), right.getNnz());
dcInput.setAllPositions(right.getFilename(), right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
}
BinaryM binary = new BinaryM(left.constructLops(), dcInput, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), ExecType.MR, needPart, (right.getDim2() == 1 && left.getDim1() == right.getDim1()));
setOutputDimensions(binary);
setLineNumbers(binary);
setLops(binary);
} else if (mbin == MMBinaryMethod.MR_BINARY_UAGG_CHAIN) {
AggUnaryOp uRight = (AggUnaryOp) right;
BinaryUAggChain bin = new BinaryUAggChain(left.constructLops(), HopsOpOp2LopsB.get(op), HopsAgg2Lops.get(uRight.getOp()), HopsDirection2Lops.get(uRight.getDirection()), getDataType(), getValueType(), et);
setOutputDimensions(bin);
setLineNumbers(bin);
setLops(bin);
} else if (mbin == MMBinaryMethod.MR_BINARY_OUTER_R) {
boolean requiresRepLeft = (!right.dimsKnown() || right.getDim2() > right.getColsInBlock());
boolean requiresRepRight = (!left.dimsKnown() || left.getDim1() > right.getRowsInBlock());
Lop leftLop = left.constructLops();
Lop rightLop = right.constructLops();
if (requiresRepLeft) {
// ncol of right determines rep of left
Lop offset = createOffsetLop(right, true);
leftLop = new RepMat(leftLop, offset, true, left.getDataType(), left.getValueType());
setOutputDimensions(leftLop);
setLineNumbers(leftLop);
}
if (requiresRepRight) {
// nrow of right determines rep of right
Lop offset = createOffsetLop(left, false);
rightLop = new RepMat(rightLop, offset, false, right.getDataType(), right.getValueType());
setOutputDimensions(rightLop);
setLineNumbers(rightLop);
}
Group group1 = new Group(leftLop, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group1);
setOutputDimensions(group1);
Group group2 = new Group(rightLop, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group2);
setOutputDimensions(group2);
Binary binary = new Binary(group1, group2, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
setOutputDimensions(binary);
setLineNumbers(binary);
setLops(binary);
} else // MMBinaryMethod.MR_BINARY_R
{
boolean requiresRep = requiresReplication(left, right);
Lop rightLop = right.constructLops();
if (requiresRep) {
// ncol of left input (determines num replicates)
Lop offset = createOffsetLop(left, (right.getDim2() <= 1));
rightLop = new RepMat(rightLop, offset, (right.getDim2() <= 1), right.getDataType(), right.getValueType());
setOutputDimensions(rightLop);
setLineNumbers(rightLop);
}
Group group1 = new Group(getInput().get(0).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group1);
setOutputDimensions(group1);
Group group2 = new Group(rightLop, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group2);
setOutputDimensions(group2);
Binary binary = new Binary(group1, group2, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
setLineNumbers(binary);
setOutputDimensions(binary);
setLops(binary);
}
}
}
}
use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.
the class BinaryOp method constructMRAppendLop.
/**
* General case binary append.
*
* @param left high-level operator left
* @param right high-level operator right
* @param dt data type
* @param vt value type
* @param cbind true if cbind
* @param current current high-level operator
* @return low-level operator
*/
public static Lop constructMRAppendLop(Hop left, Hop right, DataType dt, ValueType vt, boolean cbind, Hop current) {
Lop ret = null;
long m1_dim1 = left.getDim1();
long m1_dim2 = left.getDim2();
long m2_dim1 = right.getDim1();
long m2_dim2 = right.getDim2();
// output rows
long m3_dim1 = cbind ? m1_dim1 : ((m1_dim1 >= 0 && m2_dim1 >= 0) ? (m1_dim1 + m2_dim1) : -1);
// output cols
long m3_dim2 = cbind ? ((m1_dim2 >= 0 && m2_dim2 >= 0) ? (m1_dim2 + m2_dim2) : -1) : m1_dim2;
// output nnz
long m3_nnz = (left.getNnz() > 0 && right.getNnz() > 0) ? (left.getNnz() + right.getNnz()) : -1;
long brlen = left.getRowsInBlock();
long bclen = left.getColsInBlock();
// offset 1st input
Lop offset = createOffsetLop(left, cbind);
AppendMethod am = optFindAppendMethod(m1_dim1, m1_dim2, m2_dim1, m2_dim2, brlen, bclen, cbind);
switch(am) {
case // special case map-only append
MR_MAPPEND:
{
boolean needPart = requiresPartitioning(right);
// pre partitioning
Lop dcInput = right.constructLops();
if (needPart) {
// right side in distributed cache
ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(right.getDim1(), right.getDim2(), OptimizerUtils.getSparsity(right.getDim1(), right.getDim2(), right.getNnz())) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : // operator selection
ExecType.MR;
dcInput = new DataPartition(dcInput, DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
dcInput.getOutputParameters().setDimensions(right.getDim1(), right.getDim2(), right.getRowsInBlock(), right.getColsInBlock(), right.getNnz());
dcInput.setAllPositions(right.getFilename(), right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
}
AppendM appM = new AppendM(left.constructLops(), dcInput, offset, dt, vt, cbind, needPart, ExecType.MR);
appM.setAllPositions(current.getFilename(), current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
appM.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
ret = appM;
break;
}
case // special case reduce append w/ one column block
MR_RAPPEND:
{
// group
Group group1 = new Group(left.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, vt);
group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, left.getNnz());
group1.setAllPositions(left.getFilename(), left.getBeginLine(), left.getBeginColumn(), left.getEndLine(), left.getEndColumn());
Group group2 = new Group(right.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, vt);
group1.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, right.getNnz());
group1.setAllPositions(right.getFilename(), right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
AppendR appR = new AppendR(group1, group2, dt, vt, cbind, ExecType.MR);
appR.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
appR.setAllPositions(current.getFilename(), current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
ret = appR;
break;
}
case MR_GAPPEND:
{
// general case: map expand append, reduce aggregate
// offset second input
Lop offset2 = createOffsetLop(right, cbind);
AppendG appG = new AppendG(left.constructLops(), right.constructLops(), offset, offset2, dt, vt, cbind, ExecType.MR);
appG.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
appG.setAllPositions(current.getFilename(), current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
// group
Group group1 = new Group(appG, Group.OperationTypes.Sort, DataType.MATRIX, vt);
group1.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
group1.setAllPositions(current.getFilename(), current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
// aggregate
Aggregate agg1 = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, vt, ExecType.MR);
agg1.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
agg1.setAllPositions(current.getFilename(), current.getBeginLine(), current.getBeginColumn(), current.getEndLine(), current.getEndColumn());
ret = agg1;
break;
}
default:
throw new HopsException("Invalid MR append method: " + am);
}
return ret;
}
use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.
the class ConvolutionOp method constructConvolutionLops.
public Lop constructConvolutionLops(ExecType et, ArrayList<Hop> inputs) {
if (inputs.size() != getNumExpectedInputs())
throw new HopsException("Incorrect number of inputs for " + op.name());
// ---------------------------------------------------------------
// Deal with fused operators and contruct lhsInputLop/optionalRhsInputLop
Lop lhsInputLop = null;
Lop optionalRhsInputLop = null;
ArrayList<Hop> inputsOfPotentiallyFusedOp = inputs;
OperationTypes lopOp = HopsConv2Lops.get(op);
// RELU_MAX_POOLING and RELU_MAX_POOLING_BACKWARD is extremely useful for CP backend
// by reducing unnecessary sparse-to-dense-to-sparse conversion.
// For other backends, this operators is not necessary as it reduces an additional relu operator.
Hop parentReLU = isInputReLU(inputs.get(0));
if (OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING && parentReLU != null) {
lhsInputLop = parentReLU.constructLops();
lopOp = OperationTypes.RELU_MAX_POOLING;
} else if (OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING_BACKWARD && parentReLU != null) {
lhsInputLop = parentReLU.constructLops();
lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD;
} else if (OptimizerUtils.ALLOW_OPERATOR_FUSION && op == ConvOp.BIAS_ADD && isInputConv2d(inputs.get(0))) {
lopOp = OperationTypes.DIRECT_CONV2D_BIAS_ADD;
// the first lop is image
lhsInputLop = inputs.get(0).getInput().get(0).constructLops();
// the second lop is bias
optionalRhsInputLop = inputs.get(1).constructLops();
// Use the inputs from conv2d rather than bias_add
inputsOfPotentiallyFusedOp = inputs.get(0).getInput();
} else {
lhsInputLop = inputs.get(0).constructLops();
}
// ---------------------------------------------------------------
// ---------------------------------------------------------------
// Compute intermediate memory budget that can be passed to GPU operators
// for better CuDNN operator selection at runtime
double intermediateMemEstimate = computeIntermediateMemEstimate(-1, -1, -1);
if (et == ExecType.GPU && _dim1 >= 0 && _dim2 >= 0) {
// This enables us to compile more efficient matrix-matrix CuDNN operation instead of
// row-by-row invocation of multiple vector-matrix CuDNN operations.
// This is possible as the operations on GPU are single-threaded
double optimisticIntermediateMemEstimate = GPUContextPool.initialGPUMemBudget() - getOutputMemEstimate() - inputs.get(0).getOutputMemEstimate();
if (optionalRhsInputLop != null) {
optimisticIntermediateMemEstimate -= inputs.get(1).getOutputMemEstimate();
}
intermediateMemEstimate = Math.max(intermediateMemEstimate, optimisticIntermediateMemEstimate);
}
// ---------------------------------------------------------------
// Contruct the lop
Lop optionalMaxPoolOutput = (et == ExecType.GPU) ? getMaxPoolOutputLop() : null;
Lop[] l2inputs = new Lop[inputsOfPotentiallyFusedOp.size() - 1];
for (int i = 1; i < inputsOfPotentiallyFusedOp.size(); i++) l2inputs[i - 1] = inputsOfPotentiallyFusedOp.get(i).constructLops();
ConvolutionTransform convolutionLop = new ConvolutionTransform(lhsInputLop, lopOp, getDataType(), getValueType(), et, OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), intermediateMemEstimate);
setOutputDimensions(convolutionLop);
setLineNumbers(convolutionLop);
// ---------------------------------------------------------------
// Add input/output for parent lops of convolutionLop
lhsInputLop.addOutput(convolutionLop);
if (optionalRhsInputLop != null) {
convolutionLop.addInput(optionalRhsInputLop);
optionalRhsInputLop.addOutput(convolutionLop);
}
for (int i = 0; i < l2inputs.length; i++) {
convolutionLop.addInput(l2inputs[i]);
l2inputs[i].addOutput(convolutionLop);
}
// Only valid for MAX_POOLING_BACKWARD on GPU
if (optionalMaxPoolOutput != null) {
convolutionLop.addInput(optionalMaxPoolOutput);
optionalMaxPoolOutput.addOutput(convolutionLop);
}
convolutionLop.updateLopProperties();
return convolutionLop;
}
use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.
the class FunctionOp method constructLops.
@Override
public Lop constructLops() {
// return already created lops
if (getLops() != null)
return getLops();
ExecType et = optFindExecType();
// construct input lops (recursive)
ArrayList<Lop> tmp = new ArrayList<>();
for (Hop in : getInput()) tmp.add(in.constructLops());
// construct function call
Lop fcall = _singleOutFun ? new FunctionCallCPSingle(tmp, _fnamespace, _fname, et) : new FunctionCallCP(tmp, _fnamespace, _fname, _outputs, _outputHops, et);
setLineNumbers(fcall);
setLops(fcall);
return getLops();
}
Aggregations