Example 1 with ConvolutionTransform

Use of org.apache.sysml.lops.ConvolutionTransform in project incubator-systemml by apache.

The class AggUnaryOp, method constructLops:

@Override
public Lop constructLops() {
    // return already created lops
    if (getLops() != null)
        return getLops();
    try {
        ExecType et = optFindExecType();
        Hop input = getInput().get(0);
        if (et == ExecType.CP || et == ExecType.GPU) {
            Lop agg1 = null;
            long numChannels = isChannelSumRewriteApplicable() ? Hop.computeSizeInformation(getInput().get(0).getInput().get(1)) : -1;
            if (numChannels > 0 && numChannels < 1000000) {
                // Apply the channel-sums rewrite only if it is applicable, the dimension C is known
                // at compile time, and numChannels is below 1 million (so the C x 1 intermediate
                // stays small, on the order of 8 MB of doubles).
                ReorgOp in = ((ReorgOp) getInput().get(0));
                agg1 = new ConvolutionTransform(in.getInput().get(0).getInput().get(0).constructLops(), in.getInput().get(1).constructLops(), in.getInput().get(2).constructLops(), ConvolutionTransform.OperationTypes.CHANNEL_SUMS, getDataType(), getValueType(), et, -1);
                agg1.getOutputParameters().setDimensions(numChannels, 1, getRowsInBlock(), getColsInBlock(), -1);
                setLineNumbers(agg1);
                setLops(agg1);
            } else {
                if (isTernaryAggregateRewriteApplicable()) {
                    agg1 = constructLopsTernaryAggregateRewrite(et);
                } else if (isUnaryAggregateOuterCPRewriteApplicable()) {
                    OperationTypes op = HopsAgg2Lops.get(_op);
                    DirectionTypes dir = HopsDirection2Lops.get(_direction);
                    BinaryOp binput = (BinaryOp) getInput().get(0);
                    agg1 = new UAggOuterChain(binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.CP);
                    PartialAggregate.setDimensionsBasedOnDirection(agg1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);
                    if (getDataType() == DataType.SCALAR) {
                        UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
                        unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
                        setLineNumbers(unary1);
                        setLops(unary1);
                    }
                } else {
                    // general case
                    int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
                    agg1 = new PartialAggregate(input.constructLops(), HopsAgg2Lops.get(_op), HopsDirection2Lops.get(_direction), getDataType(), getValueType(), et, k);
                }
                setOutputDimensions(agg1);
                setLineNumbers(agg1);
                setLops(agg1);
                if (getDataType() == DataType.SCALAR) {
                    agg1.getOutputParameters().setDimensions(1, 1, getRowsInBlock(), getColsInBlock(), getNnz());
                }
            }
        } else if (et == ExecType.MR) {
            OperationTypes op = HopsAgg2Lops.get(_op);
            DirectionTypes dir = HopsDirection2Lops.get(_direction);
            // unary aggregate operation
            Lop transform1 = null;
            if (isUnaryAggregateOuterRewriteApplicable()) {
                BinaryOp binput = (BinaryOp) getInput().get(0);
                transform1 = new UAggOuterChain(binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.MR);
                PartialAggregate.setDimensionsBasedOnDirection(transform1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);
            } else // default
            {
                transform1 = new PartialAggregate(input.constructLops(), op, dir, DataType.MATRIX, getValueType());
                ((PartialAggregate) transform1).setDimensionsBasedOnDirection(getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
            }
            setLineNumbers(transform1);
            // aggregation if required
            Lop aggregate = null;
            Group group1 = null;
            Aggregate agg1 = null;
            if (requiresAggregation(input, _direction) || transform1 instanceof UAggOuterChain) {
                group1 = new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
                group1.getOutputParameters().setDimensions(getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
                setLineNumbers(group1);
                agg1 = new Aggregate(group1, HopsAgg2Lops.get(_op), DataType.MATRIX, getValueType(), et);
                agg1.getOutputParameters().setDimensions(getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
                agg1.setupCorrectionLocation(PartialAggregate.getCorrectionLocation(op, dir));
                setLineNumbers(agg1);
                aggregate = agg1;
            } else {
                ((PartialAggregate) transform1).setDropCorrection();
                aggregate = transform1;
            }
            setLops(aggregate);
            // cast if required
            if (getDataType() == DataType.SCALAR) {
                // Set the dimensions of PartialAggregate LOP based on the
                // direction in which aggregation is performed
                PartialAggregate.setDimensionsBasedOnDirection(transform1, input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);
                if (group1 != null && agg1 != null) {
                    // if aggregation required
                    group1.getOutputParameters().setDimensions(input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), getNnz());
                    agg1.getOutputParameters().setDimensions(1, 1, input.getRowsInBlock(), input.getColsInBlock(), getNnz());
                }
                UnaryCP unary1 = new UnaryCP(aggregate, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
                unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
                setLineNumbers(unary1);
                setLops(unary1);
            }
        } else if (et == ExecType.SPARK) {
            OperationTypes op = HopsAgg2Lops.get(_op);
            DirectionTypes dir = HopsDirection2Lops.get(_direction);
            // unary aggregate
            if (isTernaryAggregateRewriteApplicable()) {
                Lop aggregate = constructLopsTernaryAggregateRewrite(et);
                // 0x0 (scalar)
                setOutputDimensions(aggregate);
                setLineNumbers(aggregate);
                setLops(aggregate);
            } else if (isUnaryAggregateOuterSPRewriteApplicable()) {
                BinaryOp binput = (BinaryOp) getInput().get(0);
                Lop transform1 = new UAggOuterChain(binput.getInput().get(0).constructLops(), binput.getInput().get(1).constructLops(), op, dir, HopsOpOp2LopsB.get(binput.getOp()), DataType.MATRIX, getValueType(), ExecType.SPARK);
                PartialAggregate.setDimensionsBasedOnDirection(transform1, getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock(), dir);
                setLineNumbers(transform1);
                setLops(transform1);
                if (getDataType() == DataType.SCALAR) {
                    UnaryCP unary1 = new UnaryCP(transform1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
                    unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
                    setLineNumbers(unary1);
                    setLops(unary1);
                }
            } else // default
            {
                boolean needAgg = requiresAggregation(input, _direction);
                SparkAggType aggtype = getSparkUnaryAggregationType(needAgg);
                PartialAggregate aggregate = new PartialAggregate(input.constructLops(), HopsAgg2Lops.get(_op), HopsDirection2Lops.get(_direction), DataType.MATRIX, getValueType(), aggtype, et);
                aggregate.setDimensionsBasedOnDirection(getDim1(), getDim2(), input.getRowsInBlock(), input.getColsInBlock());
                setLineNumbers(aggregate);
                setLops(aggregate);
                if (getDataType() == DataType.SCALAR) {
                    UnaryCP unary1 = new UnaryCP(aggregate, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
                    unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
                    setLineNumbers(unary1);
                    setLops(unary1);
                }
            }
        }
    } catch (Exception e) {
        throw new HopsException(this.printErrorLocation() + "In AggUnary Hop, error constructing Lops ", e);
    }
    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    // return created lops
    return getLops();
}
Also used : PartialAggregate(org.apache.sysml.lops.PartialAggregate) Group(org.apache.sysml.lops.Group) SparkAggType(org.apache.sysml.hops.AggBinaryOp.SparkAggType) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) UAggOuterChain(org.apache.sysml.lops.UAggOuterChain) UnaryCP(org.apache.sysml.lops.UnaryCP) OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) DirectionTypes(org.apache.sysml.lops.PartialAggregate.DirectionTypes) ExecType(org.apache.sysml.lops.LopProperties.ExecType) ConvolutionTransform(org.apache.sysml.lops.ConvolutionTransform) TernaryAggregate(org.apache.sysml.lops.TernaryAggregate) Aggregate(org.apache.sysml.lops.Aggregate)
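For context on the CHANNEL_SUMS lop compiled above: it reduces an N x (C*H*W) row-major input matrix to a C x 1 vector of per-channel sums (the rewrite passes the input matrix plus two scalar inputs for C and H*W; the exact DML pattern it matches is not shown in this snippet). Below is a minimal plain-Java sketch of that semantics; the class and method names are illustrative and not part of SystemML.

// Plain-Java sketch of the CHANNEL_SUMS semantics: given a dense
// N x (C*HW) row-major matrix, accumulate all cell values per channel.
// Illustrative only; SystemML's actual kernel lives in its runtime.
public final class ChannelSumsSketch {
    static double[] channelSums(double[][] x, int C, int HW) {
        double[] sums = new double[C];
        for (double[] row : x)                  // over N rows
            for (int c = 0; c < C; c++)         // over channels
                for (int j = 0; j < HW; j++)    // over spatial positions
                    sums[c] += row[c * HW + j];
        return sums;
    }

    public static void main(String[] args) {
        // N=2 rows, C=2 channels, HW=2 spatial cells per channel
        double[][] x = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 } };
        double[] s = channelSums(x, 2, 2);
        System.out.println(s[0] + " " + s[1]);  // prints: 14.0 22.0
    }
}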

Example 2 with ConvolutionTransform

Use of org.apache.sysml.lops.ConvolutionTransform in project systemml by apache.

The class BinaryOp, method constructLopsBinaryDefault:

private void constructLopsBinaryDefault() {
    /* Default behavior for BinaryOp: lop construction depends on the input data types */
    DataType dt1 = getInput().get(0).getDataType();
    DataType dt2 = getInput().get(1).getDataType();
    if (dt1 == dt2 && dt1 == DataType.SCALAR) {
        // Both operands scalar
        BinaryScalar binScalar1 = new BinaryScalar(getInput().get(0).constructLops(), getInput().get(1).constructLops(), HopsOpOp2LopsBS.get(op), getDataType(), getValueType());
        binScalar1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
        setLineNumbers(binScalar1);
        setLops(binScalar1);
    } else if ((dt1 == DataType.MATRIX && dt2 == DataType.SCALAR) || (dt1 == DataType.SCALAR && dt2 == DataType.MATRIX)) {
        // One operand is a matrix and the other a scalar
        ExecType et = optFindExecType();
        // select specific operator implementations
        Unary.OperationTypes ot = null;
        Hop right = getInput().get(1);
        if (op == OpOp2.POW && right instanceof LiteralOp && ((LiteralOp) right).getDoubleValue() == 2.0)
            ot = Unary.OperationTypes.POW2;
        else if (op == OpOp2.MULT && right instanceof LiteralOp && ((LiteralOp) right).getDoubleValue() == 2.0)
            ot = Unary.OperationTypes.MULTIPLY2;
        else
            // general case
            ot = HopsOpOp2LopsU.get(op);
        Unary unary1 = new Unary(getInput().get(0).constructLops(), getInput().get(1).constructLops(), ot, getDataType(), getValueType(), et);
        setOutputDimensions(unary1);
        setLineNumbers(unary1);
        setLops(unary1);
    } else {
        // Both operands are matrices
        ExecType et = optFindExecType();
        boolean isGPUSoftmax = et == ExecType.GPU && op == Hop.OpOp2.DIV && getInput().get(0) instanceof UnaryOp && getInput().get(1) instanceof AggUnaryOp && ((UnaryOp) getInput().get(0)).getOp() == OpOp1.EXP && ((AggUnaryOp) getInput().get(1)).getOp() == AggOp.SUM && ((AggUnaryOp) getInput().get(1)).getDirection() == Direction.Row && getInput().get(0) == getInput().get(1).getInput().get(0);
        if (isGPUSoftmax) {
            UnaryCP softmax = new UnaryCP(getInput().get(0).getInput().get(0).constructLops(), UnaryCP.OperationTypes.SOFTMAX, getDataType(), getValueType(), et);
            setOutputDimensions(softmax);
            setLineNumbers(softmax);
            setLops(softmax);
        } else if (et == ExecType.CP || et == ExecType.GPU) {
            Lop binary = null;
            boolean isLeftXGt = (getInput().get(0) instanceof BinaryOp) && ((BinaryOp) getInput().get(0)).getOp() == OpOp2.GREATER;
            Hop potentialZero = isLeftXGt ? ((BinaryOp) getInput().get(0)).getInput().get(1) : null;
            boolean isLeftXGt0 = isLeftXGt && potentialZero != null && potentialZero instanceof LiteralOp && ((LiteralOp) potentialZero).getDoubleValue() == 0;
            if (op == OpOp2.MULT && isLeftXGt0 && !getInput().get(0).isVector() && !getInput().get(1).isVector() && getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) {
                binary = new ConvolutionTransform(getInput().get(0).getInput().get(0).constructLops(), getInput().get(1).constructLops(), ConvolutionTransform.OperationTypes.RELU_BACKWARD, getDataType(), getValueType(), et, -1);
            } else
                binary = new Binary(getInput().get(0).constructLops(), getInput().get(1).constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
            setOutputDimensions(binary);
            setLineNumbers(binary);
            setLops(binary);
        } else if (et == ExecType.SPARK) {
            Hop left = getInput().get(0);
            Hop right = getInput().get(1);
            MMBinaryMethod mbin = optFindMMBinaryMethodSpark(left, right);
            Lop binary = null;
            if (mbin == MMBinaryMethod.MR_BINARY_UAGG_CHAIN) {
                AggUnaryOp uRight = (AggUnaryOp) right;
                binary = new BinaryUAggChain(left.constructLops(), HopsOpOp2LopsB.get(op), HopsAgg2Lops.get(uRight.getOp()), HopsDirection2Lops.get(uRight.getDirection()), getDataType(), getValueType(), et);
            } else if (mbin == MMBinaryMethod.MR_BINARY_M) {
                boolean partitioned = false;
                boolean isColVector = (right.getDim2() == 1 && left.getDim1() == right.getDim1());
                binary = new BinaryM(left.constructLops(), right.constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et, partitioned, isColVector);
            } else {
                binary = new Binary(left.constructLops(), right.constructLops(), HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
            }
            setOutputDimensions(binary);
            setLineNumbers(binary);
            setLops(binary);
        } else // MR
        {
            Hop left = getInput().get(0);
            Hop right = getInput().get(1);
            MMBinaryMethod mbin = optFindMMBinaryMethod(left, right);
            if (mbin == MMBinaryMethod.MR_BINARY_M) {
                boolean needPart = requiresPartitioning(right);
                Lop dcInput = right.constructLops();
                if (needPart) {
                    // right side in distributed cache
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(right.getDim1(), right.getDim2(), OptimizerUtils.getSparsity(right.getDim1(), right.getDim2(), right.getNnz())) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR; // operator selection based on the local memory budget
                    dcInput = new DataPartition(dcInput, DataType.MATRIX, ValueType.DOUBLE, etPart, (right.getDim2() == 1) ? PDataPartitionFormat.ROW_BLOCK_WISE_N : PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
                    dcInput.getOutputParameters().setDimensions(right.getDim1(), right.getDim2(), right.getRowsInBlock(), right.getColsInBlock(), right.getNnz());
                    dcInput.setAllPositions(right.getFilename(), right.getBeginLine(), right.getBeginColumn(), right.getEndLine(), right.getEndColumn());
                }
                BinaryM binary = new BinaryM(left.constructLops(), dcInput, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), ExecType.MR, needPart, (right.getDim2() == 1 && left.getDim1() == right.getDim1()));
                setOutputDimensions(binary);
                setLineNumbers(binary);
                setLops(binary);
            } else if (mbin == MMBinaryMethod.MR_BINARY_UAGG_CHAIN) {
                AggUnaryOp uRight = (AggUnaryOp) right;
                BinaryUAggChain bin = new BinaryUAggChain(left.constructLops(), HopsOpOp2LopsB.get(op), HopsAgg2Lops.get(uRight.getOp()), HopsDirection2Lops.get(uRight.getDirection()), getDataType(), getValueType(), et);
                setOutputDimensions(bin);
                setLineNumbers(bin);
                setLops(bin);
            } else if (mbin == MMBinaryMethod.MR_BINARY_OUTER_R) {
                boolean requiresRepLeft = (!right.dimsKnown() || right.getDim2() > right.getColsInBlock());
                boolean requiresRepRight = (!left.dimsKnown() || left.getDim1() > right.getRowsInBlock());
                Lop leftLop = left.constructLops();
                Lop rightLop = right.constructLops();
                if (requiresRepLeft) {
                    // ncol of right determines rep of left
                    Lop offset = createOffsetLop(right, true);
                    leftLop = new RepMat(leftLop, offset, true, left.getDataType(), left.getValueType());
                    setOutputDimensions(leftLop);
                    setLineNumbers(leftLop);
                }
                if (requiresRepRight) {
                    // nrow of left determines rep of right
                    Lop offset = createOffsetLop(left, false);
                    rightLop = new RepMat(rightLop, offset, false, right.getDataType(), right.getValueType());
                    setOutputDimensions(rightLop);
                    setLineNumbers(rightLop);
                }
                Group group1 = new Group(leftLop, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group1);
                setOutputDimensions(group1);
                Group group2 = new Group(rightLop, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group2);
                setOutputDimensions(group2);
                Binary binary = new Binary(group1, group2, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
                setOutputDimensions(binary);
                setLineNumbers(binary);
                setLops(binary);
            } else // MMBinaryMethod.MR_BINARY_R
            {
                boolean requiresRep = requiresReplication(left, right);
                Lop rightLop = right.constructLops();
                if (requiresRep) {
                    // ncol of left input (determines num replicates)
                    Lop offset = createOffsetLop(left, (right.getDim2() <= 1));
                    rightLop = new RepMat(rightLop, offset, (right.getDim2() <= 1), right.getDataType(), right.getValueType());
                    setOutputDimensions(rightLop);
                    setLineNumbers(rightLop);
                }
                Group group1 = new Group(getInput().get(0).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group1);
                setOutputDimensions(group1);
                Group group2 = new Group(rightLop, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(group2);
                setOutputDimensions(group2);
                Binary binary = new Binary(group1, group2, HopsOpOp2LopsB.get(op), getDataType(), getValueType(), et);
                setLineNumbers(binary);
                setOutputDimensions(binary);
                setLops(binary);
            }
        }
    }
}
Also used : Group(org.apache.sysml.lops.Group) BinaryM(org.apache.sysml.lops.BinaryM) BinaryUAggChain(org.apache.sysml.lops.BinaryUAggChain) Lop(org.apache.sysml.lops.Lop) Unary(org.apache.sysml.lops.Unary) CombineUnary(org.apache.sysml.lops.CombineUnary) UnaryCP(org.apache.sysml.lops.UnaryCP) RepMat(org.apache.sysml.lops.RepMat) OperationTypes(org.apache.sysml.lops.CombineBinary.OperationTypes) DataType(org.apache.sysml.parser.Expression.DataType) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Binary(org.apache.sysml.lops.Binary) CombineBinary(org.apache.sysml.lops.CombineBinary) BinaryScalar(org.apache.sysml.lops.BinaryScalar) ConvolutionTransform(org.apache.sysml.lops.ConvolutionTransform) DataPartition(org.apache.sysml.lops.DataPartition)
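The CP/GPU branch above fuses the two-operator pattern (X > 0) * dout into a single RELU_BACKWARD lop, so the 0/1 mask from the comparison never has to be materialized. A minimal plain-Java sketch of the fused semantics follows; the class and method names are illustrative, not SystemML APIs.

// Plain-Java sketch of RELU_BACKWARD semantics: out = (X > 0) * dout,
// computed in one pass without building the intermediate mask matrix.
public final class ReluBackwardSketch {
    static double[][] reluBackward(double[][] X, double[][] dout) {
        int m = X.length, n = X[0].length;
        double[][] out = new double[m][n];
        for (int i = 0; i < m; i++)
            for (int j = 0; j < n; j++)
                out[i][j] = (X[i][j] > 0) ? dout[i][j] : 0.0;
        return out;
    }

    public static void main(String[] args) {
        double[][] X    = { { 1.0, -2.0 }, { 0.0, 3.0 } };
        double[][] dout = { { 0.5,  0.5 }, { 0.5, 0.5 } };
        double[][] g = reluBackward(X, dout);
        System.out.println(g[0][0] + " " + g[0][1]);  // 0.5 0.0
        System.out.println(g[1][0] + " " + g[1][1]);  // 0.0 0.5
    }
}

The isGPUSoftmax branch in the same method plays the same game one level up: it recognizes exp(X) / rowSums(exp(X)) and compiles a single SOFTMAX lop instead of three separate operators.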

Example 3 with ConvolutionTransform

Use of org.apache.sysml.lops.ConvolutionTransform in project systemml by apache.

The class ConvolutionOp, method constructConvolutionLops:

public Lop constructConvolutionLops(ExecType et, ArrayList<Hop> inputs) {
    if (inputs.size() != getNumExpectedInputs())
        throw new HopsException("Incorrect number of inputs for " + op.name());
    // ---------------------------------------------------------------
    // Deal with fused operators and construct lhsInputLop/optionalRhsInputLop
    Lop lhsInputLop = null;
    Lop optionalRhsInputLop = null;
    ArrayList<Hop> inputsOfPotentiallyFusedOp = inputs;
    OperationTypes lopOp = HopsConv2Lops.get(op);
    // RELU_MAX_POOLING and RELU_MAX_POOLING_BACKWARD are extremely useful for the CP backend
    // because they avoid unnecessary sparse-to-dense-to-sparse conversions.
    // For other backends, these fused operators are not necessary; they merely save one additional relu operator.
    Hop parentReLU = isInputReLU(inputs.get(0));
    if (OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING && parentReLU != null) {
        lhsInputLop = parentReLU.constructLops();
        lopOp = OperationTypes.RELU_MAX_POOLING;
    } else if (OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOLING_BACKWARD && parentReLU != null) {
        lhsInputLop = parentReLU.constructLops();
        lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD;
    } else if (OptimizerUtils.ALLOW_OPERATOR_FUSION && op == ConvOp.BIAS_ADD && isInputConv2d(inputs.get(0))) {
        lopOp = OperationTypes.DIRECT_CONV2D_BIAS_ADD;
        // the first lop is image
        lhsInputLop = inputs.get(0).getInput().get(0).constructLops();
        // the second lop is bias
        optionalRhsInputLop = inputs.get(1).constructLops();
        // Use the inputs from conv2d rather than bias_add
        inputsOfPotentiallyFusedOp = inputs.get(0).getInput();
    } else {
        lhsInputLop = inputs.get(0).constructLops();
    }
    // ---------------------------------------------------------------
    // ---------------------------------------------------------------
    // Compute intermediate memory budget that can be passed to GPU operators
    // for better CuDNN operator selection at runtime
    double intermediateMemEstimate = computeIntermediateMemEstimate(-1, -1, -1);
    if (et == ExecType.GPU && _dim1 >= 0 && _dim2 >= 0) {
        // This enables us to compile a more efficient matrix-matrix CuDNN operation instead of
        // a row-by-row invocation of multiple vector-matrix CuDNN operations.
        // This is possible because the operations on the GPU are single-threaded.
        double optimisticIntermediateMemEstimate = GPUContextPool.initialGPUMemBudget() - getOutputMemEstimate() - inputs.get(0).getOutputMemEstimate();
        if (optionalRhsInputLop != null) {
            optimisticIntermediateMemEstimate -= inputs.get(1).getOutputMemEstimate();
        }
        intermediateMemEstimate = Math.max(intermediateMemEstimate, optimisticIntermediateMemEstimate);
    }
    // ---------------------------------------------------------------
    // Construct the lop
    Lop optionalMaxPoolOutput = (et == ExecType.GPU) ? getMaxPoolOutputLop() : null;
    Lop[] l2inputs = new Lop[inputsOfPotentiallyFusedOp.size() - 1];
    for (int i = 1; i < inputsOfPotentiallyFusedOp.size(); i++)
        l2inputs[i - 1] = inputsOfPotentiallyFusedOp.get(i).constructLops();
    ConvolutionTransform convolutionLop = new ConvolutionTransform(lhsInputLop, lopOp, getDataType(), getValueType(), et, OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), intermediateMemEstimate);
    setOutputDimensions(convolutionLop);
    setLineNumbers(convolutionLop);
    // ---------------------------------------------------------------
    // Add input/output for parent lops of convolutionLop
    lhsInputLop.addOutput(convolutionLop);
    if (optionalRhsInputLop != null) {
        convolutionLop.addInput(optionalRhsInputLop);
        optionalRhsInputLop.addOutput(convolutionLop);
    }
    for (int i = 0; i < l2inputs.length; i++) {
        convolutionLop.addInput(l2inputs[i]);
        l2inputs[i].addOutput(convolutionLop);
    }
    // Only valid for MAX_POOLING_BACKWARD on GPU
    if (optionalMaxPoolOutput != null) {
        convolutionLop.addInput(optionalMaxPoolOutput);
        optionalMaxPoolOutput.addOutput(convolutionLop);
    }
    convolutionLop.updateLopProperties();
    return convolutionLop;
}
Also used : OperationTypes(org.apache.sysml.lops.ConvolutionTransform.OperationTypes) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) Lop(org.apache.sysml.lops.Lop) ConvolutionTransform(org.apache.sysml.lops.ConvolutionTransform)
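The GPU branch above widens the intermediate memory budget optimistically: it takes the larger of the conservative estimate and whatever remains of the GPU budget after the output and input estimates are subtracted. A small worked example of that arithmetic follows; all byte counts are made up for illustration, not real GPU defaults.

// Worked example of the optimistic intermediate-memory estimate:
// remaining budget = total GPU budget - output estimate - input estimate,
// floored against the conservative estimate via Math.max.
public final class MemBudgetSketch {
    public static void main(String[] args) {
        double conservative = 64L * 1024 * 1024;         // 64 MB
        double gpuBudget    = 2L * 1024 * 1024 * 1024;   // 2 GB (illustrative)
        double outputMem    = 256L * 1024 * 1024;        // 256 MB
        double inputMem     = 512L * 1024 * 1024;        // 512 MB
        double optimistic   = gpuBudget - outputMem - inputMem;
        double budget = Math.max(conservative, optimistic);
        System.out.printf("intermediate budget: %.0f MB%n",
                budget / (1024 * 1024));                 // prints: 1280 MB
    }
}

Passing the wider budget down lets the runtime select a matrix-matrix CuDNN algorithm where it would otherwise fall back to row-wise vector-matrix calls.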

Example 4 with ConvolutionTransform

Use of org.apache.sysml.lops.ConvolutionTransform in project incubator-systemml by apache.

The class BinaryOp, method constructLopsBinaryDefault. This snippet is identical to Example 2 above (the incubator-systemml and systemml repositories share this code), so it is not repeated here.

Example 5 with ConvolutionTransform

Use of org.apache.sysml.lops.ConvolutionTransform in project incubator-systemml by apache.

The class ConvolutionOp, method constructConvolutionLops. This snippet is identical to Example 3 above (the incubator-systemml and systemml repositories share this code), so it is not repeated here.

Aggregations

ConvolutionTransform (org.apache.sysml.lops.ConvolutionTransform): 6
Lop (org.apache.sysml.lops.Lop): 6
MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop): 4
Group (org.apache.sysml.lops.Group): 4
ExecType (org.apache.sysml.lops.LopProperties.ExecType): 4
UnaryCP (org.apache.sysml.lops.UnaryCP): 4
SparkAggType (org.apache.sysml.hops.AggBinaryOp.SparkAggType): 2
Aggregate (org.apache.sysml.lops.Aggregate): 2
OperationTypes (org.apache.sysml.lops.Aggregate.OperationTypes): 2
Binary (org.apache.sysml.lops.Binary): 2
BinaryM (org.apache.sysml.lops.BinaryM): 2
BinaryScalar (org.apache.sysml.lops.BinaryScalar): 2
BinaryUAggChain (org.apache.sysml.lops.BinaryUAggChain): 2
CombineBinary (org.apache.sysml.lops.CombineBinary): 2
OperationTypes (org.apache.sysml.lops.CombineBinary.OperationTypes): 2
CombineUnary (org.apache.sysml.lops.CombineUnary): 2
OperationTypes (org.apache.sysml.lops.ConvolutionTransform.OperationTypes): 2
DataPartition (org.apache.sysml.lops.DataPartition): 2
PartialAggregate (org.apache.sysml.lops.PartialAggregate): 2
DirectionTypes (org.apache.sysml.lops.PartialAggregate.DirectionTypes): 2