Search in sources :

Example 16 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructSparkLopsWeightedCeMM.

/**
 * Constructs the Spark lop for the weighted cross-entropy operation over the inputs
 * X, U, V and eps (inputs 0 to 3 of this hop) and sets it as the lop of this hop.
 *
 * <p>A map-side {@code WeightedCrossEntropy} is created if replication is not forced and
 * both factors pass the memory checks below; otherwise a {@code WeightedCrossEntropyR}
 * is created, with flags stating which of U and V are cached.
 *
 * @param wtype the weighted cross-entropy type handed to the created lop
 */
private void constructSparkLopsWeightedCeMM(WCeMMType wtype) {
    // NOTE: wcemm is commonly used with factors U/V of rank 10s to 100s; the current runtime
    // only supports single-block outer products (U/V rank <= blocksize, i.e., 1000 by default),
    // which is enforced by applying the Weighted Cross Entropy hop rewrite only if that holds.
    // A broadcast has to fit twice into local memory (the input is partitioned in cp) and once
    // into the executor broadcast memory. A 2GB broadcast constraint is not required anymore
    // because the max_int byte buffer constraint has been fixed in Spark 1.4.
    final double execBudget = SparkExecutionContext.getBroadcastMemoryBudget();
    final double localBudget = OptimizerUtils.getLocalMemBudget();

    final Hop X = getInput().get(0);
    final Hop U = getInput().get(1);
    final Hop V = getInput().get(2);
    final Hop eps = getInput().get(3);

    // operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    final boolean localFitsU = 2 * sizeU < localBudget;
    final boolean localFitsV = 2 * sizeV < localBudget;
    final boolean broadcastBoth = sizeU + sizeV < execBudget && localFitsU && localFitsV;

    final Lop wcemm;
    if (!FORCE_REPLICATION && broadcastBoth) {
        // map-side wcemm, always with broadcast
        wcemm = new WeightedCrossEntropy(X.constructLops(), U.constructLops(), V.constructLops(), eps.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, ExecType.SPARK);
    } else {
        // operator selection, part 2: decide per factor whether it can be cached
        final boolean cacheU = !FORCE_REPLICATION && sizeU < execBudget && localFitsU;
        // if U is cached as well, both factors together have to fit into the broadcast budget
        final double broadcastDemandV = cacheU ? sizeU + sizeV : sizeV;
        final boolean cacheV = !FORCE_REPLICATION && broadcastDemandV < execBudget && localFitsV;
        // reduce-side wcemm with or without broadcast
        wcemm = new WeightedCrossEntropyR(X.constructLops(), U.constructLops(), V.constructLops(), eps.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
    }

    setOutputDimensions(wcemm);
    setLineNumbers(wcemm);
    setLops(wcemm);
}
Also used : WeightedCrossEntropyR(org.apache.sysml.lops.WeightedCrossEntropyR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedCrossEntropy(org.apache.sysml.lops.WeightedCrossEntropy) Lop(org.apache.sysml.lops.Lop)

Example 17 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class QuaternaryOp method constructSparkLopsWeightedUMM.

/**
 * Constructs the Spark lop for the weighted unary matrix-multiply operation over the inputs
 * X, U and V (inputs 0 to 2 of this hop) and sets it as the lop of this hop.
 *
 * <p>A map-side {@code WeightedUnaryMM} is created if replication is not forced and both
 * factors pass the memory checks below; otherwise a {@code WeightedUnaryMMR} is created,
 * with flags stating which of U and V are cached.
 *
 * @param wtype the weighted unary-mm type handed to the created lop
 */
private void constructSparkLopsWeightedUMM(WUMMType wtype) {
    // NOTE: wumm is commonly used with factors U/V of rank 10s to 100s; the current runtime
    // only supports single-block outer products (U/V rank <= blocksize, i.e., 1000 by default),
    // which is enforced by applying the Weighted UnaryMM hop rewrite only if that holds.

    // unary operation type: taken from _uop if present, otherwise derived from _sop
    final Unary.OperationTypes uop;
    if (_uop != null) {
        uop = HopsOpOp1LopsU.get(_uop);
    } else if (_sop == OpOp2.POW) {
        uop = Unary.OperationTypes.POW2;
    } else {
        uop = Unary.OperationTypes.MULTIPLY2;
    }

    // A broadcast has to fit twice into local memory (the input is partitioned in cp) and once
    // into the executor broadcast memory. A 2GB broadcast constraint is not required anymore
    // because the max_int byte buffer constraint has been fixed in Spark 1.4.
    final double execBudget = SparkExecutionContext.getBroadcastMemoryBudget();
    final double localBudget = OptimizerUtils.getLocalMemBudget();

    final Hop X = getInput().get(0);
    final Hop U = getInput().get(1);
    final Hop V = getInput().get(2);

    // operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    final boolean localFitsU = 2 * sizeU < localBudget;
    final boolean localFitsV = 2 * sizeV < localBudget;
    final boolean isMapWumm = sizeU + sizeV < execBudget && localFitsU && localFitsV;

    final Lop wumm;
    if (!FORCE_REPLICATION && isMapWumm) {
        // map-side wumm, always with broadcast
        wumm = new WeightedUnaryMM(X.constructLops(), U.constructLops(), V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, uop, ExecType.SPARK);
    } else {
        // operator selection, part 2: decide per factor whether it can be cached
        final boolean cacheU = !FORCE_REPLICATION && sizeU < execBudget && localFitsU;
        // if U is cached as well, both factors together have to fit into the broadcast budget
        final double broadcastDemandV = cacheU ? sizeU + sizeV : sizeV;
        final boolean cacheV = !FORCE_REPLICATION && broadcastDemandV < execBudget && localFitsV;
        // reduce-side wumm with or without broadcast
        wumm = new WeightedUnaryMMR(X.constructLops(), U.constructLops(), V.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, uop, cacheU, cacheV, ExecType.SPARK);
    }

    setOutputDimensions(wumm);
    setLineNumbers(wumm);
    setLops(wumm);
}
Also used : WeightedUnaryMMR(org.apache.sysml.lops.WeightedUnaryMMR) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) WeightedUnaryMM(org.apache.sysml.lops.WeightedUnaryMM) Lop(org.apache.sysml.lops.Lop) Unary(org.apache.sysml.lops.Unary)

Example 18 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class TernaryOp method constructLopsTernaryDefault.

/**
 * Constructs the {@code Ternary} lop over the three inputs of this hop and sets it as the
 * lop of this hop.
 *
 * <p>For CP, SPARK and GPU the input lops are passed to the ternary lop directly. For any
 * other execution type, every matrix input is first wrapped into a sort-based {@code Group}
 * lop.
 */
private void constructLopsTernaryDefault() {
    ExecType et = optFindExecType();
    // pure scalar operations are always executed in CP
    if (getInput().stream().allMatch(h -> h.getDataType().isScalar())) {
        et = ExecType.CP;
    }

    final Ternary ternaryLop;
    final boolean directInputs = (et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU);
    if (directInputs) {
        ternaryLop = new Ternary(HopsOpOp3Lops.get(_op), getInput().get(0).constructLops(), getInput().get(1).constructLops(), getInput().get(2).constructLops(), getDataType(), getValueType(), et);
    } else {
        // MR: group every matrix input before handing it to the ternary lop
        final Hop[] operands = { getInput().get(0), getInput().get(1), getInput().get(2) };
        final Lop[] operandLops = new Lop[operands.length];
        for (int i = 0; i < operands.length; i++) {
            Lop current = operands[i].constructLops();
            if (operands[i].getDataType().isMatrix()) {
                current = new Group(current, Group.OperationTypes.Sort, getDataType(), getValueType());
                setLineNumbers(current);
                setOutputDimensions(current);
            }
            operandLops[i] = current;
        }
        ternaryLop = new Ternary(HopsOpOp3Lops.get(_op), operandLops[0], operandLops[1], operandLops[2], getDataType(), getValueType(), et);
    }

    setOutputDimensions(ternaryLop);
    setLineNumbers(ternaryLop);
    setLops(ternaryLop);
}
Also used : Group(org.apache.sysml.lops.Group) Ternary(org.apache.sysml.lops.Ternary) CombineTernary(org.apache.sysml.lops.CombineTernary) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop)

Example 19 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class UnaryOp method constructLopsSparkCumulativeUnary.

/**
 * Constructs the chain of Spark lops for a cumulative unary operation over the first input.
 *
 * <p>The chain consists of three phases: (1) repeated per-block partial aggregation until the
 * aggregates pass the local memory check (or exactly once more if a level is forced),
 * (2) an in-memory CP unary over the final aggregates unless only a single row is left, and
 * (3) one offset-binary lop per aggregation level, applied from the innermost level outwards.
 *
 * @return the last lop of the constructed chain
 */
private Lop constructLopsSparkCumulativeUnary() {
    final Hop in = getInput().get(0);
    final long rlen = in.getDim1();
    final long clen = in.getDim2();
    final long brlen = in.getRowsInBlock();
    final long bclen = in.getColsInBlock();
    // unknown dimensions or a forced spark exec type require at least one aggregation level
    boolean forceLevel = !dimsKnown() || _etypeForced == ExecType.SPARK;
    final OperationTypes aggtype = getCumulativeAggType();

    Lop current = in.constructLops();
    final ArrayList<Lop> levelInputs = new ArrayList<>();
    int numLevels = 0;

    // recursive preaggregation until the aggregates fit into the CP memory budget
    for (;;) {
        final long curRows = current.getOutputParameters().getNumRows();
        final boolean exceedsBudget = (2 * OptimizerUtils.estimateSize(curRows, clen) + OptimizerUtils.estimateSize(1, clen)) > OptimizerUtils.getLocalMemBudget();
        if (!(exceedsBudget && curRows > 1) && !forceLevel) {
            break;
        }
        levelInputs.add(current);
        // preaggregation per block (for spark, the CumulativePartialAggregate subsumes both
        // the preaggregation and the subsequent block aggregation)
        final long rlenAgg = (long) Math.ceil((double) curRows / brlen);
        final Lop preagg = new CumulativePartialAggregate(current, DataType.MATRIX, ValueType.DOUBLE, aggtype, ExecType.SPARK);
        preagg.getOutputParameters().setDimensions(rlenAgg, clen, brlen, bclen, -1);
        setLineNumbers(preagg);
        current = preagg;
        numLevels++;
        // a forced level is generated only once
        forceLevel = false;
    }

    // in-memory cumulative operation over the partial aggregates
    final long aggRows = current.getOutputParameters().getNumRows();
    if (aggRows != 1) {
        final int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        final Unary inMemory = new Unary(current, HopsOpOp1LopsU.get(_op), DataType.MATRIX, ValueType.DOUBLE, ExecType.CP, k);
        inMemory.getOutputParameters().setDimensions(aggRows, clen, brlen, bclen, -1);
        setLineNumbers(inMemory);
        current = inMemory;
    }

    // apply the offsets level by level, innermost level first
    for (int lvl = numLevels - 1; lvl >= 0; lvl--) {
        // (for spark, the CumulativeOffsetBinary subsumes both the split aggregate and the
        // subsequent offset binary apply of split aggregates against the original data)
        final double initValue = getCumulativeInitValue();
        final CumulativeOffsetBinary offset = new CumulativeOffsetBinary(levelInputs.get(lvl), current, DataType.MATRIX, ValueType.DOUBLE, initValue, aggtype, ExecType.SPARK);
        offset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, -1);
        setLineNumbers(offset);
        current = offset;
    }
    return current;
}
Also used : CumulativePartialAggregate(org.apache.sysml.lops.CumulativePartialAggregate) OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) CumulativeOffsetBinary(org.apache.sysml.lops.CumulativeOffsetBinary) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ArrayList(java.util.ArrayList) Lop(org.apache.sysml.lops.Lop) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary)

Example 20 with Lop

use of org.apache.sysml.lops.Lop in project incubator-systemml by apache.

the class UnaryOp method constructLops.

/**
 * Returns the lop of this unary hop, constructing it on first use.
 *
 * <p>Scalar outputs and casts of a scalar input to matrix/frame are handled by the IQM,
 * median or generic {@code UnaryCP} paths. All other cases are handled as matrix
 * operations: cumulative operations outside CP/GPU get a dedicated MR or Spark lop chain,
 * everything else a default {@code Unary} lop.
 *
 * @return the existing or newly constructed lop
 * @throws HopsException wrapping any exception raised while constructing the lop
 */
@Override
public Lop constructLops() {
    // reuse existing lop
    final Lop existing = getLops();
    if (existing != null) {
        return existing;
    }

    try {
        final Hop input = getInput().get(0);
        // value type casts or matrix to scalar
        final boolean scalarPath = getDataType() == DataType.SCALAR
            || ((_op == OpOp1.CAST_AS_MATRIX || _op == OpOp1.CAST_AS_FRAME) && input.getDataType() == DataType.SCALAR);

        final Lop constructed;
        if (scalarPath) {
            if (_op == Hop.OpOp1.IQM) {
                // special handling IQM
                constructed = constructLopsIQM();
            } else if (_op == Hop.OpOp1.MEDIAN) {
                // special handling median
                constructed = constructLopsMedian();
            } else {
                // general case SCALAR/CAST (always in CP)
                final UnaryCP.OperationTypes optype = HopsOpOp1LopsUS.get(_op);
                if (optype == null) {
                    throw new HopsException("Unknown UnaryCP lop type for UnaryOp operation type '" + _op + "'");
                }
                final UnaryCP scalarLop = new UnaryCP(input.constructLops(), optype, getDataType(), getValueType());
                setOutputDimensions(scalarLop);
                setLineNumbers(scalarLop);
                constructed = scalarLop;
            }
        } else {
            // general case MATRIX
            final ExecType et = optFindExecType();
            final boolean inMemoryExec = (et == ExecType.CP || et == ExecType.GPU);
            if (isCumulativeUnaryOperation() && !inMemoryExec) {
                // special handling cumsum/cumprod/cummin/cummax
                // TODO additional physical operation if offsets fit in memory
                constructed = (et == ExecType.MR) ? constructLopsMRCumulativeUnary() : constructLopsSparkCumulativeUnary();
            } else {
                // default unary; only cumulative operations get a constrained thread count
                final int k = isCumulativeUnaryOperation() ? OptimizerUtils.getConstrainedNumThreads(_maxNumThreads) : 1;
                final Unary matrixLop = new Unary(input.constructLops(), HopsOpOp1LopsU.get(_op), getDataType(), getValueType(), et, k);
                setOutputDimensions(matrixLop);
                setLineNumbers(matrixLop);
                constructed = matrixLop;
            }
        }
        setLops(constructed);
    } catch (Exception e) {
        throw new HopsException(this.printErrorLocation() + "error constructing Lops for UnaryOp Hop -- \n ", e);
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    return getLops();
}
Also used : OperationTypes(org.apache.sysml.lops.Aggregate.OperationTypes) MultiThreadedHop(org.apache.sysml.hops.Hop.MultiThreadedHop) ExecType(org.apache.sysml.lops.LopProperties.ExecType) Lop(org.apache.sysml.lops.Lop) CombineUnary(org.apache.sysml.lops.CombineUnary) Unary(org.apache.sysml.lops.Unary) UnaryCP(org.apache.sysml.lops.UnaryCP)

Aggregations

Lop (org.apache.sysml.lops.Lop)171 MultiThreadedHop (org.apache.sysml.hops.Hop.MultiThreadedHop)66 ExecType (org.apache.sysml.lops.LopProperties.ExecType)52 Group (org.apache.sysml.lops.Group)45 ArrayList (java.util.ArrayList)35 Aggregate (org.apache.sysml.lops.Aggregate)32 DataPartition (org.apache.sysml.lops.DataPartition)30 LopsException (org.apache.sysml.lops.LopsException)30 Data (org.apache.sysml.lops.Data)24 Instruction (org.apache.sysml.runtime.instructions.Instruction)23 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)18 Unary (org.apache.sysml.lops.Unary)16 Transform (org.apache.sysml.lops.Transform)15 HashMap (java.util.HashMap)14 UnaryCP (org.apache.sysml.lops.UnaryCP)14 Dag (org.apache.sysml.lops.compile.Dag)13 Hop (org.apache.sysml.hops.Hop)11 RepMat (org.apache.sysml.lops.RepMat)11 Binary (org.apache.sysml.lops.Binary)9 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)9