Search in sources :

Example 76 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class MatrixAppendRSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    // reduce-only append (output must have at most one column block)
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    checkBinaryAppendInputCharacteristics(sec, _cbind, true, false);
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // execute reduce-append operations (partitioning preserving)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.join(in2).mapValues(new ReduceSideAppendFunction(_cbind));
    // put output RDD handle into symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 77 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class PmmSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String rddVar = (_type == CacheType.LEFT) ? input2.getName() : input1.getName();
    String bcastVar = (_type == CacheType.LEFT) ? input1.getName() : input2.getName();
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(output.getName());
    long rlen = sec.getScalarInput(_nrow.getName(), _nrow.getValueType(), _nrow.isLiteral()).getLongValue();
    // get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(bcastVar);
    // execute pmm instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDPMMFunction(_type, in2, rlen, mc.getRowsPerBlock()));
    out = RDDAggregateUtils.sumByKeyStable(out, false);
    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    sec.addLineageBroadcast(output.getName(), bcastVar);
    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, false);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 78 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class QuantilePickSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdds
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    // (in contrast to cp instructions, w/o weights does not materializes weights of 1)
    switch(_type) {
        case VALUEPICK:
            {
                ScalarObject quantile = ec.getScalarInput(input2);
                double[] wt = getWeightedQuantileSummary(in, mc, quantile.getDoubleValue());
                ec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
                break;
            }
        case MEDIAN:
            {
                double[] wt = getWeightedQuantileSummary(in, mc, 0.5);
                ec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
                break;
            }
        case IQM:
            {
                double[] wt = getWeightedQuantileSummary(in, mc, 0.25, 0.75);
                long key25 = (long) Math.ceil(wt[1]);
                long key75 = (long) Math.ceil(wt[2]);
                JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.filter(new FilterFunction(key25 + 1, key75, mc.getRowsPerBlock())).mapToPair(new ExtractAndSumFunction(key25 + 1, key75, mc.getRowsPerBlock()));
                double sum = RDDAggregateUtils.sumStable(out).getValue(0, 0);
                double val = MatrixBlock.computeIQMCorrection(sum, wt[0], wt[3], wt[5], wt[4], wt[6]);
                ec.setScalarOutput(output.getName(), new DoubleObject(val));
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported qpick operation type: " + _type);
    }
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 79 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class SpoofSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // decide upon broadcast side inputs
    boolean[] bcVect = determineBroadcastInputs(sec, _in);
    boolean[] bcVect2 = getMatrixBroadcastVector(sec, _in, bcVect);
    int main = getMainInputIndex(_in, bcVect);
    // create joined input rdd w/ replication if needed
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[main].getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock[]> in = createJoinedInputRDD(sec, _in, bcVect, (_class.getSuperclass() == SpoofOuterProduct.class));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    // create lists of input broadcasts and scalars
    ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices = new ArrayList<>();
    ArrayList<ScalarObject> scalars = new ArrayList<>();
    for (int i = 0; i < _in.length; i++) {
        if (_in[i].getDataType() == DataType.MATRIX && bcVect[i]) {
            bcMatrices.add(sec.getBroadcastForVariable(_in[i].getName()));
        } else if (_in[i].getDataType() == DataType.SCALAR) {
            // note: even if literal, it might be compiled as scalar placeholder
            scalars.add(sec.getScalarInput(_in[i].getName(), _in[i].getValueType(), _in[i].isLiteral()));
        }
    }
    // execute generated operator
    if (// CELL
    _class.getSuperclass() == SpoofCellwise.class) {
        SpoofCellwise op = (SpoofCellwise) CodegenUtils.createInstance(_class);
        AggregateOperator aggop = getAggregateOperator(op.getAggOp());
        if (_out.getDataType() == DataType.MATRIX) {
            // execute codegen block operation
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            if ((op.getCellType() == CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) || (op.getCellType() == CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
                long numBlocks = (op.getCellType() == CellType.ROW_AGG) ? mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
                out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        } else {
            // SCALAR
            out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
            sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
        }
    } else if (// MAGG
    _class.getSuperclass() == SpoofMultiAggregate.class) {
        SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
        AggOp[] aggOps = op.getAggOps();
        MatrixBlock tmpMB = in.mapToPair(new MultiAggregateFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars)).values().fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps));
        sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
    } else if (// OUTER
    _class.getSuperclass() == SpoofOuterProduct.class) {
        if (_out.getDataType() == DataType.MATRIX) {
            SpoofOperator op = (SpoofOperator) CodegenUtils.createInstance(_class);
            OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
            // update matrix characteristics
            updateOutputMatrixCharacteristics(sec, op);
            MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            if (type == OutProdType.LEFT_OUTER_PRODUCT || type == OutProdType.RIGHT_OUTER_PRODUCT) {
                long numBlocks = mcOut.getNumRowBlocks() * mcOut.getNumColBlocks();
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
        } else {
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
            sec.setVariable(_out.getName(), new DoubleObject(tmp.getValue(0, 0)));
        }
    } else if (_class.getSuperclass() == SpoofRowwise.class) {
        // ROW
        if (mcIn.getCols() > mcIn.getColsPerBlock()) {
            throw new DMLRuntimeException("Invalid spark rowwise operator w/ ncol=" + mcIn.getCols() + ", ncolpb=" + mcIn.getColsPerBlock() + ".");
        }
        SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
        long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2() : op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
        RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars, (int) mcIn.getCols(), (int) clen2);
        out = in.mapPartitionsToPair(fmmc, op.getRowType() == RowType.ROW_AGG || op.getRowType() == RowType.NO_AGG);
        if (op.getRowType().isColumnAgg() || op.getRowType() == RowType.FULL_AGG) {
            MatrixBlock tmpMB = RDDAggregateUtils.sumStable(out);
            if (op.getRowType().isColumnAgg())
                sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
            else
                sec.setScalarOutput(_out.getName(), new DoubleObject(tmpMB.quickGetValue(0, 0)));
        } else // row-agg or no-agg
        {
            if (op.getRowType() == RowType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) {
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), mcIn.getNumRowBlocks()), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        }
    } else {
        throw new DMLRuntimeException("Operator " + _class.getSuperclass() + " is not supported on Spark");
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) SpoofRowwise(org.apache.sysml.runtime.codegen.SpoofRowwise) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) ArrayList(java.util.ArrayList) SpoofOperator(org.apache.sysml.runtime.codegen.SpoofOperator) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) PartitionedBroadcast(org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast) AggregateOperator(org.apache.sysml.runtime.matrix.operators.AggregateOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) SpoofMultiAggregate(org.apache.sysml.runtime.codegen.SpoofMultiAggregate) OutProdType(org.apache.sysml.runtime.codegen.SpoofOuterProduct.OutProdType) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SpoofOuterProduct(org.apache.sysml.runtime.codegen.SpoofOuterProduct) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) SpoofCellwise(org.apache.sysml.runtime.codegen.SpoofCellwise)

Example 80 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class UaggOuterChainSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    boolean rightCached = (_uaggOp.indexFn instanceof ReduceCol || _uaggOp.indexFn instanceof ReduceAll || !LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp));
    String rddVar = (rightCached) ? input1.getName() : input2.getName();
    String bcastVar = (rightCached) ? input2.getName() : input1.getName();
    // get rdd input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddVar);
    boolean noKeyChange = preservesPartitioning(mcIn, _uaggOp.indexFn);
    // execute UAggOuterChain instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp)) {
        // create sorted broadcast matrix
        MatrixBlock mb = sec.getMatrixInput(bcastVar, getExtendedOpcode());
        sec.releaseMatrixInput(bcastVar, getExtendedOpcode());
        // prevent lineage tracking
        bcastVar = null;
        double[] vmb = DataConverter.convertToDoubleVector(mb);
        Broadcast<int[]> bvi = null;
        if (_uaggOp.aggOp.increOp.fn instanceof Builtin) {
            int[] vix = LibMatrixOuterAgg.prepareRowIndices(mb.getNumColumns(), vmb, _bOp, _uaggOp);
            bvi = sec.getSparkContext().broadcast(vix);
        } else
            Arrays.sort(vmb);
        Broadcast<double[]> bv = sec.getSparkContext().broadcast(vmb);
        // partitioning-preserving map-to-pair (under constraints)
        out = in1.mapPartitionsToPair(new RDDMapUAggOuterChainFunction(bv, bvi, _bOp, _uaggOp), noKeyChange);
    } else {
        PartitionedBroadcast<MatrixBlock> bv = sec.getBroadcastForVariable(bcastVar);
        // partitioning-preserving map-to-pair (under constraints)
        out = in1.mapPartitionsToPair(new RDDMapGenUAggOuterChainFunction(bv, _uaggOp, _aggOp, _bOp, mcIn), noKeyChange);
    }
    // final aggregation if required
    if (// RC AGG (output is scalar)
    _uaggOp.indexFn instanceof ReduceAll) {
        MatrixBlock tmp = RDDAggregateUtils.aggStable(out, _aggOp);
        // drop correction after aggregation
        tmp.dropLastRowsOrColumns(_aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block)
        sec.setMatrixOutput(output.getName(), tmp, getExtendedOpcode());
    } else // R/C AGG (output is rdd)
    {
        // put output RDD handle into symbol table
        updateUnaryAggOutputMatrixCharacteristics(sec);
        if (_uaggOp.aggOp.correctionExists)
            out = out.mapValues(new AggregateDropCorrectionFunction(_uaggOp.aggOp));
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), rddVar);
        if (bcastVar != null)
            sec.addLineageBroadcast(output.getName(), bcastVar);
    }
}
Also used : ReduceCol(org.apache.sysml.runtime.functionobjects.ReduceCol) ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) Builtin(org.apache.sysml.runtime.functionobjects.Builtin)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)112 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)92 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)92 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)71 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)39 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)14 DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject)12 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)9 PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast)8 FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction)7 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)7 ArrayList (java.util.ArrayList)6 CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand)6 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)6 AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction)6 AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator)6 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)5 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)5 ValueType (org.apache.sysml.parser.Expression.ValueType)4