Example 71 with SparkExecutionContext

Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

From class CovarianceSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    COVOperator cop = ((COVOperator) _optr);
    // get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // process covariance instruction (with or without weights)
    CM_COV_Object cmobj = null;
    if (input3 == null) {
        // w/o weights
        cmobj = in1.join(in2).values().map(new RDDCOVFunction(cop)).fold(new CM_COV_Object(), new RDDCOVReduceFunction(cop));
    } else {
        // with weights
        JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
        cmobj = in1.join(in2).join(in3).values().map(new RDDCOVWeightsFunction(cop)).fold(new CM_COV_Object(), new RDDCOVReduceFunction(cop));
    }
    // create scalar output (no lineage information required)
    double val = cmobj.getRequiredResult(_optr);
    ec.setScalarOutput(output.getName(), new DoubleObject(val));
}
Also used: CM_COV_Object (org.apache.sysml.runtime.instructions.cp.CM_COV_Object), MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes), DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject), COVOperator (org.apache.sysml.runtime.matrix.operators.COVOperator), SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
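
The instruction above is a join-map-fold pipeline: the two input block RDDs are joined on matrix index, each joined pair is mapped to a partial aggregate (a CM_COV_Object), and fold merges the partials into a single result on the driver, with the empty CM_COV_Object as the neutral element. A minimal self-contained sketch of that pattern, with plain doubles standing in for matrix blocks; all names below are hypothetical illustrations, not SystemML API:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class JoinMapFoldSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JoinMapFoldSketch").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // two "columns" keyed by row index, analogous to in1/in2 above
            JavaPairRDD<Long, Double> x = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, 1.0), new Tuple2<>(1L, 2.0), new Tuple2<>(2L, 3.0)));
            JavaPairRDD<Long, Double> y = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, 2.0), new Tuple2<>(1L, 4.0), new Tuple2<>(2L, 6.0)));
            // join on row index, map each pair to a partial aggregate (here a
            // simple cross-product term), then fold the partials into one scalar
            double sumXY = x.join(y).values()
                .map(p -> p._1() * p._2())
                .fold(0.0, (a, b) -> a + b);
            System.out.println("sum(x*y) = " + sumXY);
        }
    }
}

fold (rather than reduce) is the natural choice here because it takes an explicit zero element and therefore behaves correctly even when some partitions are empty.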

Example 72 with SparkExecutionContext

Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

From class CpmmSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get rdd inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        // prune empty blocks of ultra-sparse matrices
        in1 = in1.filter(new FilterNonEmptyBlocksFunction());
        in2 = in2.filter(new FilterNonEmptyBlocksFunction());
    }
    // compute preferred join degree of parallelism
    int numPreferred = getPreferredParJoin(mc1, mc2, in1.getNumPartitions(), in2.getNumPartitions());
    int numPartJoin = Math.min(getMaxParJoin(mc1, mc2), numPreferred);
    // process core cpmm matrix multiply
    JavaPairRDD<Long, IndexedMatrixValue> tmp1 = in1.mapToPair(new CpmmIndexFunction(true));
    JavaPairRDD<Long, IndexedMatrixValue> tmp2 = in2.mapToPair(new CpmmIndexFunction(false));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = tmp1
        .join(tmp2, numPartJoin) // join over common dimension
        .mapToPair(new CpmmMultiplyFunction()); // compute block multiplications
    // process cpmm aggregation and handle outputs
    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        // prune empty blocks and aggregate all results
        out = out.filter(new FilterNonEmptyBlocksFunction());
        MatrixBlock out2 = RDDAggregateUtils.sumStable(out);
        // put output block into symbol table (no lineage because single block)
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), out2, getExtendedOpcode());
    } else {
        // DEFAULT: MULTI_BLOCK
        out = RDDAggregateUtils.sumByKeyStable(out, false);
        // put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
        sec.addLineageRDD(output.getName(), input2.getName());
        // update output statistics if not inferred
        updateBinaryMMOutputMatrixCharacteristics(sec, true);
    }
}
Also used: FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction), MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes), SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext), IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue), MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)
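
Cpmm re-keys both inputs by the common dimension k, joins them with a chosen degree of parallelism, multiplies the co-located entries, and sums the partial products per output index. A minimal sketch of the same dataflow on a toy cell-wise matrix, with single doubles in place of blocks; names are hypothetical:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

public class CpmmSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CpmmSketch").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // a toy 2x2 matrix stored as ((row, col), value) entries; we compute C = A * A
            List<Tuple2<Tuple2<Integer, Integer>, Double>> a = Arrays.asList(
                new Tuple2<>(new Tuple2<>(0, 0), 1.0), new Tuple2<>(new Tuple2<>(0, 1), 2.0),
                new Tuple2<>(new Tuple2<>(1, 0), 3.0), new Tuple2<>(new Tuple2<>(1, 1), 4.0));
            // re-key the left input by its column index and the right input by its
            // row index, i.e., by the common dimension k (the CpmmIndexFunction role)
            JavaPairRDD<Integer, Tuple2<Integer, Double>> leftByK = jsc.parallelizePairs(a)
                .mapToPair(e -> new Tuple2<>(e._1()._2(), new Tuple2<>(e._1()._1(), e._2())));
            JavaPairRDD<Integer, Tuple2<Integer, Double>> rightByK = jsc.parallelizePairs(a)
                .mapToPair(e -> new Tuple2<>(e._1()._1(), new Tuple2<>(e._1()._2(), e._2())));
            // join over k with explicit parallelism, emit ((i, j), partial product),
            // then sum the partials per output cell (the sumByKey role)
            JavaPairRDD<Tuple2<Integer, Integer>, Double> c = leftByK.join(rightByK, 2)
                .mapToPair(kv -> new Tuple2<>(
                    new Tuple2<>(kv._2()._1()._1(), kv._2()._2()._1()),
                    kv._2()._1()._2() * kv._2()._2()._2()))
                .reduceByKey(Double::sum);
            c.collect().forEach(System.out::println);
        }
    }
}

The plain reduceByKey stands in for RDDAggregateUtils.sumByKeyStable, minus the correction-based (numerically stable) accumulation SystemML applies to matrix blocks.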

Example 73 with SparkExecutionContext

Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

From class CtableSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
    double scalar_input2 = -1, scalar_input3 = -1;
    Ctable.OperationTypes ctableOp = Ctable.findCtableOperationByInputDataTypes(input1.getDataType(), input2.getDataType(), input3.getDataType());
    ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // First get the block sizes and then set them as -1 to allow for binary cell reblock
    int brlen = mc1.getRowsPerBlock();
    int bclen = mc1.getColsPerBlock();
    JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> inputMBs = null;
    JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
    JavaPairRDD<MatrixIndexes, Double> bincellsNoFilter = null;
    boolean setLineage2 = false;
    boolean setLineage3 = false;
    switch(ctableOp) {
        case CTABLE_TRANSFORM: // (VECTOR)
            // F=ctable(A,B,W)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage2 = true;
            setLineage3 = true;
            inputMBs = in1.cogroup(in2).cogroup(in3).mapToPair(new MapThreeMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
            // F = ctable(seq,A) or F = ctable(seq,B,1)
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            if (scalar_input3 == 1) {
                in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
                setLineage2 = true;
                bincellsNoFilter = in2.flatMapToPair(new ExpandScalarCtableOperation(brlen));
                break;
            }
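            // no break: when the scalar weight is not 1, execution falls through to CTABLE_TRANSFORM_SCALAR_WEIGHT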
        case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
            // F = ctable(A,B) or F = ctable(A,B,1)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            setLineage2 = true;
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in2).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1) or F = ctable(A,1,1)
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.mapToPair(new MapMBIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1,W)
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage3 = true;
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in3).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        default:
            throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
    }
    // Now perform aggregation on ctables to get binaryCells
    if (bincellsNoFilter == null && ctables != null) {
        bincellsNoFilter = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
        bincellsNoFilter = RDDAggregateUtils.sumCellsByKeyStable(bincellsNoFilter);
    } else if (!(bincellsNoFilter != null && ctables == null)) {
        throw new DMLRuntimeException("Incorrect ctable operation");
    }
    // handle known/unknown dimensions
    long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (sec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
    long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (sec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
    MatrixCharacteristics mcBinaryCells = null;
    boolean findDimensions = (outputDim1 == -1 && outputDim2 == -1);
    if (!findDimensions) {
        if ((outputDim1 == -1 && outputDim2 != -1) || (outputDim1 != -1 && outputDim2 == -1))
            throw new DMLRuntimeException("Incorrect output dimensions passed to TernarySPInstruction:" + outputDim1 + " " + outputDim2);
        else
            mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
        // filtering according to given dimensions
        bincellsNoFilter = bincellsNoFilter.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
    }
    // convert double values to matrix cell
    JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = bincellsNoFilter.mapToPair(new ConvertToBinaryCell());
    // find dimensions if necessary (w/ cache for reblock)
    if (findDimensions) {
        binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
        mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
    }
    // store output rdd handle
    sec.setRDDHandleForVariable(output.getName(), binaryCells);
    mcOut.set(mcBinaryCells);
    // Since we are outputting binary cells, we set block sizes = -1
    mcOut.setRowsPerBlock(-1);
    mcOut.setColsPerBlock(-1);
    sec.addLineageRDD(output.getName(), input1.getName());
    if (setLineage2)
        sec.addLineageRDD(output.getName(), input2.getName());
    if (setLineage3)
        sec.addLineageRDD(output.getName(), input3.getName());
}
Also used: MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), ArrayList (java.util.ArrayList), Ctable (org.apache.sysml.lops.Ctable), MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell), SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext), MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes), MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics), DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException), CTableMap (org.apache.sysml.runtime.matrix.data.CTableMap)
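
Stripped of block handling, every ctable variant builds a contingency table: row i contributes its weight to cell (A[i], B[i]), and the per-cell contributions are summed, which is the role of sumCellsByKeyStable above. A minimal sketch with the weight fixed at 1; names and toy data are hypothetical:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class CtableSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CtableSketch").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // two categorical columns keyed by row index, as in F = ctable(A, B)
            JavaPairRDD<Long, Integer> a = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, 1), new Tuple2<>(1L, 2), new Tuple2<>(2L, 1)));
            JavaPairRDD<Long, Integer> b = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, 3), new Tuple2<>(1L, 3), new Tuple2<>(2L, 4)));
            // align the columns by row index, emit ((a_i, b_i), 1), and sum by cell
            JavaPairRDD<Tuple2<Integer, Integer>, Double> cells = a.join(b).values()
                .mapToPair(p -> new Tuple2<>(p, 1.0))
                .reduceByKey(Double::sum);
            cells.collect().forEach(System.out::println);
        }
    }
}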

Example 74 with SparkExecutionContext

Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

From class FrameAppendRSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaPairRDD<Long, FrameBlock> in1 = sec.getFrameBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<Long, FrameBlock> in2 = sec.getFrameBinaryBlockRDDHandleForVariable(input2.getName());
    JavaPairRDD<Long, FrameBlock> out = null;
    long leftRows = sec.getMatrixCharacteristics(input1.getName()).getRows();
    if (_cbind) {
        JavaPairRDD<Long, FrameBlock> in1Aligned = in1.mapToPair(new ReduceSideAppendAlignFunction(leftRows));
        in1Aligned = FrameRDDAggregateUtils.mergeByKey(in1Aligned);
        JavaPairRDD<Long, FrameBlock> in2Aligned = in2.mapToPair(new ReduceSideAppendAlignFunction(leftRows));
        in2Aligned = FrameRDDAggregateUtils.mergeByKey(in2Aligned);
        out = in1Aligned.join(in2Aligned).mapValues(new ReduceSideColumnsFunction(_cbind));
    } else {
        // rbind
        JavaPairRDD<Long, FrameBlock> right = in2.mapToPair(new ReduceSideAppendRowsFunction(leftRows));
        out = in1.union(right);
    }
    // put output RDD handle into symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
    // update schema of output with merged input schemas
    sec.getFrameObject(output.getName()).setSchema(sec.getFrameObject(input1.getName()).mergeSchemas(sec.getFrameObject(input2.getName())));
}
Also used: FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock), SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
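
Both append paths reduce to standard RDD patterns: rbind shifts the right input's row keys past the end of the left input and takes the union, while cbind joins on the shared row key and concatenates the rows. A minimal sketch with strings standing in for frame rows; names are hypothetical:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;

public class AppendSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AppendSketch").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            JavaPairRDD<Long, String> left = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, "a0"), new Tuple2<>(1L, "a1")));
            JavaPairRDD<Long, String> right = jsc.parallelizePairs(Arrays.asList(
                new Tuple2<>(0L, "b0"), new Tuple2<>(1L, "b1")));
            long leftRows = left.count();
            // rbind: shift the right input's row keys by leftRows, then union
            // (the ReduceSideAppendRowsFunction + union path above)
            JavaPairRDD<Long, String> rbind = left.union(
                right.mapToPair(e -> new Tuple2<>(e._1() + leftRows, e._2())));
            // cbind: join on the shared row key and concatenate the rows
            // (the align + join + ReduceSideColumnsFunction path above)
            JavaPairRDD<Long, String> cbind = left.join(right)
                .mapValues(p -> p._1() + "," + p._2());
            rbind.collect().forEach(System.out::println);
            cbind.collect().forEach(System.out::println);
        }
    }
}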

Example 75 with SparkExecutionContext

Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

From class MapmmChainSPInstruction, method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get rdd and broadcast inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> inX = sec.getBinaryBlockRDDHandleForVariable(_input1.getName());
    PartitionedBroadcast<MatrixBlock> inV = sec.getBroadcastForVariable(_input2.getName());
    // execute mapmmchain (guaranteed to have single output block)
    MatrixBlock out = null;
    if (_chainType == ChainType.XtXv) {
        JavaRDD<MatrixBlock> tmp = inX.values().map(new RDDMapMMChainFunction(inV));
        out = RDDAggregateUtils.sumStable(tmp);
    } else {
        // ChainType.XtwXv / ChainType.XtXvy
        PartitionedBroadcast<MatrixBlock> inW = sec.getBroadcastForVariable(_input3.getName());
        JavaRDD<MatrixBlock> tmp = inX.map(new RDDMapMMChainFunction2(inV, inW, _chainType));
        out = RDDAggregateUtils.sumStable(tmp);
    }
    // put output block into symbol table (no lineage because single block)
    // this also includes implicit maintenance of matrix characteristics
    sec.setMatrixOutput(_output.getName(), out, getExtendedOpcode());
}
Also used: MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock), MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes), SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
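
mapmmchain avoids a shuffle entirely: the small vector v is broadcast to all executors, each row (or block) of X locally computes its contribution to XtXv, i.e. t(X) %*% (X %*% v), and the single-block partials are summed, as RDDAggregateUtils.sumStable does above. A minimal sketch with dense double[] rows in place of matrix blocks; names are hypothetical:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;

import java.util.Arrays;

public class MapmmChainSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("MapmmChainSketch").setMaster("local[2]");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // rows of X as dense arrays; v is small enough to broadcast
            JavaRDD<double[]> rowsOfX = jsc.parallelize(Arrays.asList(
                new double[] { 1, 2 }, new double[] { 3, 4 }, new double[] { 5, 6 }));
            Broadcast<double[]> v = jsc.broadcast(new double[] { 1, 1 });
            // each row x contributes x * (x . v) to t(X) %*% (X %*% v); the
            // per-row partials are then summed elementwise via fold
            double[] xtxv = rowsOfX.map(x -> {
                double dot = 0;
                for (int i = 0; i < x.length; i++)
                    dot += x[i] * v.value()[i];
                double[] part = new double[x.length];
                for (int i = 0; i < x.length; i++)
                    part[i] = x[i] * dot;
                return part;
            }).fold(new double[2], (p1, p2) -> {
                double[] s = new double[p1.length];
                for (int i = 0; i < p1.length; i++)
                    s[i] = p1[i] + p2[i];
                return s;
            });
            System.out.println(Arrays.toString(xtxv));
        }
    }
}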

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 112 uses
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 92 uses
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 92 uses
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 71 uses
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 39 uses
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 22 uses
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 14 uses
DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject): 12 uses
ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject): 9 uses
PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast): 8 uses
FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction): 7 uses
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 7 uses
ArrayList (java.util.ArrayList): 6 uses
CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 6 uses
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 6 uses
AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction): 6 uses
AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 6 uses
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 5 uses
FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 5 uses
ValueType (org.apache.sysml.parser.Expression.ValueType): 4 uses