Search in sources :

Example 51 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ReblockSPInstruction method processMatrixReblockInstruction.

@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) {
    MatrixObject mo = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        // get the input textcell rdd
        JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        // convert textcell to binary block
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
        // put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
        // throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
        CSVReblockSPInstruction csvInstruction = null;
        boolean hasHeader = false;
        String delim = ",";
        boolean fill = false;
        double fillValue = 0;
        if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties && mo.getFileFormatProperties() != null) {
            CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
            hasHeader = props.hasHeader();
            delim = props.getDelim();
            fill = props.isFill();
            fillValue = props.getFillValue();
        }
        csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
        csvInstruction.processInstruction(sec);
        return;
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
        // put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        // BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        boolean shuffleFreeReblock = mc.dimsKnown() && mcOut.dimsKnown() && (mc.getRows() < mcOut.getRowsPerBlock() || mc.getRowsPerBlock() % mcOut.getRowsPerBlock() == 0) && (mc.getCols() < mcOut.getColsPerBlock() || mc.getColsPerBlock() % mcOut.getColsPerBlock() == 0);
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
        if (!shuffleFreeReblock)
            out = RDDAggregateUtils.mergeByKey(out, false);
        // put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Text(org.apache.hadoop.io.Text) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) ExtractBlockForBinaryReblock(org.apache.sysml.runtime.instructions.spark.functions.ExtractBlockForBinaryReblock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) MatrixCell(org.apache.sysml.runtime.matrix.data.MatrixCell) LongWritable(org.apache.hadoop.io.LongWritable)

Example 52 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ReorgSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();
    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    if (// TRANSPOSE
    opcode.equalsIgnoreCase("r'")) {
        // execute transpose reorg operation
        out = in1.mapToPair(new ReorgMapFunction(opcode));
    } else if (// REVERSE
    opcode.equalsIgnoreCase("rev")) {
        // execute reverse reorg operation
        out = in1.flatMapToPair(new RDDRevFunction(mcIn));
        if (mcIn.getRows() % mcIn.getRowsPerBlock() != 0)
            out = RDDAggregateUtils.mergeByKey(out, false);
    } else if (// DIAG
    opcode.equalsIgnoreCase("rdiag")) {
        if (mcIn.getCols() == 1) {
            // diagV2M
            out = in1.flatMapToPair(new RDDDiagV2MFunction(mcIn));
        } else {
            // diagM2V
            // execute diagM2V operation
            out = in1.filter(new FilterDiagBlocksFunction()).mapToPair(new ReorgMapFunction(opcode));
        }
    } else if (// ORDER
    opcode.equalsIgnoreCase("rsort")) {
        // Sort by column 'col' in ascending/descending order and return either index/value
        // get parameters
        long[] cols = _col.getDataType().isMatrix() ? DataConverter.convertToLongVector(ec.getMatrixInput(_col.getName())) : new long[] { ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue() };
        boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean singleCol = (mcIn.getCols() == 1);
        out = in1;
        if (cols.length > mcIn.getColsPerBlock())
            LOG.warn("Unsupported sort with number of order-by columns large than blocksize: " + cols.length);
        if (singleCol || cols.length == 1) {
            // extract column (if necessary) and sort
            if (!singleCol)
                out = out.filter(new IsBlockInRange(1, mcIn.getRows(), cols[0], cols[0], mcIn)).mapValues(new ExtractColumn((int) UtilFunctions.computeCellInBlock(cols[0], mcIn.getColsPerBlock())));
            // actual index/data sort operation
            if (// sort indexes
            ixret)
                out = RDDSortUtils.sortIndexesByVal(out, !desc, mcIn.getRows(), mcIn.getRowsPerBlock());
            else if (// sort single-column matrix
            singleCol && !desc)
                out = RDDSortUtils.sortByVal(out, mcIn.getRows(), mcIn.getRowsPerBlock());
            else if (// sort multi-column matrix w/ rewrite
            !_bSortIndInMem)
                out = RDDSortUtils.sortDataByVal(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
            else
                // sort multi-column matrix
                out = RDDSortUtils.sortDataByValMemSort(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), sec, (ReorgOperator) _optr);
        } else {
            // extract columns (if necessary)
            if (cols.length < mcIn.getCols())
                out = out.filter(new IsBlockInList(cols, mcIn)).mapToPair(new ExtractColumns(cols, mcIn));
            // append extracted columns (if necessary)
            if (mcIn.getCols() > mcIn.getColsPerBlock())
                out = RDDAggregateUtils.mergeByKey(out);
            // actual index/data sort operation
            if (// sort indexes
            ixret)
                out = RDDSortUtils.sortIndexesByVals(out, !desc, mcIn.getRows(), (long) cols.length, mcIn.getRowsPerBlock());
            else if (// sort single-column matrix
            cols.length == mcIn.getCols() && !desc)
                out = RDDSortUtils.sortByVals(out, mcIn.getRows(), cols.length, mcIn.getRowsPerBlock());
            else
                // sort multi-column matrix
                out = RDDSortUtils.sortDataByVals(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), cols.length, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }
    // store output rdd handle
    if (opcode.equalsIgnoreCase("rsort") && _col.getDataType().isMatrix())
        sec.releaseMatrixInput(_col.getName());
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FilterDiagBlocksFunction(org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ReorgMapFunction(org.apache.sysml.runtime.instructions.spark.functions.ReorgMapFunction) IsBlockInList(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInList)

Example 53 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class ReorgSPInstruction method updateReorgMatrixCharacteristics.

private void updateReorgMatrixCharacteristics(SparkExecutionContext sec) {
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // infer initially unknown dimensions from inputs
    if (!mcOut.dimsKnown()) {
        if (!mc1.dimsKnown())
            throw new DMLRuntimeException("Unable to compute output matrix characteristics from input.");
        if (getOpcode().equalsIgnoreCase("r'"))
            mcOut.set(mc1.getCols(), mc1.getRows(), mc1.getColsPerBlock(), mc1.getRowsPerBlock());
        else if (getOpcode().equalsIgnoreCase("rdiag"))
            mcOut.set(mc1.getRows(), (mc1.getCols() > 1) ? 1 : mc1.getRows(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        else if (getOpcode().equalsIgnoreCase("rsort")) {
            boolean ixret = sec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
            mcOut.set(mc1.getRows(), ixret ? 1 : mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock());
        }
    }
    // infer initially unknown nnz from input
    if (!mcOut.nnzKnown() && mc1.nnzKnown()) {
        boolean sortIx = getOpcode().equalsIgnoreCase("rsort") && sec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        if (sortIx)
            mcOut.setNonZeros(mc1.getRows());
        else
            // default (r', rdiag, rsort data)
            mcOut.setNonZeros(mc1.getNonZeros());
    }
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 54 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class RmmSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdds
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mcOut = updateBinaryMMOutputMatrixCharacteristics(sec, true);
    // execute Spark RMM instruction
    // step 1: prepare join keys (w/ shallow replication), i/j/k
    JavaPairRDD<TripleIndexes, MatrixBlock> tmp1 = in1.flatMapToPair(new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true));
    JavaPairRDD<TripleIndexes, MatrixBlock> tmp2 = in2.flatMapToPair(new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));
    // step 2: join prepared datasets, multiply, and aggregate
    int numPartJoin = Math.max(getNumJoinPartitions(mc1, mc2), SparkExecutionContext.getDefaultParallelism(true));
    int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = tmp1.join(tmp2, // join by result block
    numPartJoin).mapToPair(// do matrix multiplication
    new RmmMultiplyFunction());
    out = // aggregation per result block
    RDDAggregateUtils.sumByKeyStable(// aggregation per result block
    out, numPartOut, false);
    // put output block into symbol table (no lineage because single block)
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TripleIndexes(org.apache.sysml.runtime.matrix.data.TripleIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 55 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class SpoofSPInstruction method updateOutputMatrixCharacteristics.

private void updateOutputMatrixCharacteristics(SparkExecutionContext sec, SpoofOperator op) {
    if (op instanceof SpoofCellwise) {
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[0].getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
        if (((SpoofCellwise) op).getCellType() == CellType.ROW_AGG)
            mcOut.set(mcIn.getRows(), 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
        else if (((SpoofCellwise) op).getCellType() == CellType.NO_AGG)
            mcOut.set(mcIn);
    } else if (op instanceof SpoofOuterProduct) {
        // X
        MatrixCharacteristics mcIn1 = sec.getMatrixCharacteristics(_in[0].getName());
        // U
        MatrixCharacteristics mcIn2 = sec.getMatrixCharacteristics(_in[1].getName());
        // V
        MatrixCharacteristics mcIn3 = sec.getMatrixCharacteristics(_in[2].getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
        OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
        if (type == OutProdType.CELLWISE_OUTER_PRODUCT)
            mcOut.set(mcIn1.getRows(), mcIn1.getCols(), mcIn1.getRowsPerBlock(), mcIn1.getColsPerBlock());
        else if (type == OutProdType.LEFT_OUTER_PRODUCT)
            mcOut.set(mcIn3.getRows(), mcIn3.getCols(), mcIn3.getRowsPerBlock(), mcIn3.getColsPerBlock());
        else if (type == OutProdType.RIGHT_OUTER_PRODUCT)
            mcOut.set(mcIn2.getRows(), mcIn2.getCols(), mcIn2.getRowsPerBlock(), mcIn2.getColsPerBlock());
    } else if (op instanceof SpoofRowwise) {
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[0].getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
        RowType type = ((SpoofRowwise) op).getRowType();
        if (type == RowType.NO_AGG)
            mcOut.set(mcIn);
        else if (type == RowType.ROW_AGG)
            mcOut.set(mcIn.getRows(), 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
        else if (type == RowType.COL_AGG)
            mcOut.set(1, mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
        else if (type == RowType.COL_AGG_T)
            mcOut.set(mcIn.getCols(), 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
    }
}
Also used : OutProdType(org.apache.sysml.runtime.codegen.SpoofOuterProduct.OutProdType) SpoofRowwise(org.apache.sysml.runtime.codegen.SpoofRowwise) RowType(org.apache.sysml.runtime.codegen.SpoofRowwise.RowType) SpoofOuterProduct(org.apache.sysml.runtime.codegen.SpoofOuterProduct) SpoofCellwise(org.apache.sysml.runtime.codegen.SpoofCellwise) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)296 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)102 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)89 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)70 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)50 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)47 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)45 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)42 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)37 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)37 IOException (java.io.IOException)30 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)27 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)22 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 Path (org.apache.hadoop.fs.Path)17 LongWritable (org.apache.hadoop.io.LongWritable)16 Test (org.junit.Test)15 Text (org.apache.hadoop.io.Text)14