
Example 21 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class OptimizerRuleBased method rewriteRemoveUnnecessaryCompareMatrix.

///////
//REWRITE remove compare matrix (for result merge, needs to be invoked before setting result merge)
///
protected void rewriteRemoveUnnecessaryCompareMatrix(OptNode n, ExecutionContext ec) throws DMLRuntimeException {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    ArrayList<String> cleanedVars = new ArrayList<String>();
    ArrayList<String> resultVars = pfpb.getResultVariables();
    String itervar = pfpb.getIterablePredicateVars()[0];
    for (String rvar : resultVars) {
        Data dat = ec.getVariable(rvar);
        if (dat instanceof MatrixObject && ((MatrixObject) dat).getNnz() != 0 //subject to result merge with compare
                && n.hasOnlySimpleChilds() //guaranteed no conditional indexing
                && rContainsResultFullReplace(n, rvar, itervar, (MatrixObject) dat) //guaranteed full matrix replace
                //&& !pfsb.variablesRead().containsVariable(rvar) //never read variable in loop body
                && !rIsReadInRightIndexing(n, rvar) //never read variable in loop body
                && ((MatrixObject) dat).getNumRows() <= Integer.MAX_VALUE
                && ((MatrixObject) dat).getNumColumns() <= Integer.MAX_VALUE) {
            //replace existing matrix object with empty matrix
            MatrixObject mo = (MatrixObject) dat;
            ec.cleanupMatrixObject(mo);
            ec.setMatrixOutput(rvar, new MatrixBlock((int) mo.getNumRows(), (int) mo.getNumColumns(), false));
            //keep track of cleaned result variables
            cleanedVars.add(rvar);
        }
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'remove unnecessary compare matrix' - result=" + (!cleanedVars.isEmpty()) + " (" + ProgramConverter.serializeStringCollection(cleanedVars) + ")");
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ArrayList(java.util.ArrayList) Data(org.apache.sysml.runtime.instructions.cp.Data) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) ParForProgramBlock(org.apache.sysml.runtime.controlprogram.ParForProgramBlock)
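
The rewrite installs an empty block via ec.setMatrixOutput so the subsequent result merge can skip the compare matrix. A minimal standalone sketch of that replacement step is shown below, using only the MatrixBlock constructor and accessors that appear in the rewrite; the dimensions are hypothetical placeholders, not values from SystemML.

import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class EmptyCompareBlockSketch {
    public static void main(String[] args) {
        //hypothetical result dimensions (placeholders only)
        int rows = 1000;
        int cols = 1000;
        //dense (sparse=false) empty block, analogous to the replacement above
        MatrixBlock empty = new MatrixBlock(rows, cols, false);
        //an empty block reports zero non-zeros, so no compare matrix is needed
        System.out.println("dims: " + empty.getNumRows() + "x" + empty.getNumColumns()
            + ", nnz: " + empty.getNonZeros());
    }
}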

Example 22 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class PairWritableBlock method readFields.

@Override
public void readFields(DataInput in) throws IOException {
    indexes = new MatrixIndexes();
    indexes.readFields(in);
    block = new MatrixBlock();
    block.readFields(in);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes)
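
readFields is the deserialization half of the Hadoop Writable contract, which both MatrixIndexes and MatrixBlock implement. Below is a small, self-contained round-trip sketch (illustrative only, not SystemML code) that serializes an index/block pair to an in-memory stream and reads it back in the same order as the method above.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class WritableRoundTripSketch {
    public static void main(String[] args) throws Exception {
        MatrixIndexes ix = new MatrixIndexes(1, 1); //block position (1,1)
        MatrixBlock mb = new MatrixBlock(2, 2, false); //small dense block
        mb.quickSetValue(0, 0, 3.14);
        //serialize indexes followed by block
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        ix.write(dos);
        mb.write(dos);
        dos.flush();
        //deserialize in the same order, as readFields above does
        DataInputStream din = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
        MatrixIndexes ix2 = new MatrixIndexes();
        ix2.readFields(din);
        MatrixBlock mb2 = new MatrixBlock();
        mb2.readFields(din);
        System.out.println(ix2 + " -> " + mb2.quickGetValue(0, 0)); //prints the restored value
    }
}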

Example 23 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixIndexingSPInstruction method createPartitionPruningRDD.

/**
	 * Wraps the input RDD into a PartitionPruningRDD, which acts as a filter
	 * of required partitions. The distinct set of required partitions is determined
	 * via the partitioner of the input RDD.
	 * 
	 * @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
	 * @param filter partition filter
	 * @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
	 */
private static JavaPairRDD<MatrixIndexes, MatrixBlock> createPartitionPruningRDD(JavaPairRDD<MatrixIndexes, MatrixBlock> in, List<MatrixIndexes> filter) {
    //build hashset of required partition ids
    HashSet<Integer> flags = new HashSet<Integer>();
    Partitioner partitioner = in.rdd().partitioner().get();
    for (MatrixIndexes key : filter) flags.add(partitioner.getPartition(key));
    //create partition pruning rdd
    Function1<Object, Object> f = new PartitionPruningFunction(flags);
    PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> ppRDD = PartitionPruningRDD.create(in.rdd(), f);
    //wrap output into java pair rdd
    return new JavaPairRDD<MatrixIndexes, MatrixBlock>(ppRDD, ClassManifestFactory.fromClass(MatrixIndexes.class), ClassManifestFactory.fromClass(MatrixBlock.class));
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Tuple2(scala.Tuple2) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Partitioner(org.apache.spark.Partitioner) HashSet(java.util.HashSet)
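
The PartitionPruningFunction referenced above is a private helper that is not part of this snippet. A hedged sketch of what such a filter could look like from Java is given below, assuming it extends scala.runtime.AbstractFunction1; the actual SystemML class may be implemented differently. An instance of this class could be passed where the method above builds f.

import java.io.Serializable;
import java.util.HashSet;
import scala.runtime.AbstractFunction1;

//maps a partition id (boxed Integer) to true iff that partition is required
public class PartitionFilterSketch extends AbstractFunction1<Object, Object> implements Serializable {
    private static final long serialVersionUID = 1L;
    private final HashSet<Integer> _partitions;

    public PartitionFilterSketch(HashSet<Integer> partitions) {
        _partitions = partitions;
    }

    @Override
    public Object apply(Object partitionId) {
        //keep only partitions whose id appears in the precomputed set of required ids
        return _partitions.contains((Integer) partitionId);
    }
}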

Example 24 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MatrixReshapeSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    //get parameters
    //save cast
    int rows = (int) ec.getScalarInput(_opRows.getName(), _opRows.getValueType(), _opRows.isLiteral()).getLongValue();
    //save cast
    int cols = (int) ec.getScalarInput(_opCols.getName(), _opCols.getValueType(), _opCols.isLiteral()).getLongValue();
    boolean byRow = ec.getScalarInput(_opByRow.getName(), ValueType.BOOLEAN, _opByRow.isLiteral()).getBooleanValue();
    //get inputs 
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    //update output characteristics and sanity check
    mcOut.set(rows, cols, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
    if (mcIn.getRows() * mcIn.getCols() != mcOut.getRows() * mcOut.getCols()) {
        throw new DMLRuntimeException("Incompatible matrix characteristics for reshape: " + mcIn.getRows() + "x" + mcIn.getCols() + " vs " + mcOut.getRows() + "x" + mcOut.getCols());
    }
    //execute reshape instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDReshapeFunction(mcIn, mcOut, byRow));
    out = RDDAggregateUtils.mergeByKey(out);
    //put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
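
Reshape preserves each cell's position in the linearized cell order while changing the dimensions, which is why the instruction checks that the total cell counts match. The sketch below illustrates the row-wise semantics on a single in-memory MatrixBlock; it is an illustration only, since the distributed RDDReshapeFunction above operates block-wise rather than cell-wise.

import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class LocalReshapeSketch {
    //copy cells in row-major order from in (r1 x c1) into a new r2 x c2 block
    public static MatrixBlock reshapeRowWise(MatrixBlock in, int r2, int c2) {
        int r1 = in.getNumRows(), c1 = in.getNumColumns();
        if ((long) r1 * c1 != (long) r2 * c2)
            throw new IllegalArgumentException("Incompatible reshape: "
                + r1 + "x" + c1 + " vs " + r2 + "x" + c2);
        MatrixBlock out = new MatrixBlock(r2, c2, false);
        for (int i = 0; i < r1; i++)
            for (int j = 0; j < c1; j++) {
                long cell = (long) i * c1 + j; //linearized row-major position
                out.quickSetValue((int) (cell / c2), (int) (cell % c2), in.quickGetValue(i, j));
            }
        return out;
    }
}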

Example 25 with MatrixBlock

use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.

the class MultiReturnParameterizedBuiltinSPInstruction method processInstruction.

@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    try {
        //get input RDD and meta data
        FrameObject fo = sec.getFrameObject(input1.getName());
        FrameObject fometa = sec.getFrameObject(_outputs.get(1).getName());
        JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForFrameObject(fo, InputInfo.BinaryBlockInputInfo);
        String spec = ec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getStringValue();
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
        String[] colnames = !TfMetaUtils.isIDSpecification(spec) ? in.lookup(1L).get(0).getColumnNames() : null;
        //step 1: build transform meta data
        Encoder encoderBuild = EncoderFactory.createEncoder(spec, colnames, fo.getSchema(), (int) fo.getNumColumns(), null);
        MaxLongAccumulator accMax = registerMaxLongAccumulator(sec.getSparkContext());
        JavaRDD<String> rcMaps = in.mapPartitionsToPair(new TransformEncodeBuildFunction(encoderBuild)).distinct().groupByKey().flatMap(new TransformEncodeGroupFunction(accMax));
        if (containsMVImputeEncoder(encoderBuild)) {
            MVImputeAgent mva = getMVImputeEncoder(encoderBuild);
            rcMaps = rcMaps.union(in.mapPartitionsToPair(new TransformEncodeBuild2Function(mva)).groupByKey().flatMap(new TransformEncodeGroup2Function(mva)));
        }
        //trigger eval
        rcMaps.saveAsTextFile(fometa.getFileName());
        //consolidate meta data frame (reuse multi-threaded reader, special handling missing values) 
        FrameReader reader = FrameReaderFactory.createFrameReader(InputInfo.TextCellInputInfo);
        FrameBlock meta = reader.readFrameFromHDFS(fometa.getFileName(), accMax.value(), fo.getNumColumns());
        //recompute num distinct items per column
        meta.recomputeColumnCardinality();
        meta.setColumnNames((colnames != null) ? colnames : meta.getColumnNames());
        //step 2: transform apply (similar to spark transformapply)
        //compute omit offset map for block shifts
        TfOffsetMap omap = null;
        if (TfMetaUtils.containsOmitSpec(spec, colnames)) {
            omap = new TfOffsetMap(SparkUtils.toIndexedLong(in.mapToPair(new RDDTransformApplyOffsetFunction(spec, colnames)).collect()));
        }
        //create encoder broadcast (avoiding replication per task) 
        Encoder encoder = EncoderFactory.createEncoder(spec, colnames, fo.getSchema(), (int) fo.getNumColumns(), meta);
        mcOut.setDimension(mcIn.getRows() - ((omap != null) ? omap.getNumRmRows() : 0), encoder.getNumCols());
        Broadcast<Encoder> bmeta = sec.getSparkContext().broadcast(encoder);
        Broadcast<TfOffsetMap> bomap = (omap != null) ? sec.getSparkContext().broadcast(omap) : null;
        //execute transform apply
        JavaPairRDD<Long, FrameBlock> tmp = in.mapToPair(new RDDTransformApplyFunction(bmeta, bomap));
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = FrameRDDConverterUtils.binaryBlockToMatrixBlock(tmp, mcOut, mcOut);
        //set output and maintain lineage/output characteristics
        sec.setRDDHandleForVariable(_outputs.get(0).getName(), out);
        sec.addLineageRDD(_outputs.get(0).getName(), input1.getName());
        sec.setFrameOutput(_outputs.get(1).getName(), meta);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Encoder(org.apache.sysml.runtime.transform.encode.Encoder) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) RDDTransformApplyOffsetFunction(org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction.RDDTransformApplyOffsetFunction) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RDDTransformApplyFunction(org.apache.sysml.runtime.instructions.spark.ParameterizedBuiltinSPInstruction.RDDTransformApplyFunction) TfOffsetMap(org.apache.sysml.runtime.transform.meta.TfOffsetMap) FrameReader(org.apache.sysml.runtime.io.FrameReader) MVImputeAgent(org.apache.sysml.runtime.transform.MVImputeAgent)
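
Before converting the encoded frame to a MatrixBlock RDD, the instruction fixes the output dimensions: the row count shrinks by the number of omitted rows and the column count comes from the fitted encoder (e.g., after dummy-code expansion). A small sketch of that bookkeeping with MatrixCharacteristics is shown below; all counts are hypothetical placeholders.

import org.apache.sysml.runtime.matrix.MatrixCharacteristics;

public class TransformEncodeDimsSketch {
    public static void main(String[] args) {
        long inRows = 10000; //rows of the input frame (hypothetical)
        long numOmittedRows = 250; //rows removed by an omit spec (hypothetical)
        long encoderCols = 42; //columns reported by the fitted encoder (hypothetical)
        int brlen = 1000, bclen = 1000; //block sizes carried over from the input
        MatrixCharacteristics mcOut = new MatrixCharacteristics(
            inRows - numOmittedRows, encoderCols, brlen, bclen);
        System.out.println("encoded output: " + mcOut.getRows() + "x" + mcOut.getCols());
    }
}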

Aggregations

MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 393
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 121
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 105
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 87
SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 44
IOException (java.io.IOException): 43
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 38
CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock): 34
ArrayList (java.util.ArrayList): 33
Path (org.apache.hadoop.fs.Path): 25
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 23
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 21
FileSystem (org.apache.hadoop.fs.FileSystem): 20
JobConf (org.apache.hadoop.mapred.JobConf): 17
Tuple2 (scala.Tuple2): 17
SequenceFile (org.apache.hadoop.io.SequenceFile): 14
MatrixReader (org.apache.sysml.runtime.io.MatrixReader): 14
TestConfiguration (org.apache.sysml.test.integration.TestConfiguration): 13
RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM): 12
MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData): 12