Use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
The class OptimizerRuleBased, method rewriteRemoveUnnecessaryCompareMatrix.
///////
// REWRITE: remove compare matrix (for result merge; needs to be invoked before setting the result merge)
///
protected void rewriteRemoveUnnecessaryCompareMatrix(OptNode n, ExecutionContext ec) throws DMLRuntimeException {
    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping().getMappedProg(n.getID())[1];
    ArrayList<String> cleanedVars = new ArrayList<String>();
    ArrayList<String> resultVars = pfpb.getResultVariables();
    String itervar = pfpb.getIterablePredicateVars()[0];
    for (String rvar : resultVars) {
        Data dat = ec.getVariable(rvar);
        if (dat instanceof MatrixObject && ((MatrixObject) dat).getNnz() != 0 //subject to result merge with compare
            && n.hasOnlySimpleChilds() //guaranteed no conditional indexing
            && rContainsResultFullReplace(n, rvar, itervar, (MatrixObject) dat) //guaranteed full matrix replace
            && !rIsReadInRightIndexing(n, rvar) //never read variable in loop body
            && ((MatrixObject) dat).getNumRows() <= Integer.MAX_VALUE
            && ((MatrixObject) dat).getNumColumns() <= Integer.MAX_VALUE) {
            //replace existing matrix object with an empty matrix
            MatrixObject mo = (MatrixObject) dat;
            ec.cleanupMatrixObject(mo);
            ec.setMatrixOutput(rvar, new MatrixBlock((int) mo.getNumRows(), (int) mo.getNumColumns(), false));
            //keep track of cleaned result variables
            cleanedVars.add(rvar);
        }
    }
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'remove unnecessary compare matrix' - result=" + (!cleanedVars.isEmpty()) + " (" + ProgramConverter.serializeStringCollection(cleanedVars) + ")");
}
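For context: the placeholder installed by this rewrite keeps the original dimensions but carries no data, which is exactly why the result merge no longer needs a compare matrix. A minimal sketch of that property (the class name and dimensions below are made up for illustration):

import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class EmptyPlaceholderDemo {
    public static void main(String[] args) {
        //same constructor as the rewrite uses: rows, cols, sparse flag
        MatrixBlock placeholder = new MatrixBlock(1000, 1000, false);
        //no cell data is allocated, so the block reports zero non-zeros ...
        System.out.println(placeholder.getNonZeros()); // 0
        //... and a subsequent result merge has nothing to compare against
        System.out.println(placeholder.isEmptyBlock()); // true
    }
}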
Use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
The class PairWritableBlock, method readFields.
@Override
public void readFields(DataInput in) throws IOException {
    indexes = new MatrixIndexes();
    indexes.readFields(in);
    block = new MatrixBlock();
    block.readFields(in);
}
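The Writable contract requires a write method that serializes the fields in the same order readFields consumes them. It is not shown on this page, but a plausible counterpart for PairWritableBlock would be:

@Override
public void write(DataOutput out) throws IOException {
    //serialize in the same order readFields reads: indexes first, then block
    indexes.write(out);
    block.write(out);
}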
Use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
The class MatrixIndexingSPInstruction, method createPartitionPruningRDD.
/**
* Wraps the input RDD into a PartitionPruningRDD, which acts as a filter
* of required partitions. The distinct set of required partitions is determined
* via the partitioner of the input RDD.
*
* @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
* @param filter partition filter
* @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
*/
private static JavaPairRDD<MatrixIndexes, MatrixBlock> createPartitionPruningRDD(JavaPairRDD<MatrixIndexes, MatrixBlock> in, List<MatrixIndexes> filter) {
    //build hash set of required partition ids
    HashSet<Integer> flags = new HashSet<Integer>();
    Partitioner partitioner = in.rdd().partitioner().get();
    for (MatrixIndexes key : filter)
        flags.add(partitioner.getPartition(key));
    //create partition pruning rdd
    Function1<Object, Object> f = new PartitionPruningFunction(flags);
    PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> ppRDD = PartitionPruningRDD.create(in.rdd(), f);
    //wrap output into java pair rdd
    return new JavaPairRDD<MatrixIndexes, MatrixBlock>(ppRDD, ClassManifestFactory.fromClass(MatrixIndexes.class), ClassManifestFactory.fromClass(MatrixBlock.class));
}
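The PartitionPruningFunction referenced above is not shown on this page. A plausible sketch, assuming it merely tests partition-id membership against the precomputed set (class and field names are illustrative):

//hypothetical sketch: a serializable scala.Function1 over partition ids that
//returns true only for partitions holding at least one required block
private static class PartitionPruningFunction extends scala.runtime.AbstractFunction1<Object, Object> implements java.io.Serializable {
    private static final long serialVersionUID = 1L;
    private final HashSet<Integer> _partitions;

    public PartitionPruningFunction(HashSet<Integer> partitions) {
        _partitions = partitions;
    }

    @Override
    public Object apply(Object partitionId) {
        return _partitions.contains((Integer) partitionId); //boxed Boolean
    }
}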
Use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
The class MatrixReshapeSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    //get parameters
    int rows = (int) ec.getScalarInput(_opRows.getName(), _opRows.getValueType(), _opRows.isLiteral()).getLongValue(); //safe cast
    int cols = (int) ec.getScalarInput(_opCols.getName(), _opCols.getValueType(), _opCols.isLiteral()).getLongValue(); //safe cast
    boolean byRow = ec.getScalarInput(_opByRow.getName(), ValueType.BOOLEAN, _opByRow.isLiteral()).getBooleanValue();
    //get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    //update output characteristics and sanity check (reshape must preserve the cell count)
    mcOut.set(rows, cols, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
    if (mcIn.getRows() * mcIn.getCols() != mcOut.getRows() * mcOut.getCols()) {
        throw new DMLRuntimeException("Incompatible matrix characteristics for reshape: " + mcIn.getRows() + "x" + mcIn.getCols() + " vs " + mcOut.getRows() + "x" + mcOut.getCols());
    }
    //execute reshape instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDReshapeFunction(mcIn, mcOut, byRow));
    out = RDDAggregateUtils.mergeByKey(out);
    //put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
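The per-cell mapping behind RDDReshapeFunction is easiest to see in the row-wise (byRow) case: each source cell is linearized in row-major order, and the linear index is re-split over the new column count. A minimal sketch with hypothetical names and 0-based indexes (the actual function additionally handles blocking and sparsity):

//hypothetical helper: maps a 0-based cell (i, j) of a matrix with colsIn
//columns to its 0-based target cell in a matrix with colsOut columns
static long[] reshapeCellByRow(long i, long j, long colsIn, long colsOut) {
    long linear = i * colsIn + j; //row-major linear cell index
    return new long[] { linear / colsOut,   //target row
                        linear % colsOut }; //target column
}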
Use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
The class MultiReturnParameterizedBuiltinSPInstruction, method processInstruction.
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    try {
        //get input RDD and meta data
        FrameObject fo = sec.getFrameObject(input1.getName());
        FrameObject fometa = sec.getFrameObject(_outputs.get(1).getName());
        JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForFrameObject(fo, InputInfo.BinaryBlockInputInfo);
        String spec = ec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getStringValue();
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
        MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
        String[] colnames = !TfMetaUtils.isIDSpecification(spec) ? in.lookup(1L).get(0).getColumnNames() : null;
        //step 1: build transform meta data
        Encoder encoderBuild = EncoderFactory.createEncoder(spec, colnames, fo.getSchema(), (int) fo.getNumColumns(), null);
        MaxLongAccumulator accMax = registerMaxLongAccumulator(sec.getSparkContext());
        JavaRDD<String> rcMaps = in.mapPartitionsToPair(new TransformEncodeBuildFunction(encoderBuild)).distinct().groupByKey().flatMap(new TransformEncodeGroupFunction(accMax));
        if (containsMVImputeEncoder(encoderBuild)) {
            MVImputeAgent mva = getMVImputeEncoder(encoderBuild);
            rcMaps = rcMaps.union(in.mapPartitionsToPair(new TransformEncodeBuild2Function(mva)).groupByKey().flatMap(new TransformEncodeGroup2Function(mva)));
        }
        //trigger evaluation
        rcMaps.saveAsTextFile(fometa.getFileName());
        //consolidate meta data frame (reuse the multi-threaded reader; missing values need special handling)
        FrameReader reader = FrameReaderFactory.createFrameReader(InputInfo.TextCellInputInfo);
        FrameBlock meta = reader.readFrameFromHDFS(fometa.getFileName(), accMax.value(), fo.getNumColumns());
        //recompute the number of distinct items per column
        meta.recomputeColumnCardinality();
        meta.setColumnNames((colnames != null) ? colnames : meta.getColumnNames());
        //step 2: transform apply (analogous to the spark transformapply instruction)
        //compute omit offset map for block shifts
        TfOffsetMap omap = null;
        if (TfMetaUtils.containsOmitSpec(spec, colnames)) {
            omap = new TfOffsetMap(SparkUtils.toIndexedLong(in.mapToPair(new RDDTransformApplyOffsetFunction(spec, colnames)).collect()));
        }
        //create encoder broadcast (avoids replication per task)
        Encoder encoder = EncoderFactory.createEncoder(spec, colnames, fo.getSchema(), (int) fo.getNumColumns(), meta);
        mcOut.setDimension(mcIn.getRows() - ((omap != null) ? omap.getNumRmRows() : 0), encoder.getNumCols());
        Broadcast<Encoder> bmeta = sec.getSparkContext().broadcast(encoder);
        Broadcast<TfOffsetMap> bomap = (omap != null) ? sec.getSparkContext().broadcast(omap) : null;
        //execute transform apply
        JavaPairRDD<Long, FrameBlock> tmp = in.mapToPair(new RDDTransformApplyFunction(bmeta, bomap));
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = FrameRDDConverterUtils.binaryBlockToMatrixBlock(tmp, mcOut, mcOut);
        //set output and maintain lineage/output characteristics
        sec.setRDDHandleForVariable(_outputs.get(0).getName(), out);
        sec.addLineageRDD(_outputs.get(0).getName(), input1.getName());
        sec.setFrameOutput(_outputs.get(1).getName(), meta);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
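The helper containsMVImputeEncoder used in step 1 is not shown here. Assuming the composite encoder exposes its child encoders, a plausible sketch is:

//hypothetical sketch: walk a composite encoder and look for an MVImputeAgent
private static boolean containsMVImputeEncoder(Encoder encoder) {
    if (encoder instanceof EncoderComposite)
        for (Encoder enc : ((EncoderComposite) encoder).getEncoders())
            if (enc instanceof MVImputeAgent)
                return true;
    return false;
}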