use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class RmmSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get input rdds
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
MatrixCharacteristics mcOut = updateBinaryMMOutputMatrixCharacteristics(sec, true);
// execute Spark RMM instruction
// step 1: prepare join keys (w/ shallow replication), i/j/k
JavaPairRDD<TripleIndexes, MatrixBlock> tmp1 = in1.flatMapToPair(new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true));
JavaPairRDD<TripleIndexes, MatrixBlock> tmp2 = in2.flatMapToPair(new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));
// step 2: join prepared datasets, multiply, and aggregate
int numPartJoin = Math.max(getNumJoinPartitions(mc1, mc2), SparkExecutionContext.getDefaultParallelism(true));
int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
// join by result block and perform the block matrix multiplications
JavaPairRDD<MatrixIndexes, MatrixBlock> out = tmp1.join(tmp2, numPartJoin).mapToPair(new RmmMultiplyFunction());
// aggregate partial products per result block
out = RDDAggregateUtils.sumByKeyStable(out, numPartOut, false);
// put output RDD handle into symbol table (with lineage on both inputs)
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageRDD(output.getName(), input2.getName());
}
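The join in step 2 only lines up because step 1 keyed both inputs by the triple (i,j,k): every block A(i,k) of the left input must meet every block B(k,j) of the right input that contributes to C(i,j). Below is a minimal sketch of the left-side replication, matching the (cols, colsPerBlock) arguments passed to RmmReplicateFunction for the left input; the actual RmmReplicateFunction body is not shown above, so class name and details here are assumptions.
// Hypothetical sketch of left-side shallow replication (not the actual RmmReplicateFunction).
// imports assumed: java.util.*, scala.Tuple2, org.apache.spark.api.java.function.PairFlatMapFunction
public static class ReplicateLeftSketch implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, TripleIndexes, MatrixBlock> {
private static final long serialVersionUID = 1L;
private final long _numColBlocks; // number of column blocks of the right input
public ReplicateLeftSketch(long clenB, int bclenB) {
_numColBlocks = (clenB + bclenB - 1) / bclenB; // ceil(cols(B)/blocksize)
}
@Override
public Iterator<Tuple2<TripleIndexes, MatrixBlock>> call(Tuple2<MatrixIndexes, MatrixBlock> arg) {
long i = arg._1().getRowIndex(), k = arg._1().getColumnIndex();
ArrayList<Tuple2<TripleIndexes, MatrixBlock>> ret = new ArrayList<>((int) _numColBlocks);
for (long j = 1; j <= _numColBlocks; j++) // shallow replication: reuse the same block object
ret.add(new Tuple2<>(new TripleIndexes(i, j, k), arg._2()));
return ret.iterator();
}
}
After both sides are replicated this way, join(tmp2, numPartJoin) pairs each A(i,k) with its matching B(k,j), RmmMultiplyFunction computes the partial product for C(i,j), and sumByKeyStable adds the partials over k.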
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class TernarySPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = !input1.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = !input2.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input2.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = !input3.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input3.getName());
MatrixBlock m1 = input1.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input1).getDoubleValue());
MatrixBlock m2 = input2.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input2).getDoubleValue());
MatrixBlock m3 = input3.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input3).getDoubleValue());
TernaryOperator op = (TernaryOperator) _optr;
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
if (input1.isMatrix() && !input2.isMatrix() && !input3.isMatrix())
out = in1.mapValues(new TernaryFunctionMSS(op, m1, m2, m3));
else if (!input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
out = in2.mapValues(new TernaryFunctionSMS(op, m1, m2, m3));
else if (!input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
out = in3.mapValues(new TernaryFunctionSSM(op, m1, m2, m3));
else if (input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
out = in1.join(in2).mapValues(new TernaryFunctionMMS(op, m1, m2, m3));
else if (input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
out = in1.join(in3).mapValues(new TernaryFunctionMSM(op, m1, m2, m3));
else if (!input1.isMatrix() && input2.isMatrix() && input3.isMatrix())
out = in2.join(in3).mapValues(new TernaryFunctionSMM(op, m1, m2, m3));
else // all three inputs are matrices
out = in1.join(in2).join(in3).mapValues(new TernaryFunctionMMM(op, m1, m2, m3));
// set output RDD
updateTernaryOutputMatrixCharacteristics(sec);
sec.setRDDHandleForVariable(output.getName(), out);
if (input1.isMatrix())
sec.addLineageRDD(output.getName(), input1.getName());
if (input2.isMatrix())
sec.addLineageRDD(output.getName(), input2.getName());
if (input3.isMatrix())
sec.addLineageRDD(output.getName(), input3.getName());
}
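The seven branches differ only in which operands arrive as RDD values and which as 1x1 scalar wrapper blocks carried in the function closure. A hedged sketch of the matrix-scalar-scalar case follows, assuming a block-level ternaryOperations kernel on MatrixBlock; the real TernaryFunctionMSS body is not shown above.
// Hypothetical sketch of the matrix-scalar-scalar case (not the actual TernaryFunctionMSS).
// imports assumed: org.apache.spark.api.java.function.Function
public static class TernaryFunctionMSSSketch implements Function<MatrixBlock, MatrixBlock> {
private static final long serialVersionUID = 1L;
private final TernaryOperator _op;
private final MatrixBlock _m2, _m3; // scalar operands wrapped as 1x1 blocks
public TernaryFunctionMSSSketch(TernaryOperator op, MatrixBlock m2, MatrixBlock m3) {
_op = op;
_m2 = m2;
_m3 = m3;
}
@Override
public MatrixBlock call(MatrixBlock in) throws Exception {
// assumed kernel: out(r,c) = op(in(r,c), m2(0,0), m3(0,0)), e.g., ifelse
return in.ternaryOperations(_op, _m2, _m3, new MatrixBlock());
}
}
Because both scalars ride along in the closure, no join is needed, and mapValues preserves the partitioning of the single matrix input.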
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class Tsmm2SPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get input
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
// execute tsmm2 instruction
// step 1: first pass of X, filter-collect-broadcast excess blocks
JavaPairRDD<MatrixIndexes, MatrixBlock> tmp1 = in
.filter(new IsBlockInRange(_type.isLeft() ? 1 : mc.getRowsPerBlock() + 1, mc.getRows(), _type.isLeft() ? mc.getColsPerBlock() + 1 : 1, mc.getCols(), mc))
.mapToPair(new ShiftTSMMIndexesFunction(_type));
PartitionedBlock<MatrixBlock> pmb = SparkExecutionContext.toPartitionedMatrixBlock(tmp1,
(int) (_type.isLeft() ? mc.getRows() : mc.getRows() - mc.getRowsPerBlock()),
(int) (_type.isLeft() ? mc.getCols() - mc.getColsPerBlock() : mc.getCols()),
mc.getRowsPerBlock(), mc.getColsPerBlock(), -1L);
Broadcast<PartitionedBlock<MatrixBlock>> bpmb = sec.getSparkContext().broadcast(pmb);
// step 2: second pass of X, compute tsmm/mapmm and aggregate result blocks
int outputDim = (int) (_type.isLeft() ? mc.getCols() : mc.getRows());
if (OptimizerUtils.estimateSize(outputDim, outputDim) <= 32 * 1024 * 1024) { // default: <=32MB
// output large blocks and reduceAll to avoid skew on combineByKey
JavaRDD<MatrixBlock> tmp2 = in.map(new RDDTSMM2ExtFunction(bpmb, _type, outputDim, (int) mc.getRowsPerBlock()));
MatrixBlock out = RDDAggregateUtils.sumStable(tmp2);
// put output block into symbol table (no lineage because single block)
// this also includes implicit maintenance of matrix characteristics
sec.setMatrixOutput(output.getName(), out, getExtendedOpcode());
} else {
// output individual output blocks and aggregate by key (no action)
JavaPairRDD<MatrixIndexes, MatrixBlock> tmp2 = in.flatMapToPair(new RDDTSMM2Function(bpmb, _type));
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDAggregateUtils.sumByKeyStable(tmp2, false);
// put output RDD handle into symbol table
sec.getMatrixCharacteristics(output.getName()).set(outputDim, outputDim, mc.getRowsPerBlock(), mc.getColsPerBlock());
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
}
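The 32MB branch condition effectively caps the single-block code path at roughly a 2000x2000 dense double output, since a dense cell costs 8 bytes. A back-of-the-envelope check, assuming estimateSize is close to the worst-case dense size (the real estimate also accounts for sparsity, which this sketch ignores):
// rough arithmetic behind the <=32MB branch
long outputDim = 2000;
long denseBytes = outputDim * outputDim * 8L; // 32,000,000 bytes (~30.5MB)
boolean singleBlock = denseBytes <= 32 * 1024 * 1024; // true: driver-side sumStable
// a 3000x3000 output (72,000,000 bytes) would take the sumByKeyStable branch instead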
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class TsmmSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get input
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
// execute tsmm instruction (always produce exactly one output block)
// (collecting all partial result blocks at the driver may require --conf spark.driver.maxResultSize=0)
JavaRDD<MatrixBlock> tmp = in.map(new RDDTSMMFunction(_type));
MatrixBlock out = RDDAggregateUtils.sumStable(tmp);
// put output block into symbol table (no lineage because single block)
// this also includes implicit maintenance of matrix characteristics
sec.setMatrixOutput(output.getName(), out, getExtendedOpcode());
}
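A single map plus a stable sum suffices because tsmm decomposes blockwise: for the left case, t(X) %*% X equals the sum of t(X_i) %*% X_i over the row blocks X_i, so every block yields one partial result of the full output size (the operator presupposes that the non-reduced dimension fits in a single block). A hedged sketch of such a per-block kernel via MatrixBlock's transpose-self multiply; the real RDDTSMMFunction body is not shown above.
// Hypothetical sketch of a per-block tsmm kernel (not the actual RDDTSMMFunction).
// imports assumed: scala.Tuple2, org.apache.spark.api.java.function.Function,
// org.apache.sysml.lops.MMTSJ.MMTSJType
public static class TsmmBlockSketch implements Function<Tuple2<MatrixIndexes, MatrixBlock>, MatrixBlock> {
private static final long serialVersionUID = 1L;
private final MMTSJType _type; // LEFT: t(X)%*%X, RIGHT: X%*%t(X)
public TsmmBlockSketch(MMTSJType type) {
_type = type;
}
@Override
public MatrixBlock call(Tuple2<MatrixIndexes, MatrixBlock> arg) throws Exception {
// each block contributes one partial result; sumStable adds them into the single output
return arg._2().transposeSelfMatrixMultOperations(new MatrixBlock(), _type);
}
}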
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class WriteSPInstruction method processMatrixWriteInstruction.
protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws IOException {
// get input rdd
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
// piggyback nnz maintenance on write
LongAccumulator aNnz = null;
if (!mc.nnzKnown()) {
aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
}
JavaRDD<String> header = null;
if (oi == OutputInfo.MatrixMarketOutputInfo) {
ArrayList<String> headerContainer = new ArrayList<>(1);
// output MatrixMarket header: format line plus number of rows, columns, and non-zeros
String headerStr = "%%MatrixMarket matrix coordinate real general\n" + mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
headerContainer.add(headerStr);
header = sec.getSparkContext().parallelize(headerContainer);
}
JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
if (header != null)
customSaveTextFile(header.union(ijv), fname, true);
else
customSaveTextFile(ijv, fname, false);
if (!mc.nnzKnown())
mc.setNonZeros(aNnz.value());
} else if (oi == OutputInfo.CSVOutputInfo) {
if (mc.getRows() == 0 || mc.getCols() == 0) {
throw new IOException("Write of matrices with zero rows or columns" + " not supported (" + mc.getRows() + "x" + mc.getCols() + ").");
}
LongAccumulator aNnz = null;
// piggyback nnz computation on actual write
if (!mc.nnzKnown()) {
aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
}
JavaRDD<String> out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
customSaveTextFile(out, fname, false);
if (!mc.nnzKnown())
mc.setNonZeros(aNnz.value());
} else if (oi == OutputInfo.BinaryBlockOutputInfo) {
// piggyback nnz computation on actual write
LongAccumulator aNnz = null;
if (!mc.nnzKnown()) {
aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
}
// save binary block rdd on hdfs
in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
if (!mc.nnzKnown())
mc.setNonZeros(aNnz.value());
} else {
// unsupported formats: binarycell (not externalized)
throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
}
// write meta data file
MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
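All three format branches reuse the same trick: a pass-through mapValues function adds each block's non-zero count to a LongAccumulator, so the nnz metadata materializes as a side effect of the write action instead of requiring a separate pass over the data. A hedged sketch of such a function; the real ComputeBinaryBlockNnzFunction body is not shown above.
// Hypothetical sketch of nnz piggybacking (not the actual ComputeBinaryBlockNnzFunction).
// imports assumed: org.apache.spark.api.java.function.Function,
// org.apache.spark.util.LongAccumulator
public static class NnzCountSketch implements Function<MatrixBlock, MatrixBlock> {
private static final long serialVersionUID = 1L;
private final LongAccumulator _aNnz;
public NnzCountSketch(LongAccumulator aNnz) {
_aNnz = aNnz;
}
@Override
public MatrixBlock call(MatrixBlock block) throws Exception {
_aNnz.add(block.getNonZeros()); // side effect: count nnz during the write
return block; // data flows through unchanged
}
}
One caveat: Spark guarantees exactly-once accumulator updates only for actions, so counts collected inside a transformation can overcount if tasks are retried or stages recomputed.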