use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class WriteSPInstruction method processMatrixWriteInstruction.
/**
 * Writes the matrix bound to {@code input1} to {@code fname} in the format selected by
 * {@code oi} and afterwards writes the accompanying {@code fname + ".mtd"} meta data file.
 * <p>
 * Handled formats are MatrixMarket, text cell, CSV and binary block; any other format is
 * rejected. Where the number of non-zeros is not yet known, a {@code LongAccumulator} is
 * attached to the block RDD so that the count is gathered as part of the write itself and
 * then stored in the matrix characteristics.
 *
 * @param sec   spark execution context used to obtain the RDD handle, the matrix
 *              characteristics and the spark context
 * @param fname output file name
 * @param oi    output info selecting the output format
 * @throws DMLRuntimeException if {@code oi} is none of the handled formats
 * @throws IOException declared by the signature; presumably raised by the underlying
 *                     file writes (not visible in this method)
 */
protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws DMLRuntimeException, IOException {
    //get input rdd
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
        //piggyback nnz maintenance on write
        LongAccumulator aNnz = null;
        if (isInputMatrixBlock && !mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }

        JavaRDD<String> header = null;
        if (oi == OutputInfo.MatrixMarketOutputInfo) {
            ArrayList<String> headerContainer = new ArrayList<String>(1);
            // First output MM header: number of rows, number of columns and number of nnz.
            // NOTE(review): the nnz value is read here, before the write below has run and
            // before the accumulator result is applied; if nnz is unknown at this point the
            // header carries whatever getNonZeros() currently returns - confirm this is intended.
            String headerStr = "%%MatrixMarket matrix coordinate real general\n"
                + mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
            headerContainer.add(headerStr);
            header = sec.getSparkContext().parallelize(headerContainer);
        }

        JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
        if (header != null) {
            customSaveTextFile(header.union(ijv), fname, true);
        } else {
            customSaveTextFile(ijv, fname, false);
        }

        //the accumulator exists only if nnz was unknown before the write
        if (aNnz != null) {
            mc.setNonZeros(aNnz.value());
        }
    } else if (oi == OutputInfo.CSVOutputInfo) {
        JavaRDD<String> out = null;
        LongAccumulator aNnz = null;
        if (isInputMatrixBlock) {
            //piggyback nnz computation on actual write
            if (!mc.nnzKnown()) {
                aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
                in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
            }
            out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
        } else {
            // This case is applicable when the CSV output from transform() is written out
            // TODO remove once transform over frames supported
            @SuppressWarnings("unchecked") JavaPairRDD<Long, String> rdd = (JavaPairRDD<Long, String>) (sec.getMatrixObject(input1.getName())).getRDDHandle().getRDD();
            out = rdd.values();

            String sep = ",";
            boolean hasHeader = false;
            if (formatProperties != null) {
                sep = ((CSVFileFormatProperties) formatProperties).getDelim();
                hasHeader = ((CSVFileFormatProperties) formatProperties).hasHeader();
            }

            if (hasHeader) {
                //default column names C1..Cn, one per column (upper bound is inclusive)
                final long numCols = mc.getCols();
                StringBuilder buf = new StringBuilder();
                for (long j = 1; j <= numCols; j++) {
                    if (j != 1) {
                        buf.append(sep);
                    }
                    buf.append("C").append(j);
                }
                ArrayList<String> headerContainer = new ArrayList<String>(1);
                headerContainer.add(buf.toString());
                JavaRDD<String> header = sec.getSparkContext().parallelize(headerContainer);
                out = header.union(out);
            }
        }

        customSaveTextFile(out, fname, false);

        //the accumulator exists only for matrix block input with unknown nnz
        if (aNnz != null) {
            mc.setNonZeros(aNnz.value());
        }
    } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
        //piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }

        //save binary block rdd on hdfs
        in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);

        if (aNnz != null) {
            mc.setNonZeros(aNnz.value());
        }
    } else {
        //unsupported formats: binarycell (not externalized)
        throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
    }

    // write meta data file
    MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class PairWritableBlock method readFields.
/**
 * Replaces the held index/block pair with fresh instances populated from the given
 * input: the indexes are read first, followed by the block.
 *
 * @param in input to deserialize from
 * @throws IOException if reading either component fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    // key part: install a new indexes object, then let it read itself
    final MatrixIndexes freshIndexes = new MatrixIndexes();
    this.indexes = freshIndexes;
    freshIndexes.readFields(in);

    // value part: same procedure for the block
    final MatrixBlock freshBlock = new MatrixBlock();
    this.block = freshBlock;
    freshBlock.readFields(in);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class PairWritableCell method readFields.
/**
 * Replaces the held index/cell pair with fresh instances populated from the given
 * input: the indexes are read first, followed by the cell.
 *
 * @param in input to deserialize from
 * @throws IOException if reading either component fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    // key part: install a new indexes object, then let it read itself
    final MatrixIndexes freshIndexes = new MatrixIndexes();
    this.indexes = freshIndexes;
    freshIndexes.readFields(in);

    // value part: same procedure for the cell
    final MatrixCell freshCell = new MatrixCell();
    this.cell = freshCell;
    freshCell.readFields(in);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class MatrixIndexingSPInstruction method createPartitionPruningRDD.
/**
 * Restricts the input RDD to a subset of its partitions by wrapping it into a
 * PartitionPruningRDD. Every key in the filter list is mapped to a partition id
 * through the partitioner of the input RDD, and the distinct set of these ids is
 * handed to the pruning function.
 *
 * @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
 * @param filter partition filter
 * @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
 */
private static JavaPairRDD<MatrixIndexes, MatrixBlock> createPartitionPruningRDD(JavaPairRDD<MatrixIndexes, MatrixBlock> in, List<MatrixIndexes> filter) {
    //partitioner of the input; get() is invoked unconditionally on the optional value
    Partitioner inputPartitioner = in.rdd().partitioner().get();

    //distinct ids of all partitions addressed by the filter keys
    HashSet<Integer> requiredPartitions = new HashSet<Integer>();
    for (MatrixIndexes ix : filter) {
        int partitionId = inputPartitioner.getPartition(ix);
        requiredPartitions.add(partitionId);
    }

    //prune the underlying scala rdd with a function built from the required ids
    Function1<Object, Object> pruningFunction = new PartitionPruningFunction(requiredPartitions);
    PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> pruned =
        PartitionPruningRDD.create(in.rdd(), pruningFunction);

    //expose the pruned rdd through the java pair rdd api again
    return new JavaPairRDD<MatrixIndexes, MatrixBlock>(
        pruned,
        ClassManifestFactory.fromClass(MatrixIndexes.class),
        ClassManifestFactory.fromClass(MatrixBlock.class));
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class MatrixReshapeSPInstruction method processInstruction.
/**
 * Executes the reshape of the matrix bound to {@code input1} into the requested
 * dimensions. The output characteristics are set to the target rows and columns
 * with the block sizes of the input; the instruction fails if the resulting cell
 * counts of input and output differ. The reshaped and key-merged RDD is registered
 * for the output variable together with a lineage link to the input.
 *
 * @param ec execution context, cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException if input and output have a different number of cells
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    //scalar operands: target dimensions (long values narrowed to int) and fill order
    final int targetRows = (int) ec.getScalarInput(_opRows.getName(), _opRows.getValueType(), _opRows.isLiteral()).getLongValue();
    final int targetCols = (int) ec.getScalarInput(_opCols.getName(), _opCols.getValueType(), _opCols.isLiteral()).getLongValue();
    final boolean rowWise = ec.getScalarInput(_opByRow.getName(), ValueType.BOOLEAN, _opByRow.isLiteral()).getBooleanValue();

    //input rdd plus characteristics of both sides
    JavaPairRDD<MatrixIndexes, MatrixBlock> inputBlocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());

    //output takes the requested dimensions and the block sizes of the input
    mcOut.set(targetRows, targetCols, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());

    //sanity check: a reshape must keep the total number of cells
    boolean sameCellCount = mcIn.getRows() * mcIn.getCols() == mcOut.getRows() * mcOut.getCols();
    if (!sameCellCount) {
        throw new DMLRuntimeException("Incompatible matrix characteristics for reshape: "
            + mcIn.getRows() + "x" + mcIn.getCols() + " vs " + mcOut.getRows() + "x" + mcOut.getCols());
    }

    //reshape the blocks, then merge the result by key
    JavaPairRDD<MatrixIndexes, MatrixBlock> reshaped = inputBlocks.flatMapToPair(new RDDReshapeFunction(mcIn, mcOut, rowWise));
    JavaPairRDD<MatrixIndexes, MatrixBlock> merged = RDDAggregateUtils.mergeByKey(reshaped);

    //register the output rdd and its lineage in the symbol table
    sec.setRDDHandleForVariable(output.getName(), merged);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Aggregations