Use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project incubator-systemml by apache.
Class WriteSPInstruction, method processMatrixWriteInstruction.
protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws IOException {
    // get input rdd
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
        // piggyback nnz maintenance on write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> header = null;
        if (oi == OutputInfo.MatrixMarketOutputInfo) {
            ArrayList<String> headerContainer = new ArrayList<>(1);
            // first output the MM header, then the number of rows, columns and non-zeros
            String headerStr = "%%MatrixMarket matrix coordinate real general\n"
                + mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
            headerContainer.add(headerStr);
            header = sec.getSparkContext().parallelize(headerContainer);
        }
        JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
        if (header != null)
            customSaveTextFile(header.union(ijv), fname, true);
        else
            customSaveTextFile(ijv, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    } else if (oi == OutputInfo.CSVOutputInfo) {
        if (mc.getRows() == 0 || mc.getCols() == 0) {
            throw new IOException("Write of matrices with zero rows or columns"
                + " not supported (" + mc.getRows() + "x" + mc.getCols() + ").");
        }
        // piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
        customSaveTextFile(out, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value().longValue());
    } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
        // piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        // save binary block rdd on hdfs
        in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value().longValue());
    } else {
        // unsupported formats: binarycell (not externalized)
        throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
    }
    // write meta data file
    MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
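In both snippets the function is applied only through mapValues, so its job is evidently to pass each MatrixBlock through unchanged while adding that block's non-zero count to the shared LongAccumulator. A minimal sketch of such a pass-through function is shown below; the class name NnzCountingFunction is hypothetical, and the reliance on MatrixBlock.getNonZeros() is an assumption rather than the project's exact source.

import org.apache.spark.api.java.function.Function;
import org.apache.spark.util.LongAccumulator;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;

// Hypothetical sketch of a pass-through mapValues function that piggybacks
// nnz counting on a write pipeline, in the spirit of ComputeBinaryBlockNnzFunction.
public class NnzCountingFunction implements Function<MatrixBlock, MatrixBlock> {
    private static final long serialVersionUID = 1L;

    // shared accumulator provided by the caller (e.g., created via sc().longAccumulator("nnz"))
    private final LongAccumulator _aNnz;

    public NnzCountingFunction(LongAccumulator aNnz) {
        _aNnz = aNnz;
    }

    @Override
    public MatrixBlock call(MatrixBlock block) throws Exception {
        // add this block's non-zero count to the accumulator (getNonZeros() is assumed here) ...
        _aNnz.add(block.getNonZeros());
        // ... and return the block unchanged so the downstream write is unaffected
        return block;
    }
}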
Use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project incubator-systemml by apache.
Class SparkExecutionContext, method writeRDDtoHDFS.
@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
    // piggyback nnz maintenance on write
    LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
    lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
    // save file is an action which also triggers nnz maintenance
    lrdd.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
    // return nnz aggregate of all blocks
    return aNnz.value();
}
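Because Spark accumulators are populated only when an action executes, writeRDDtoHDFS can safely return aNnz.value() once saveAsHadoopFile has completed. A hypothetical caller might use that return value to backfill the matrix metadata; the helper name and parameter names below are illustrative assumptions, and only writeRDDtoHDFS itself comes from the snippet above.

// Hypothetical helper: export a binary-block RDD and backfill the nnz metadata.
static void exportBinaryBlock(RDDObject rddObj, String outputPath, MatrixCharacteristics mc) {
    long nnz = SparkExecutionContext.writeRDDtoHDFS(rddObj, outputPath, OutputInfo.BinaryBlockOutputInfo);
    // the accumulator is complete only after saveAsHadoopFile has run inside writeRDDtoHDFS
    mc.setNonZeros(nnz);
}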
Use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project systemml by apache.
Class WriteSPInstruction, method processMatrixWriteInstruction, and class SparkExecutionContext, method writeRDDtoHDFS: identical to the incubator-systemml versions shown above.