Search in sources :

Example 1 with ComputeBinaryBlockNnzFunction

use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project incubator-systemml by apache.

the class WriteSPInstruction method processMatrixWriteInstruction.

protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws IOException {
    // get input rdd
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
        // piggyback nnz maintenance on write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> header = null;
        if (oi == OutputInfo.MatrixMarketOutputInfo) {
            ArrayList<String> headerContainer = new ArrayList<>(1);
            // First output MM header
            String headerStr = "%%MatrixMarket matrix coordinate real general\n" + // output number of rows, number of columns and number of nnz
            mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
            headerContainer.add(headerStr);
            header = sec.getSparkContext().parallelize(headerContainer);
        }
        JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
        if (header != null)
            customSaveTextFile(header.union(ijv), fname, true);
        else
            customSaveTextFile(ijv, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    } else if (oi == OutputInfo.CSVOutputInfo) {
        if (mc.getRows() == 0 || mc.getCols() == 0) {
            throw new IOException("Write of matrices with zero rows or columns" + " not supported (" + mc.getRows() + "x" + mc.getCols() + ").");
        }
        LongAccumulator aNnz = null;
        // piggyback nnz computation on actual write
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
        customSaveTextFile(out, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros((long) aNnz.value().longValue());
    } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
        // piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        // save binary block rdd on hdfs
        in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        if (!mc.nnzKnown())
            mc.setNonZeros((long) aNnz.value().longValue());
    } else {
        // unsupported formats: binarycell (not externalized)
        throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
    }
    // write meta data file
    MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) JavaRDD(org.apache.spark.api.java.JavaRDD) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LongAccumulator(org.apache.spark.util.LongAccumulator)

Example 2 with ComputeBinaryBlockNnzFunction

use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project incubator-systemml by apache.

the class SparkExecutionContext method writeRDDtoHDFS.

@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
    // piggyback nnz maintenance on write
    LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
    lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
    // save file is an action which also triggers nnz maintenance
    lrdd.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
    // return nnz aggregate of all blocks
    return aNnz.value();
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction)

Example 3 with ComputeBinaryBlockNnzFunction

use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project systemml by apache.

the class WriteSPInstruction method processMatrixWriteInstruction.

protected void processMatrixWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi) throws IOException {
    // get input rdd
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    if (oi == OutputInfo.MatrixMarketOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
        // piggyback nnz maintenance on write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> header = null;
        if (oi == OutputInfo.MatrixMarketOutputInfo) {
            ArrayList<String> headerContainer = new ArrayList<>(1);
            // First output MM header
            String headerStr = "%%MatrixMarket matrix coordinate real general\n" + // output number of rows, number of columns and number of nnz
            mc.getRows() + " " + mc.getCols() + " " + mc.getNonZeros();
            headerContainer.add(headerStr);
            header = sec.getSparkContext().parallelize(headerContainer);
        }
        JavaRDD<String> ijv = RDDConverterUtils.binaryBlockToTextCell(in1, mc);
        if (header != null)
            customSaveTextFile(header.union(ijv), fname, true);
        else
            customSaveTextFile(ijv, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros(aNnz.value());
    } else if (oi == OutputInfo.CSVOutputInfo) {
        if (mc.getRows() == 0 || mc.getCols() == 0) {
            throw new IOException("Write of matrices with zero rows or columns" + " not supported (" + mc.getRows() + "x" + mc.getCols() + ").");
        }
        LongAccumulator aNnz = null;
        // piggyback nnz computation on actual write
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        JavaRDD<String> out = RDDConverterUtils.binaryBlockToCsv(in1, mc, (CSVFileFormatProperties) formatProperties, true);
        customSaveTextFile(out, fname, false);
        if (!mc.nnzKnown())
            mc.setNonZeros((long) aNnz.value().longValue());
    } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
        // piggyback nnz computation on actual write
        LongAccumulator aNnz = null;
        if (!mc.nnzKnown()) {
            aNnz = sec.getSparkContext().sc().longAccumulator("nnz");
            in1 = in1.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
        }
        // save binary block rdd on hdfs
        in1.saveAsHadoopFile(fname, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
        if (!mc.nnzKnown())
            mc.setNonZeros((long) aNnz.value().longValue());
    } else {
        // unsupported formats: binarycell (not externalized)
        throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
    }
    // write meta data file
    MapReduceTool.writeMetaDataFile(fname + ".mtd", ValueType.DOUBLE, mc, oi, formatProperties);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ArrayList(java.util.ArrayList) IOException(java.io.IOException) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) JavaRDD(org.apache.spark.api.java.JavaRDD) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) LongAccumulator(org.apache.spark.util.LongAccumulator)

Example 4 with ComputeBinaryBlockNnzFunction

use of org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction in project systemml by apache.

the class SparkExecutionContext method writeRDDtoHDFS.

@SuppressWarnings("unchecked")
public static long writeRDDtoHDFS(RDDObject rdd, String path, OutputInfo oinfo) {
    JavaPairRDD<MatrixIndexes, MatrixBlock> lrdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) rdd.getRDD();
    // piggyback nnz maintenance on write
    LongAccumulator aNnz = getSparkContextStatic().sc().longAccumulator("nnz");
    lrdd = lrdd.mapValues(new ComputeBinaryBlockNnzFunction(aNnz));
    // save file is an action which also triggers nnz maintenance
    lrdd.saveAsHadoopFile(path, oinfo.outputKeyClass, oinfo.outputValueClass, oinfo.outputFormatClass);
    // return nnz aggregate of all blocks
    return aNnz.value();
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) CompressedMatrixBlock(org.apache.sysml.runtime.compress.CompressedMatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) ComputeBinaryBlockNnzFunction(org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction)

Aggregations

LongAccumulator (org.apache.spark.util.LongAccumulator)4 ComputeBinaryBlockNnzFunction (org.apache.sysml.runtime.instructions.spark.functions.ComputeBinaryBlockNnzFunction)4 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)4 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)4 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)2 JavaRDD (org.apache.spark.api.java.JavaRDD)2 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)2 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)2 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)2 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)2