Search in sources :

Example 1 with StorageLevel

use of org.apache.spark.storage.StorageLevel in project incubator-systemml by apache.

the class PMapmmSPInstruction method processInstruction.

/**
 * Runs this matrix-multiply instruction on Spark by processing the first input
 * in slices of {@code NUM_ROWBLOCKS} row blocks: each slice is collected into a
 * partitioned block, broadcast, multiplied against the cached second input, and
 * the per-slice results are unioned into the output RDD.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException declared by the instruction contract
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    final SparkExecutionContext sec = (SparkExecutionContext) ec;

    // resolve both operand RDDs and the characteristics of the first operand
    final JavaPairRDD<MatrixIndexes, MatrixBlock> lhs = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rhsHandle = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    final MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());

    // Using one fixed level avoids errors such as java.lang.UnsupportedOperationException:
    // Cannot change storage level of an RDD after it was already assigned a level.
    // Ideally, we should ensure that we do not redundantly call persist on the same RDD.
    final StorageLevel level = StorageLevel.MEMORY_AND_DISK();

    // the right-hand side is read once per slice, so repartition and cache it up front
    final JavaPairRDD<MatrixIndexes, MatrixBlock> rhs = rhsHandle.repartition(sec.getSparkContext().defaultParallelism()).persist(level);

    JavaPairRDD<MatrixIndexes, MatrixBlock> result = null;
    int rowOffset = 0;
    while (rowOffset < mc1.getRows()) {
        // select the blocks of the current row slice and rebase their indexes to the slice
        JavaPairRDD<MatrixIndexes, MatrixBlock> inRange = lhs.filter(new IsBlockInRange(rowOffset + 1, rowOffset + NUM_ROWBLOCKS * mc1.getRowsPerBlock(), 1, mc1.getCols(), mc1));
        JavaPairRDD<MatrixIndexes, MatrixBlock> slice = inRange.mapToPair(new PMapMMRebaseBlocksFunction(rowOffset / mc1.getRowsPerBlock()));

        // the last slice may hold fewer rows than a full slice
        int sliceRows = (int) Math.min(mc1.getRows() - rowOffset, NUM_ROWBLOCKS * mc1.getRowsPerBlock());

        // collect the slice and broadcast it
        PartitionedBlock<MatrixBlock> sliceBlocks = SparkExecutionContext.toPartitionedMatrixBlock(slice, sliceRows, (int) mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock(), -1L);
        Broadcast<PartitionedBlock<MatrixBlock>> sliceBroadcast = sec.getSparkContext().broadcast(sliceBlocks);

        // matrix multiplication of the broadcast slice with the cached right-hand side
        JavaPairRDD<MatrixIndexes, MatrixBlock> partial = rhs.flatMapToPair(new PMapMMFunction(sliceBroadcast, rowOffset / mc1.getRowsPerBlock()));
        partial = RDDAggregateUtils.sumByKeyStable(partial, false);

        // persist and count the partial result before the broadcast is unpersisted
        partial.persist(level).count();
        sliceBroadcast.unpersist(false);

        result = (result == null) ? partial : result.union(partial);

        rowOffset += NUM_ROWBLOCKS * mc1.getRowsPerBlock();
    }

    // cache final result
    result = result.persist(level);
    result.count();

    // put output RDD handle into symbol table and record both inputs as lineage
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IsBlockInRange(org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) PartitionedBlock(org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) StorageLevel(org.apache.spark.storage.StorageLevel)

Example 2 with StorageLevel

use of org.apache.spark.storage.StorageLevel in project incubator-systemml by apache.

the class CheckpointSPInstruction method parseInstruction.

/**
 * Builds a {@link CheckpointSPInstruction} from its string encoding.
 *
 * <p>After splitting, the fields are read as: {@code [0]} opcode, {@code [1]}
 * input operand, {@code [2]} output operand, {@code [3]} Spark storage level
 * name (resolved via {@link StorageLevel#fromString(String)}).
 *
 * @param str the encoded instruction
 * @return the parsed instruction; its first constructor argument is {@code null}
 * @throws DMLRuntimeException declared for the parsing/validation helpers
 */
public static CheckpointSPInstruction parseInstruction(String str) throws DMLRuntimeException {
    final String[] fields = InstructionUtils.getInstructionPartsWithValueType(str);
    // NOTE(review): presumably the opcode is not counted, so 3 fields means indexes 0..3 are valid; confirm in InstructionUtils
    InstructionUtils.checkNumFields(fields, 3);

    final String op = fields[0];
    final CPOperand source = new CPOperand(fields[1]);
    final CPOperand target = new CPOperand(fields[2]);
    final StorageLevel storageLevel = StorageLevel.fromString(fields[3]);

    return new CheckpointSPInstruction(null, source, target, storageLevel, op, str);
}
Also used : CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) StorageLevel(org.apache.spark.storage.StorageLevel)

Example 3 with StorageLevel

use of org.apache.spark.storage.StorageLevel in project cdap by caskdata.

the class MockSource method getStream.

/**
 * Creates a receiver-backed stream that replays the records configured in
 * {@code conf.records}.
 *
 * <p>The JSON list in {@code conf.records} is decoded into strings, each string is
 * converted to a {@link StructuredRecord} using the schema parsed from
 * {@code conf.schema}, and a background thread started in {@code onStart()} stores
 * the records one by one, sleeping {@code conf.intervalMillis} milliseconds after
 * each stored record.
 *
 * @param context streaming context that supplies the Spark streaming context
 * @return a stream fed by the mock receiver
 * @throws Exception if the schema or the records cannot be parsed
 */
@Override
public JavaDStream<StructuredRecord> getStream(StreamingContext context) throws Exception {
    Schema schema = Schema.parseJson(conf.schema);
    List<String> recordsAsStrings = new Gson().fromJson(conf.records, STRING_LIST_TYPE);
    final List<StructuredRecord> inputRecords = new ArrayList<>();
    for (String recordStr : recordsAsStrings) {
        inputRecords.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    JavaStreamingContext jsc = context.getSparkStreamingContext();
    return jsc.receiverStream(new Receiver<StructuredRecord>(StorageLevel.MEMORY_ONLY()) {

        @Override
        public StorageLevel storageLevel() {
            return StorageLevel.MEMORY_ONLY();
        }

        @Override
        public void onStart() {
            // emit from a separate thread; onStart() itself returns right after start()
            new Thread() {

                @Override
                public void run() {
                    for (StructuredRecord record : inputRecords) {
                        // NOTE(review): records visited while isStarted() is false are skipped,
                        // not retried - confirm this is the intended behavior.
                        if (isStarted()) {
                            store(record);
                            try {
                                TimeUnit.MILLISECONDS.sleep(conf.intervalMillis);
                            } catch (InterruptedException e) {
                                // Interruption is a request to stop: keep the interrupt status
                                // set and end the thread quietly instead of dying with an
                                // uncaught RuntimeException.
                                Thread.currentThread().interrupt();
                                return;
                            }
                        }
                    }
                }
            }.start();
        }

        @Override
        public void onStop() {
            // nothing to release here
        }
    });
}
Also used : Schema(co.cask.cdap.api.data.schema.Schema) ArrayList(java.util.ArrayList) Gson(com.google.gson.Gson) StructuredRecord(co.cask.cdap.api.data.format.StructuredRecord) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) StorageLevel(org.apache.spark.storage.StorageLevel)

Aggregations

StorageLevel (org.apache.spark.storage.StorageLevel)3 StructuredRecord (co.cask.cdap.api.data.format.StructuredRecord)1 Schema (co.cask.cdap.api.data.schema.Schema)1 Gson (com.google.gson.Gson)1 ArrayList (java.util.ArrayList)1 JavaStreamingContext (org.apache.spark.streaming.api.java.JavaStreamingContext)1 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)1 CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand)1 PartitionedBlock (org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock)1 IsBlockInRange (org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange)1 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)1 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)1 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)1