Use of org.apache.sysml.runtime.instructions.spark.data.BroadcastObject in the project incubator-systemml by Apache.
Class SparkExecutionContext, method rCleanupLineageObject.
/**
 * Recursively cleans up a lineage object and then its lineage children.
 *
 * <p>The call is a no-op while the object still has references or a back
 * reference. Otherwise the resources behind the object are released (RDD or
 * broadcast variables, depending on the concrete type), each lineage child
 * gets its reference count decremented, and the cleanup recurses into it.
 *
 * @param lob lineage object to clean up
 * @throws IOException declared for the cleanup calls made here (e.g., the deferred HDFS file removal)
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void rCleanupLineageObject(LineageObject lob) throws IOException {
    // Stop the recursion if there are still consumers; a back reference also
    // aborts the cleanup (robustness in function calls, and avoids repeated
    // scans of the symbol table).
    if (lob.getNumReferences() > 0 || lob.hasBackReference()) {
        return;
    }

    // Clean up the current lineage object.
    if (lob instanceof RDDObject) {
        RDDObject rddObj = (RDDObject) lob;
        // read the id before the RDD is cleaned up; it is needed for deregistration below
        int id = rddObj.getRDD().id();
        cleanupRDDVariable(rddObj.getRDD());

        // deferred HDFS file removal, only if a file name has been set on the object
        String hdfsFile = rddObj.getHDFSFilename();
        if (hdfsFile != null) {
            MapReduceTool.deleteFileWithMTDIfExistOnHDFS(hdfsFile);
        }

        if (rddObj.isParallelizedRDD()) {
            _parRDDs.deregisterRDD(id);
        }
    } else if (lob instanceof BroadcastObject) {
        BroadcastObject bcObj = (BroadcastObject) lob;
        PartitionedBroadcast parts = bcObj.getBroadcast();
        // the partitioned broadcast may be missing (robustness for evictions)
        if (parts != null) {
            for (Broadcast<PartitionedBlock> part : parts.getBroadcasts()) {
                cleanupBroadcastVariable(part);
            }
        }
        // the size accounting is reverted regardless of whether the broadcast was still present
        CacheableData.addBroadcastSize(-bcObj.getSize());
    }

    // Recursively process the lineage children.
    for (LineageObject child : lob.getLineageChilds()) {
        child.decrementNumReferences();
        rCleanupLineageObject(child);
    }
}
Use of org.apache.sysml.runtime.instructions.spark.data.BroadcastObject in the project incubator-systemml by Apache.
Class SparkExecutionContext, method getBroadcastForVariable.
/**
 * Returns the partitioned broadcast for the matrix bound to the given variable.
 *
 * <p>If the matrix object already carries a valid broadcast handle whose
 * broadcast is still available, that broadcast is reused. Otherwise the matrix
 * is read, split into one or more partitions, each partition is broadcast via
 * the Spark context, and a new broadcast handle is attached to the matrix
 * object. The tracked broadcast size is adjusted accordingly.
 *
 * <p>TODO So far this method only creates broadcast variables but never
 * destroys them. This is a memory leak which might lead to executor
 * out-of-memory. In order to handle this, we need to keep track of when
 * broadcast variables are no longer required.
 *
 * @param varname variable name
 * @return wrapper for broadcast variables
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
@SuppressWarnings("unchecked")
public PartitionedBroadcast<MatrixBlock> getBroadcastForVariable(String varname) throws DMLRuntimeException {
    long t0 = 0;
    if (DMLScript.STATISTICS) {
        t0 = System.nanoTime();
    }

    MatrixObject mo = getMatrixObject(varname);

    // reuse the existing broadcast handle if there is a valid one
    BroadcastObject<MatrixBlock> oldHandle = mo.getBroadcastHandle();
    PartitionedBroadcast<MatrixBlock> result = null;
    if (oldHandle != null && oldHandle.isValid()) {
        result = oldHandle.getBroadcast();
    }

    // otherwise create a new broadcast handle (never created, or evicted)
    if (result == null) {
        // an invalid handle is about to be overwritten: revert its size accounting
        if (oldHandle != null) {
            CacheableData.addBroadcastSize(-oldHandle.getSize());
        }

        // block dimensions of the matrix
        int brlen = (int) mo.getNumRowsPerBlock();
        int bclen = (int) mo.getNumColumnsPerBlock();

        // build the partitioned block, then release the input matrix right away
        MatrixBlock input = mo.acquireRead();
        PartitionedBlock<MatrixBlock> pmb = new PartitionedBlock<MatrixBlock>(input, brlen, bclen);
        mo.release();

        // coarse-grained partitioning: number of blocks per part and number of parts
        int numPerPart = PartitionedBroadcast.computeBlocksPerPartition(
            mo.getNumRows(), mo.getNumColumns(), brlen, bclen);
        int numParts = (int) Math.ceil(
            (double) pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() / numPerPart);
        Broadcast<PartitionedBlock<MatrixBlock>>[] parts = new Broadcast[numParts];

        if (numParts > 1) {
            // one broadcast per partition; the last partition may hold fewer blocks
            int totalBlks = pmb.getNumRowBlocks() * pmb.getNumColumnBlocks();
            for (int idx = 0, offset = 0; idx < numParts; idx++, offset += numPerPart) {
                int numBlks = Math.min(numPerPart, totalBlks - offset);
                PartitionedBlock<MatrixBlock> slice = pmb.createPartition(offset, numBlks, new MatrixBlock());
                parts[idx] = getSparkContext().broadcast(slice);
            }
        } else {
            // single partition: broadcast the partitioned block as a whole
            parts[0] = getSparkContext().broadcast(pmb);
        }

        // wrap the broadcasts, attach the new handle, and account for its size
        result = new PartitionedBroadcast<MatrixBlock>(parts);
        long size = OptimizerUtils.estimatePartitionedSizeExactSparsity(mo.getMatrixCharacteristics());
        BroadcastObject<MatrixBlock> newHandle = new BroadcastObject<MatrixBlock>(result, varname, size);
        mo.setBroadcastHandle(newHandle);
        CacheableData.addBroadcastSize(newHandle.getSize());
    }

    if (DMLScript.STATISTICS) {
        Statistics.accSparkBroadCastTime(System.nanoTime() - t0);
        Statistics.incSparkBroadcastCount(1);
    }

    return result;
}
Use of org.apache.sysml.runtime.instructions.spark.data.BroadcastObject in the project incubator-systemml by Apache.
Class SparkExecutionContext, method getBroadcastForFrameVariable.
/**
 * Returns the partitioned broadcast for the frame bound to the given variable.
 *
 * <p>If the frame object already carries a valid broadcast handle whose
 * broadcast is still available, that broadcast is reused. Otherwise the frame
 * is read, split into one or more partitions, each partition is broadcast via
 * the Spark context, and a new broadcast handle is attached to the frame
 * object. The tracked broadcast size is adjusted accordingly.
 *
 * @param varname variable name
 * @return wrapper for broadcast variables
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
@SuppressWarnings("unchecked")
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname) throws DMLRuntimeException {
    long t0 = 0;
    if (DMLScript.STATISTICS) {
        t0 = System.nanoTime();
    }

    FrameObject fo = getFrameObject(varname);

    // reuse the existing broadcast handle if there is a valid one
    BroadcastObject<FrameBlock> oldHandle = fo.getBroadcastHandle();
    PartitionedBroadcast<FrameBlock> result = null;
    if (oldHandle != null && oldHandle.isValid()) {
        result = oldHandle.getBroadcast();
    }

    // otherwise create a new broadcast handle (never created, or evicted)
    if (result == null) {
        // an invalid handle is about to be overwritten: revert its size accounting
        if (oldHandle != null) {
            CacheableData.addBroadcastSize(-oldHandle.getSize());
        }

        // block dimensions for frames: all columns per block, default frame size as row count
        int bclen = (int) fo.getNumColumns();
        int brlen = OptimizerUtils.getDefaultFrameSize();

        // build the partitioned block, then release the input frame right away
        FrameBlock input = fo.acquireRead();
        PartitionedBlock<FrameBlock> pmb = new PartitionedBlock<FrameBlock>(input, brlen, bclen);
        fo.release();

        // coarse-grained partitioning: number of blocks per part and number of parts
        int numPerPart = PartitionedBroadcast.computeBlocksPerPartition(
            fo.getNumRows(), fo.getNumColumns(), brlen, bclen);
        int numParts = (int) Math.ceil(
            (double) pmb.getNumRowBlocks() * pmb.getNumColumnBlocks() / numPerPart);
        Broadcast<PartitionedBlock<FrameBlock>>[] parts = new Broadcast[numParts];

        if (numParts > 1) {
            // one broadcast per partition; the last partition may hold fewer blocks
            int totalBlks = pmb.getNumRowBlocks() * pmb.getNumColumnBlocks();
            for (int idx = 0, offset = 0; idx < numParts; idx++, offset += numPerPart) {
                int numBlks = Math.min(numPerPart, totalBlks - offset);
                PartitionedBlock<FrameBlock> slice = pmb.createPartition(offset, numBlks, new FrameBlock());
                parts[idx] = getSparkContext().broadcast(slice);
            }
        } else {
            // single partition: broadcast the partitioned block as a whole
            parts[0] = getSparkContext().broadcast(pmb);
        }

        // wrap the broadcasts, attach the new handle, and account for its size
        result = new PartitionedBroadcast<FrameBlock>(parts);
        long size = OptimizerUtils.estimatePartitionedSizeExactSparsity(fo.getMatrixCharacteristics());
        BroadcastObject<FrameBlock> newHandle = new BroadcastObject<FrameBlock>(result, varname, size);
        fo.setBroadcastHandle(newHandle);
        CacheableData.addBroadcastSize(newHandle.getSize());
    }

    if (DMLScript.STATISTICS) {
        Statistics.accSparkBroadCastTime(System.nanoTime() - t0);
        Statistics.incSparkBroadcastCount(1);
    }

    return result;
}
Aggregations